diff --git a/README.md b/README.md index d33994e..2675f82 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ Select the model that aligns with your hardware capabilities. 3. **Bootstrap**: Run it. The agent will self-provision an isolated Python environment (~2GB) on first launch. 4. **Updates**: Simply replace the `.exe`. The **Smart Bootstrapper** will detect the update and sync only the changed files, preserving your settings and skipping unnecessary downloads. -### īŋŊ Troubleshooting +### 🔧 Troubleshooting * **App crashes on start**: Ensure you have [Microsoft Visual C++ Redistributable 2015-2022](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) installed. * **"Simulate Typing" is slow**: Some applications (remote desktops, older games) choke on super-fast input. Lower the typing speed in Settings to ~1200 CPM. * **No Audio**: The agent listens to the **Default Communication Device**. Check your Windows Sound Control Panel. @@ -111,10 +111,36 @@ Select the model that aligns with your hardware capabilities. The engine supports 99 languages. You can lock the engine to a specific language in Settings to improve accuracy, or leave it on **Auto-Detect** for multilingual usage. 
-Afrikaans, Albanian, Amharic, Arabic, Armenian, Assamese, Azerbaijani, Bashkir, Basque, Belarusian, Bengali, Bosnian, Breton, Bulgarian, Burmese, Castilian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Faroese, Finnish, Flemish, French, Galician, Georgian, German, Greek, Gujarati, Haitian, Hausa, Hawaiian, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Lao, Latin, Latvian, Lingala, Lithuanian, Luxembourgish, Macedonian, Malagasy, Malay, Malayalam, Maltese, Maori, Marathi, Moldavian, Mongolian, Myanmar, Nepali, Norwegian, Occitan, Panjabi, Pashto, Persian, Polish, Portuguese, Punjabi, Romanian, Russian, Sanskrit, Serbian, Shona, Sindhi, Sinhala, Slovak, Slovenian, Somali, Spanish, Sundanese, Swahili, Swedish, Tagalog, Tajik, Tamil, Tatar, Telugu, Thai, Tibetan, Turkish, Turkmen, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, Yiddish, Yoruba. +([See full language list below](#full-language-list)) --- +## 🌐 Full Language List + +| | | | | | +| :--- | :--- | :--- | :--- | :--- | +| Afrikaans 🇿🇦 | Albanian 🇦🇱 | Amharic 🇪🇹 | Arabic 🇸🇦 | Armenian 🇦🇲 | +| Assamese 🇮🇳 | Azerbaijani 🇦🇿 | Bashkir 🇷🇺 | Basque 🇪🇸 | Belarusian 🇧🇾 | +| Bengali 🇧🇩 | Bosnian 🇧🇦 | Breton 🇫🇷 | Bulgarian 🇧🇬 | Burmese 🇲🇲 | +| Castilian 🇪🇸 | Catalan 🇪🇸 | Chinese 🇨🇳 | Croatian 🇭🇷 | Czech 🇨🇿 | +| Danish 🇩🇰 | Dutch 🇳🇱 | English 🇺🇸 | Estonian 🇪🇪 | Faroese 🇫🇴 | +| Finnish 🇫🇮 | Flemish 🇧🇪 | French 🇫🇷 | Galician 🇪🇸 | Georgian 🇬🇪 | +| German 🇩🇪 | Greek 🇬🇷 | Gujarati 🇮🇳 | Haitian 🇭🇹 | Hausa 🇳🇬 | +| Hawaiian 🇺🇸 | Hebrew 🇮🇱 | Hindi 🇮🇳 | Hungarian 🇭🇺 | Icelandic 🇮🇸 | +| Indonesian 🇮🇩 | Italian 🇮🇹 | Japanese 🇯🇵 | Javanese 🇮🇩 | Kannada 🇮🇳 | +| Kazakh 🇰🇿 | Khmer 🇰🇭 | Korean 🇰🇷 | Lao 🇱🇦 | Latin 🇻🇦 | +| Latvian 🇱🇻 | Lingala 🇨🇩 | Lithuanian 🇱🇹 | Luxembourgish 🇱🇺 | Macedonian 🇲🇰 | +| 
Malagasy 🇲🇬 | Malay 🇲🇾 | Malayalam 🇮🇳 | Maltese 🇲🇹 | Maori 🇳🇿 | +| Marathi 🇮🇳 | Moldavian 🇲🇩 | Mongolian 🇲🇳 | Myanmar 🇲🇲 | Nepali 🇳🇵 | +| Norwegian 🇳🇴 | Occitan 🇫🇷 | Panjabi 🇮🇳 | Pashto 🇦🇫 | Persian 🇮🇷 | +| Polish 🇵🇱 | Portuguese 🇵🇹 | Punjabi 🇮🇳 | Romanian 🇷🇴 | Russian 🇷🇺 | +| Sanskrit 🇮🇳 | Serbian 🇷🇸 | Shona 🇿🇼 | Sindhi 🇵🇰 | Sinhala 🇱🇰 | +| Slovak 🇸🇰 | Slovenian 🇸🇮 | Somali 🇸🇴 | Spanish 🇪🇸 | Sundanese 🇮🇩 | +| Swahili 🇰🇪 | Swedish 🇸🇪 | Tagalog 🇵🇭 | Tajik 🇹🇯 | Tamil 🇮🇳 | +| Tatar 🇷🇺 | Telugu 🇮🇳 | Thai 🇹🇭 | Tibetan 🇨🇳 | Turkish 🇹🇷 | +| Turkmen 🇹🇲 | Ukrainian 🇺🇦 | Urdu 🇵🇰 | Uzbek 🇺🇿 | Vietnamese 🇻🇳 | +| Welsh 🏴󠁧󠁢󠁷󠁬󠁳󠁿 | Yiddish 🇮🇱 | Yoruba 🇳🇬 | | | +
### ⚖️ PUBLIC DOMAIN (CC0 1.0) diff --git a/main.py b/main.py index bc884f8..2d543b0 100644 --- a/main.py +++ b/main.py @@ -87,7 +87,7 @@ def _silent_shutdown_hook(exc_type, exc_value, exc_tb): sys.excepthook = _silent_shutdown_hook class DownloadWorker(QThread): - """Background worker for model downloads.""" + """Background worker for model downloads with REAL progress.""" progress = Signal(int) finished = Signal() error = Signal(str) @@ -98,20 +98,73 @@ class DownloadWorker(QThread): def run(self): try: - from faster_whisper import download_model + import requests + from tqdm import tqdm model_path = get_models_path() - # Download to a specific subdirectory to keep things clean and predictable - # This matches the logic in transcriber.py which looks for this specific path dest_dir = model_path / f"faster-whisper-{self.model_name}" - logging.info(f"Downloading Model '{self.model_name}' to {dest_dir}...") + dest_dir.mkdir(parents=True, exist_ok=True) - # Ensure parent exists - model_path.mkdir(parents=True, exist_ok=True) + # Files to download for a standard faster-whisper model + # We map local filenames to HF repo filenames + repo_id = f"Systran/faster-whisper-{self.model_name}" + files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"] - # output_dir in download_model specifies where the model files are saved - download_model(self.model_name, output_dir=str(dest_dir)) + # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?) + # We'll stick to the core 4. + base_url = f"https://huggingface.co/{repo_id}/resolve/main" + + logging.info(f"Downloading {self.model_name} from {base_url}...") + + # 1. 
Calculate Total Size + total_size = 0 + file_sizes = {} + + with requests.Session() as s: + for fname in files: + url = f"{base_url}/{fname}" + head = s.head(url, allow_redirects=True) + if head.status_code == 200: + size = int(head.headers.get('content-length', 0)) + file_sizes[fname] = size + total_size += size + else: + # Fallback for vocabulary.json vs vocabulary.txt + if fname == "vocabulary.json": + # Try .txt? Or just skip if not found? + # Faster-whisper usually has vocabulary.json + pass + + # 2. Download loop + downloaded_bytes = 0 + + with requests.Session() as s: + for fname in files: + if fname not in file_sizes: continue + + url = f"{base_url}/{fname}" + dest_file = dest_dir / fname + + # Resume check? + # Simpler to just overwrite for reliability unless we want complex resume logic. + # We'll overwrite. + + resp = s.get(url, stream=True) + resp.raise_for_status() + + with open(dest_file, 'wb') as f: + for chunk in resp.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + downloaded_bytes += len(chunk) + + # Emit Progress + if total_size > 0: + pct = int((downloaded_bytes / total_size) * 100) + self.progress.emit(pct) + self.finished.emit() + except Exception as e: logging.error(f"Download failed: {e}") self.error.emit(str(e)) diff --git a/src/core/transcriber.py b/src/core/transcriber.py index 42b958e..ccd346b 100644 --- a/src/core/transcriber.py +++ b/src/core/transcriber.py @@ -94,20 +94,59 @@ class WhisperTranscriber: language = self.config.get("language") # Use task override if provided, otherwise config - final_task = task if task else self.config.get("task") + # Ensure safe string and lowercase ("transcribe" vs "Transcribe") + raw_task = task if task else self.config.get("task") + final_task = str(raw_task).strip().lower() if raw_task else "transcribe" + + # Sanity check for valid Whisper tasks + if final_task not in ["transcribe", "translate"]: + logging.warning(f"Invalid task '{final_task}' detected. 
Defaulting to 'transcribe'.") + final_task = "transcribe" + + # Language handling + final_language = language if language != "auto" else None + + # Anti-Hallucination: Force condition_on_previous_text=False for translation + condition_prev = self.config.get("condition_on_previous_text") + + # Helper options for Translation Stability + initial_prompt = self.config.get("initial_prompt") + + if final_task == "translate": + condition_prev = False + # Force beam search if user has set it to greedy (1) + # Translation requires more search breadth to find the English mapping + if beam_size < 5: + logging.info("Forcing beam_size=5 for Translation task.") + beam_size = 5 + + # Inject guidance prompt if none exists + if not initial_prompt: + initial_prompt = "Translate this to English." + + logging.info(f"Model Dispatch: Task='{final_task}', Language='{final_language}', ConditionPrev={condition_prev}, Beam={beam_size}") + + # Build arguments dynamically to avoid passing None if that's the issue + transcribe_opts = { + "beam_size": beam_size, + "best_of": best_of, + "vad_filter": vad, + "task": final_task, + "vad_parameters": dict(min_silence_duration_ms=500), + "condition_on_previous_text": condition_prev, + "without_timestamps": True + } + + if initial_prompt: + transcribe_opts["initial_prompt"] = initial_prompt + + # Only add language if it's explicitly set (not None/Auto) + # This avoids potentially confusing the model with explicit None + if final_language: + transcribe_opts["language"] = final_language # Transcribe - segments, info = self.model.transcribe( - audio_data, - beam_size=beam_size, - best_of=best_of, - vad_filter=vad, - task=final_task, - language=language if language != "auto" else None, - vad_parameters=dict(min_silence_duration_ms=500), - condition_on_previous_text=self.config.get("condition_on_previous_text"), - without_timestamps=True - ) + segments, info = self.model.transcribe(audio_data, **transcribe_opts) # Aggregate text text_result = "" diff --git 
a/src/utils/window_hook.py b/src/utils/window_hook.py index d43f5c0..d408b6c 100644 --- a/src/utils/window_hook.py +++ b/src/utils/window_hook.py @@ -55,6 +55,10 @@ except AttributeError: def LOWORD(l): return l & 0xffff def HIWORD(l): return (l >> 16) & 0xffff +GWL_EXSTYLE = -20 +WS_EX_TRANSPARENT = 0x00000020 +WS_EX_LAYERED = 0x00080000 + class WindowHook: def __init__(self, hwnd, width, height, initial_scale=1.0): self.hwnd = hwnd @@ -68,7 +72,31 @@ class WindowHook: self.enabled = True # New flag def set_enabled(self, enabled): + """ + Enables or disables interaction. + When disabled, we set WS_EX_TRANSPARENT so clicks pass through physically. + """ + if self.enabled == enabled: + return + self.enabled = enabled + + # Get current styles + style = user32.GetWindowLongW(self.hwnd, GWL_EXSTYLE) + + if not enabled: + # Enable Click-Through (Add Transparent) + # We also ensure Layered is set (Qt usually sets it, but good to be sure) + new_style = style | WS_EX_TRANSPARENT | WS_EX_LAYERED + else: + # Disable Click-Through (Remove Transparent) + new_style = style & ~WS_EX_TRANSPARENT + + if new_style != style: + SetWindowLongPtr(self.hwnd, GWL_EXSTYLE, new_style) + + # Force a redraw/frame update just in case + user32.SetWindowPos(self.hwnd, 0, 0, 0, 0, 0, 0x0027) # SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED def install(self): proc_address = ctypes.cast(self.new_wnd_proc, ctypes.c_void_p)