diff --git a/README.md b/README.md
index d33994e..2675f82 100644
--- a/README.md
+++ b/README.md
@@ -100,7 +100,7 @@ Select the model that aligns with your hardware capabilities.
3. **Bootstrap**: Run it. The agent will self-provision an isolated Python environment (~2GB) on first launch.
4. **Updates**: Simply replace the `.exe`. The **Smart Bootstrapper** will detect the update and sync only the changed files, preserving your settings and skipping unnecessary downloads.
-### īŋŊ Troubleshooting
+### 🔧 Troubleshooting
* **App crashes on start**: Ensure you have [Microsoft Visual C++ Redistributable 2015-2022](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) installed.
* **"Simulate Typing" is slow**: Some applications (remote desktops, older games) choke on super-fast input. Lower the typing speed in Settings to ~1200 CPM.
* **No Audio**: The agent listens to the **Default Communication Device**. Check your Windows Sound Control Panel.
@@ -111,10 +111,36 @@ Select the model that aligns with your hardware capabilities.
The engine supports 99 languages. You can lock the engine to a specific language in Settings to improve accuracy, or leave it on **Auto-Detect** for multilingual usage.
-Afrikaans, Albanian, Amharic, Arabic, Armenian, Assamese, Azerbaijani, Bashkir, Basque, Belarusian, Bengali, Bosnian, Breton, Bulgarian, Burmese, Castilian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Faroese, Finnish, Flemish, French, Galician, Georgian, German, Greek, Gujarati, Haitian, Hausa, Hawaiian, Hebrew, Hindi, Hungarian, Icelandic, Indonesian, Italian, Japanese, Javanese, Kannada, Kazakh, Khmer, Korean, Lao, Latin, Latvian, Lingala, Lithuanian, Luxembourgish, Macedonian, Malagasy, Malay, Malayalam, Maltese, Maori, Marathi, Moldavian, Mongolian, Myanmar, Nepali, Norwegian, Occitan, Panjabi, Pashto, Persian, Polish, Portuguese, Punjabi, Romanian, Russian, Sanskrit, Serbian, Shona, Sindhi, Sinhala, Slovak, Slovenian, Somali, Spanish, Sundanese, Swahili, Swedish, Tagalog, Tajik, Tamil, Tatar, Telugu, Thai, Tibetan, Turkish, Turkmen, Ukrainian, Urdu, Uzbek, Vietnamese, Welsh, Yiddish, Yoruba.
+([See full language list below](#-full-language-list))
---
+## đ Full Language List
+
+| | | | | |
+| :--- | :--- | :--- | :--- | :--- |
+| Afrikaans 🇿🇦 | Albanian 🇦🇱 | Amharic 🇪🇹 | Arabic 🇸🇦 | Armenian 🇦🇲 |
+| Assamese 🇮🇳 | Azerbaijani 🇦🇿 | Bashkir 🇷🇺 | Basque 🇪🇸 | Belarusian 🇧🇾 |
+| Bengali 🇧🇩 | Bosnian 🇧🇦 | Breton 🇫🇷 | Bulgarian 🇧🇬 | Burmese 🇲🇲 |
+| Castilian 🇪🇸 | Catalan 🇪🇸 | Chinese 🇨🇳 | Croatian 🇭🇷 | Czech 🇨🇿 |
+| Danish 🇩🇰 | Dutch 🇳🇱 | English 🇺🇸 | Estonian 🇪🇪 | Faroese 🇫🇴 |
+| Finnish 🇫🇮 | Flemish 🇧🇪 | French 🇫🇷 | Galician 🇪🇸 | Georgian 🇬🇪 |
+| German 🇩🇪 | Greek 🇬🇷 | Gujarati 🇮🇳 | Haitian 🇭🇹 | Hausa 🇳🇬 |
+| Hawaiian 🇺🇸 | Hebrew 🇮🇱 | Hindi 🇮🇳 | Hungarian 🇭🇺 | Icelandic 🇮🇸 |
+| Indonesian 🇮🇩 | Italian 🇮🇹 | Japanese 🇯🇵 | Javanese 🇮🇩 | Kannada 🇮🇳 |
+| Kazakh 🇰🇿 | Khmer 🇰🇭 | Korean 🇰🇷 | Lao 🇱🇦 | Latin 🇻🇦 |
+| Latvian 🇱🇻 | Lingala 🇨🇩 | Lithuanian 🇱🇹 | Luxembourgish 🇱🇺 | Macedonian 🇲🇰 |
+| Malagasy 🇲🇬 | Malay 🇲🇾 | Malayalam 🇮🇳 | Maltese 🇲🇹 | Maori 🇳🇿 |
+| Marathi 🇮🇳 | Moldavian 🇲🇩 | Mongolian 🇲🇳 | Myanmar 🇲🇲 | Nepali 🇳🇵 |
+| Norwegian 🇳🇴 | Occitan 🇫🇷 | Panjabi 🇮🇳 | Pashto 🇦🇫 | Persian 🇮🇷 |
+| Polish 🇵🇱 | Portuguese 🇵🇹 | Punjabi 🇮🇳 | Romanian 🇷🇴 | Russian 🇷🇺 |
+| Sanskrit 🇮🇳 | Serbian 🇷🇸 | Shona 🇿🇼 | Sindhi 🇵🇰 | Sinhala 🇱🇰 |
+| Slovak 🇸🇰 | Slovenian 🇸🇮 | Somali 🇸🇴 | Spanish 🇪🇸 | Sundanese 🇮🇩 |
+| Swahili 🇰🇪 | Swedish 🇸🇪 | Tagalog 🇵🇭 | Tajik 🇹🇯 | Tamil 🇮🇳 |
+| Tatar 🇷🇺 | Telugu 🇮🇳 | Thai 🇹🇭 | Tibetan 🇨🇳 | Turkish 🇹🇷 |
+| Turkmen 🇹🇲 | Ukrainian 🇺🇦 | Urdu 🇵🇰 | Uzbek 🇺🇿 | Vietnamese 🇻🇳 |
+| Welsh 🏴󠁧󠁢󠁷󠁬󠁳󠁿 | Yiddish 🇮🇱 | Yoruba 🇳🇬 | | |
+
### ⚖️ PUBLIC DOMAIN (CC0 1.0)
diff --git a/main.py b/main.py
index bc884f8..2d543b0 100644
--- a/main.py
+++ b/main.py
@@ -87,7 +87,7 @@ def _silent_shutdown_hook(exc_type, exc_value, exc_tb):
sys.excepthook = _silent_shutdown_hook
class DownloadWorker(QThread):
- """Background worker for model downloads."""
+ """Background worker for model downloads with REAL progress."""
progress = Signal(int)
finished = Signal()
error = Signal(str)
@@ -98,20 +98,73 @@ class DownloadWorker(QThread):
def run(self):
try:
- from faster_whisper import download_model
+ import requests
+ from tqdm import tqdm
model_path = get_models_path()
- # Download to a specific subdirectory to keep things clean and predictable
- # This matches the logic in transcriber.py which looks for this specific path
dest_dir = model_path / f"faster-whisper-{self.model_name}"
- logging.info(f"Downloading Model '{self.model_name}' to {dest_dir}...")
+ dest_dir.mkdir(parents=True, exist_ok=True)
- # Ensure parent exists
- model_path.mkdir(parents=True, exist_ok=True)
+ # Files fetched for a model; each is saved into dest_dir under the
+ # same name it has in the Hugging Face repo.
+ repo_id = f"Systran/faster-whisper-{self.model_name}"
+ files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
- # output_dir in download_model specifies where the model files are saved
- download_model(self.model_name, output_dir=str(dest_dir))
+ # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
+ # We'll stick to the core 4.
+ base_url = f"https://huggingface.co/{repo_id}/resolve/main"
+
+ logging.info(f"Downloading {self.model_name} from {base_url}...")
+
+ # 1. Calculate total size via HEAD requests; timeout is (connect, read) seconds
+ total_size = 0
+ file_sizes = {}
+
+ with requests.Session() as s:
+ for fname in files:
+ url = f"{base_url}/{fname}"
+ head = s.head(url, allow_redirects=True, timeout=(10, 60))
+ if head.status_code == 200:
+ size = int(head.headers.get('content-length', 0))
+ file_sizes[fname] = size
+ total_size += size
+ elif fname != "vocabulary.json":
+ # Only the vocabulary file is treated as optional here (a
+ # repo may ship vocabulary.txt instead). A missing config,
+ # model or tokenizer would leave an unusable model on disk,
+ # so fail loudly instead of reporting success.
+ raise RuntimeError(f"{fname} unavailable: HTTP {head.status_code}")
+
+ # 2. Download loop (files whose HEAD request failed are skipped)
+ downloaded_bytes = 0
+
+ with requests.Session() as s:
+ for fname in files:
+ if fname not in file_sizes: continue
+
+ url = f"{base_url}/{fname}"
+ dest_file = dest_dir / fname
+
+ # No resume support: every file is rewritten from scratch, which
+ # keeps the logic simple at the cost of re-downloading after a
+ # failed attempt. The read timeout applies per chunk, not in total.
+
+ resp = s.get(url, stream=True, timeout=(10, 60))
+ resp.raise_for_status()
+
+ with open(dest_file, 'wb') as f:
+ for chunk in resp.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+ downloaded_bytes += len(chunk)
+
+ # Emit progress as a percentage of the summed content-lengths
+ if total_size > 0:
+ pct = int((downloaded_bytes / total_size) * 100)
+ self.progress.emit(pct)
+
self.finished.emit()
+
except Exception as e:
logging.error(f"Download failed: {e}")
self.error.emit(str(e))
diff --git a/src/core/transcriber.py b/src/core/transcriber.py
index 42b958e..ccd346b 100644
--- a/src/core/transcriber.py
+++ b/src/core/transcriber.py
@@ -94,20 +94,59 @@ class WhisperTranscriber:
language = self.config.get("language")
# Use task override if provided, otherwise config
- final_task = task if task else self.config.get("task")
+ # Ensure safe string and lowercase ("transcribe" vs "Transcribe")
+ raw_task = task if task else self.config.get("task")
+ final_task = str(raw_task).strip().lower() if raw_task else "transcribe"
+
+ # Sanity check for valid Whisper tasks
+ if final_task not in ["transcribe", "translate"]:
+ logging.warning(f"Invalid task '{final_task}' detected. Defaulting to 'transcribe'.")
+ final_task = "transcribe"
+
+ # Language handling
+ final_language = language if language != "auto" else None
+
+ # Anti-Hallucination: Force condition_on_previous_text=False for translation
+ condition_prev = self.config.get("condition_on_previous_text")
+
+ # Helper options for Translation Stability
+ initial_prompt = self.config.get("initial_prompt")
+
+ if final_task == "translate":
+ condition_prev = False
+ # Force beam search if user has set it to greedy (1)
+ # Translation requires more search breadth to find the English mapping
+ if beam_size < 5:
+ logging.info("Forcing beam_size=5 for Translation task.")
+ beam_size = 5
+
+ # Inject guidance prompt if none exists
+ if not initial_prompt:
+ initial_prompt = "Translate this to English."
+
+ logging.info(f"Model Dispatch: Task='{final_task}', Language='{final_language}', ConditionPrev={condition_prev}, Beam={beam_size}")
+
+ # Build arguments dynamically to avoid passing None if that's the issue
+ transcribe_opts = {
+ "beam_size": beam_size,
+ "best_of": best_of,
+ "vad_filter": vad,
+ "task": final_task,
+ "vad_parameters": dict(min_silence_duration_ms=500),
+ "condition_on_previous_text": condition_prev,
+ "without_timestamps": True
+ }
+
+ if initial_prompt:
+ transcribe_opts["initial_prompt"] = initial_prompt
+
+ # Only add language if it's explicitly set (not None/Auto)
+ # This avoids potentially confusing the model with explicit None
+ if final_language:
+ transcribe_opts["language"] = final_language
# Transcribe
- segments, info = self.model.transcribe(
- audio_data,
- beam_size=beam_size,
- best_of=best_of,
- vad_filter=vad,
- task=final_task,
- language=language if language != "auto" else None,
- vad_parameters=dict(min_silence_duration_ms=500),
- condition_on_previous_text=self.config.get("condition_on_previous_text"),
- without_timestamps=True
- )
+ segments, info = self.model.transcribe(audio_data, **transcribe_opts)
# Aggregate text
text_result = ""
diff --git a/src/utils/window_hook.py b/src/utils/window_hook.py
index d43f5c0..d408b6c 100644
--- a/src/utils/window_hook.py
+++ b/src/utils/window_hook.py
@@ -55,6 +55,10 @@ except AttributeError:
def LOWORD(l): return l & 0xffff
def HIWORD(l): return (l >> 16) & 0xffff
+GWL_EXSTYLE = -20  # index passed to GetWindowLongW/SetWindowLongPtr to select the extended styles
+WS_EX_TRANSPARENT = 0x00000020  # extended-style bit set while interaction is disabled (click-through)
+WS_EX_LAYERED = 0x00080000  # extended-style bit OR-ed in together with WS_EX_TRANSPARENT
+
class WindowHook:
def __init__(self, hwnd, width, height, initial_scale=1.0):
self.hwnd = hwnd
@@ -68,7 +72,31 @@ class WindowHook:
self.enabled = True # New flag
def set_enabled(self, enabled):
+ """
+ Enables or disables interaction.
+ When disabled, we set WS_EX_TRANSPARENT so clicks pass through physically.
+ """
+ if self.enabled == enabled:
+ return
+
self.enabled = enabled
+
+ # Read the current extended styles so only the click-through bits change
+ style = user32.GetWindowLongW(self.hwnd, GWL_EXSTYLE)
+
+ if not enabled:
+ # Enable Click-Through (Add Transparent)
+ # We also ensure Layered is set (Qt usually sets it, but good to be sure)
+ new_style = style | WS_EX_TRANSPARENT | WS_EX_LAYERED
+ else:
+ # Disable Click-Through (Remove Transparent)
+ new_style = style & ~WS_EX_TRANSPARENT
+
+ if new_style != style:
+ SetWindowLongPtr(self.hwnd, GWL_EXSTYLE, new_style)
+
+ # Force a frame update; SWP_NOACTIVATE keeps the window from taking focus while toggling
+ user32.SetWindowPos(self.hwnd, 0, 0, 0, 0, 0, 0x0037) # SWP_NOSIZE | SWP_NOMOVE | SWP_NOZORDER | SWP_NOACTIVATE | SWP_FRAMECHANGED
def install(self):
proc_address = ctypes.cast(self.new_wnd_proc, ctypes.c_void_p)