Fix: Translation Reliability, Click-Through, and Docs Sync

- Transcriber: Enforced 'beam_size=5' and prompt injection for robust translation.
- Transcriber: Removed conditioning on previous text to prevent language stickiness.
- Transcriber: Refactored kwargs to sanitize inputs.
- Overlay: Fixed click-through by toggling WS_EX_TRANSPARENT.
- UI: Added real download progress reporting.
- Docs: Refactored the language list into a table.
2026-01-24 19:05:43 +02:00
parent f3bf7541cf
commit 0b2b5848e2
4 changed files with 169 additions and 23 deletions
--- a/main.py
+++ b/main.py
@@ -87,7 +87,7 @@ def _silent_shutdown_hook(exc_type, exc_value, exc_tb):
 sys.excepthook = _silent_shutdown_hook

 class DownloadWorker(QThread):
-    """Background worker for model downloads."""
+    """Background worker for model downloads with REAL progress."""
    progress = Signal(int)
    finished = Signal()
    error = Signal(str)
@@ -98,20 +98,73 @@ class DownloadWorker(QThread):

    def run(self):
        try:
-            from faster_whisper import download_model
+            import requests
+            from tqdm import tqdm
            model_path = get_models_path()
-            # Download to a specific subdirectory to keep things clean and predictable
-            # This matches the logic in transcriber.py which looks for this specific path
            dest_dir = model_path / f"faster-whisper-{self.model_name}"
-            logging.info(f"Downloading Model '{self.model_name}' to {dest_dir}...")
+            dest_dir.mkdir(parents=True, exist_ok=True)
            
-            # Ensure parent exists
-            model_path.mkdir(parents=True, exist_ok=True)
+            # Files to download for a standard faster-whisper model
+            # We map local filenames to HF repo filenames
+            repo_id = f"Systran/faster-whisper-{self.model_name}"
+            files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
            
-            # output_dir in download_model specifies where the model files are saved
-            download_model(self.model_name, output_dir=str(dest_dir))
+            # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
+            # We'll stick to the core 4.
            
+            base_url = f"https://huggingface.co/{repo_id}/resolve/main"
+            
+            logging.info(f"Downloading {self.model_name} from {base_url}...")
+            
+            # 1. Calculate Total Size
+            total_size = 0
+            file_sizes = {}
+            
+            with requests.Session() as s:
+                for fname in files:
+                    url = f"{base_url}/{fname}"
+                    head = s.head(url, allow_redirects=True)
+                    if head.status_code == 200:
+                        size = int(head.headers.get('content-length', 0))
+                        file_sizes[fname] = size
+                        total_size += size
+                    else:
+                        # Fallback for vocabulary.json vs vocabulary.txt
+                        if fname == "vocabulary.json":
+                             # Try .txt? Or just skip if not found? 
+                             # Faster-whisper usually has vocabulary.json
+                             pass
+            
+            # 2. Download loop
+            downloaded_bytes = 0
+            
+            with requests.Session() as s:
+                for fname in files:
+                    if fname not in file_sizes: continue
+                    
+                    url = f"{base_url}/{fname}"
+                    dest_file = dest_dir / fname
+                    
+                    # Resume check? 
+                    # Simpler to just overwrite for reliability unless we want complex resume logic.
+                    # We'll overwrite.
+                    
+                    resp = s.get(url, stream=True)
+                    resp.raise_for_status()
+                    
+                    with open(dest_file, 'wb') as f:
+                        for chunk in resp.iter_content(chunk_size=8192):
+                            if chunk:
+                                f.write(chunk)
+                                downloaded_bytes += len(chunk)
+                                
+                                # Emit Progress
+                                if total_size > 0:
+                                    pct = int((downloaded_bytes / total_size) * 100)
+                                    self.progress.emit(pct)
+
            self.finished.emit()
+            
        except Exception as e:
            logging.error(f"Download failed: {e}")
            self.error.emit(str(e))