Fix: Translation Reliability, Click-Through, and Docs Sync

- Transcriber: Enforced a minimum 'beam_size' of 5 and injected a default initial prompt for robust translation.
- Transcriber: Disabled conditioning on previous text for translation to prevent language stickiness.
- Transcriber: Refactored kwargs to sanitize inputs.
- Overlay: Fixed click-through by toggling WS_EX_TRANSPARENT.
- UI: Added real download progress reporting.
- Docs: Refactored language list to table.
2026-01-24 19:05:43 +02:00
parent f3bf7541cf
commit 0b2b5848e2
4 changed files with 169 additions and 23 deletions
@@ -94,20 +94,59 @@ class WhisperTranscriber:
            language = self.config.get("language")
            
            # Use task override if provided, otherwise config
-            final_task = task if task else self.config.get("task")
+            # Ensure safe string and lowercase ("transcribe" vs "Transcribe")
+            raw_task = task if task else self.config.get("task")
+            final_task = str(raw_task).strip().lower() if raw_task else "transcribe"
+            
+            # Sanity check for valid Whisper tasks
+            if final_task not in ["transcribe", "translate"]:
+                logging.warning(f"Invalid task '{final_task}' detected. Defaulting to 'transcribe'.")
+                final_task = "transcribe"
+            
+            # Language handling
+            final_language = language if language != "auto" else None
+            
+            # Anti-Hallucination: start from the configured value; it is forced to False below for translation
+            condition_prev = self.config.get("condition_on_previous_text")
+            
+            # Helper options for Translation Stability
+            initial_prompt = self.config.get("initial_prompt")
+            
+            if final_task == "translate":
+                condition_prev = False
+                # Raise beam_size to at least 5 (this also covers greedy decoding, beam_size=1)
+                # Translation requires more search breadth to find the English mapping
+                if beam_size < 5:
+                    logging.info("Forcing beam_size=5 for Translation task.")
+                    beam_size = 5
+                    
+                # Inject guidance prompt if none exists
+                if not initial_prompt:
+                    initial_prompt = "Translate this to English."
+
+            logging.info(f"Model Dispatch: Task='{final_task}', Language='{final_language}', ConditionPrev={condition_prev}, Beam={beam_size}")
+            
+            # Build the keyword arguments as a dict so optional values (initial_prompt, language) are left out instead of being passed as None
+            transcribe_opts = {
+                "beam_size": beam_size,
+                "best_of": best_of,
+                "vad_filter": vad,
+                "task": final_task,
+                "vad_parameters": dict(min_silence_duration_ms=500),
+                "condition_on_previous_text": condition_prev,
+                "without_timestamps": True
+            }
+            
+            if initial_prompt:
+                 transcribe_opts["initial_prompt"] = initial_prompt
+            
+            # Only add language if it's explicitly set (not None/Auto)
+            # This avoids potentially confusing the model with explicit None
+            if final_language:
+                transcribe_opts["language"] = final_language
            
            # Transcribe
-            segments, info = self.model.transcribe(
-                audio_data,
-                beam_size=beam_size,
-                best_of=best_of,
-                vad_filter=vad,
-                task=final_task,
-                language=language if language != "auto" else None,
-                vad_parameters=dict(min_silence_duration_ms=500),
-                condition_on_previous_text=self.config.get("condition_on_previous_text"),
-                without_timestamps=True
-            )
+            segments, info = self.model.transcribe(audio_data, **transcribe_opts)
            
            # Aggregate text
            text_result = ""
@@ -55,6 +55,10 @@ except AttributeError:
 def LOWORD(l): return l & 0xffff
 def HIWORD(l): return (l >> 16) & 0xffff

+GWL_EXSTYLE = -20
+WS_EX_TRANSPARENT = 0x00000020
+WS_EX_LAYERED = 0x00080000
+
 class WindowHook:
    def __init__(self, hwnd, width, height, initial_scale=1.0):
        self.hwnd = hwnd
@@ -68,7 +72,31 @@ class WindowHook:
        self.enabled = True # New flag

    def set_enabled(self, enabled):
+        """
+        Enables or disables interaction.
+        When disabled, we set WS_EX_TRANSPARENT so clicks pass through physically.
+        """
+        if self.enabled == enabled:
+            return
+            
        self.enabled = enabled
+        
+        # Get current styles
+        style = user32.GetWindowLongW(self.hwnd, GWL_EXSTYLE)
+        
+        if not enabled:
+            # Enable Click-Through (Add Transparent)
+            # We also ensure Layered is set (Qt usually sets it, but good to be sure)
+            new_style = style | WS_EX_TRANSPARENT | WS_EX_LAYERED
+        else:
+            # Disable Click-Through (Remove Transparent)
+            new_style = style & ~WS_EX_TRANSPARENT
+            
+        if new_style != style:
+            SetWindowLongPtr(self.hwnd, GWL_EXSTYLE, new_style)
+            
+        # Force a redraw/frame update just in case
+        user32.SetWindowPos(self.hwnd, 0, 0, 0, 0, 0, 0x0027) # SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED

    def install(self):
        proc_address = ctypes.cast(self.new_wnd_proc, ctypes.c_void_p)