Fix: Translation Reliability, Click-Through, and Docs Sync
- Transcriber: Enforced a minimum 'beam_size=5' and injected a default initial prompt for robust translation.
- Transcriber: Disabled conditioning on previous text during translation to prevent language stickiness.
- Transcriber: Refactored kwargs to sanitize inputs.
- Overlay: Fixed click-through by toggling WS_EX_TRANSPARENT.
- UI: Added real download progress reporting.
- Docs: Refactored language list to table.
This commit is contained in:
@@ -94,20 +94,59 @@ class WhisperTranscriber:
|
||||
language = self.config.get("language")
|
||||
|
||||
# Use task override if provided, otherwise config
|
||||
final_task = task if task else self.config.get("task")
|
||||
# Ensure safe string and lowercase ("transcribe" vs "Transcribe")
|
||||
raw_task = task if task else self.config.get("task")
|
||||
final_task = str(raw_task).strip().lower() if raw_task else "transcribe"
|
||||
|
||||
# Sanity check for valid Whisper tasks
|
||||
if final_task not in ["transcribe", "translate"]:
|
||||
logging.warning(f"Invalid task '{final_task}' detected. Defaulting to 'transcribe'.")
|
||||
final_task = "transcribe"
|
||||
|
||||
# Language handling
|
||||
final_language = language if language != "auto" else None
|
||||
|
||||
# Anti-Hallucination: Force condition_on_previous_text=False for translation
|
||||
condition_prev = self.config.get("condition_on_previous_text")
|
||||
|
||||
# Helper options for Translation Stability
|
||||
initial_prompt = self.config.get("initial_prompt")
|
||||
|
||||
if final_task == "translate":
|
||||
condition_prev = False
|
||||
# Force a beam width of at least 5; this covers greedy decoding (beam_size=1) and any other narrow beam
|
||||
# Translation requires more search breadth to find the English mapping
|
||||
if beam_size < 5:
|
||||
logging.info("Forcing beam_size=5 for Translation task.")
|
||||
beam_size = 5
|
||||
|
||||
# Inject guidance prompt if none exists
|
||||
if not initial_prompt:
|
||||
initial_prompt = "Translate this to English."
|
||||
|
||||
logging.info(f"Model Dispatch: Task='{final_task}', Language='{final_language}', ConditionPrev={condition_prev}, Beam={beam_size}")
|
||||
|
||||
# Build the arguments dynamically so optional values (initial_prompt, language) are only passed when set, never as None
|
||||
transcribe_opts = {
|
||||
"beam_size": beam_size,
|
||||
"best_of": best_of,
|
||||
"vad_filter": vad,
|
||||
"task": final_task,
|
||||
"vad_parameters": dict(min_silence_duration_ms=500),
|
||||
"condition_on_previous_text": condition_prev,
|
||||
"without_timestamps": True
|
||||
}
|
||||
|
||||
if initial_prompt:
|
||||
transcribe_opts["initial_prompt"] = initial_prompt
|
||||
|
||||
# Only add language if it's explicitly set (not None/Auto)
|
||||
# This avoids potentially confusing the model with explicit None
|
||||
if final_language:
|
||||
transcribe_opts["language"] = final_language
|
||||
|
||||
# Transcribe
|
||||
segments, info = self.model.transcribe(
|
||||
audio_data,
|
||||
beam_size=beam_size,
|
||||
best_of=best_of,
|
||||
vad_filter=vad,
|
||||
task=final_task,
|
||||
language=language if language != "auto" else None,
|
||||
vad_parameters=dict(min_silence_duration_ms=500),
|
||||
condition_on_previous_text=self.config.get("condition_on_previous_text"),
|
||||
without_timestamps=True
|
||||
)
|
||||
segments, info = self.model.transcribe(audio_data, **transcribe_opts)
|
||||
|
||||
# Aggregate text
|
||||
text_result = ""
|
||||
|
||||
@@ -55,6 +55,10 @@ except AttributeError:
|
||||
def LOWORD(l):
    """Return the low-order 16 bits of ``l``."""
    low_mask = 0xFFFF
    return l & low_mask
|
||||
def HIWORD(l):
    """Return bits 16-31 of ``l`` as a 16-bit value."""
    upper_bits = l >> 16
    return upper_bits & 0xFFFF
|
||||
|
||||
# Index passed to GetWindowLongW / SetWindowLongPtr to read or write the window's extended styles.
GWL_EXSTYLE = -20
|
||||
# Extended-style bit that set_enabled() adds to make the window click-through and removes to restore interaction.
WS_EX_TRANSPARENT = 0x00000020
|
||||
# Extended-style bit for layered windows; set_enabled() sets it together with WS_EX_TRANSPARENT.
WS_EX_LAYERED = 0x00080000
|
||||
|
||||
class WindowHook:
|
||||
def __init__(self, hwnd, width, height, initial_scale=1.0):
|
||||
self.hwnd = hwnd
|
||||
@@ -68,7 +72,31 @@ class WindowHook:
|
||||
self.enabled = True # New flag
|
||||
|
||||
def set_enabled(self, enabled):
|
||||
"""
|
||||
Enables or disables interaction.
|
||||
When disabled, we set WS_EX_TRANSPARENT so clicks pass through physically.
|
||||
"""
|
||||
# Nothing to do when the requested state matches the current one.
if self.enabled == enabled:
|
||||
return
|
||||
|
||||
# Record the new state before touching the window styles.
self.enabled = enabled
|
||||
|
||||
# Read the current extended window styles so that only the click-through bits are changed
style = user32.GetWindowLongW(self.hwnd, GWL_EXSTYLE)
|
||||
|
||||
if not enabled:
|
||||
# Interaction disabled: enable click-through by adding WS_EX_TRANSPARENT
|
||||
# WS_EX_LAYERED is OR-ed in as well (Qt usually sets it already, but this makes sure)
|
||||
new_style = style | WS_EX_TRANSPARENT | WS_EX_LAYERED
|
||||
else:
|
||||
# Interaction enabled: disable click-through by clearing WS_EX_TRANSPARENT (WS_EX_LAYERED is left as-is)
|
||||
new_style = style & ~WS_EX_TRANSPARENT
|
||||
|
||||
# Only write the extended styles back when a bit actually changed.
if new_style != style:
|
||||
SetWindowLongPtr(self.hwnd, GWL_EXSTYLE, new_style)
|
||||
|
||||
# Force a redraw/frame update just in case (position, size and z-order are left untouched)
# NOTE(review): indentation is not visible in this view - confirm whether this call sits inside the `if` above
|
||||
user32.SetWindowPos(self.hwnd, 0, 0, 0, 0, 0, 0x0027) # SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED
|
||||
|
||||
def install(self):
|
||||
proc_address = ctypes.cast(self.new_wnd_proc, ctypes.c_void_p)
|
||||
|
||||
Reference in New Issue
Block a user