v1.0.1 Feature Update and Polish

Full Changelog: [New Features] - Added Native Translation Mode: - Whisper model now fully supports translating any language to English - Added 'task' and 'language' parameters to Transcriber core - Dual Hotkey Support: - Added separate Global Hotkeys for Transcribe (default F8) and Translate (default F10) - Both hotkeys are fully customizable in Settings - Engine dynamically switches modes based on which key is pressed [UI/UX Improvements] - Settings Window: - Widened Hotkey Input fields (240px) to accommodate long combinations - Added Pretty-Printing for hotkey sequences (e.g. 'ctrl+f9' is displayed as 'Ctrl + F9') - Replaced Language Code dropdown with Full Language Names (99+ languages) - Made Language Dropdown scrollable (max height 300px) to prevent screen overflow - Removed redundant 'Task' selector (replaced by dedicated hotkeys) - System Tray: - Tooltip now displays both Transcribe and Translate hotkeys - Tooltip hotkeys are formatted readably [Core & Performance] - Bootstrapper: - Implemented Smart Incremental Sync - Now compares file size and file content before copying files - Drastically reduces startup time for subsequent runs - Preserves user settings.json during updates - Backend: - Fixed HotkeyManager to support dynamic configuration keys - Fixed Language Lock: selecting a language now correctly forces the model to use it - Refactored bridge/main connection for language list handling
2026-01-24 18:29:10 +02:00
parent f184eb0037
commit 4b84a27a67
11 changed files with 342 additions and 72 deletions
@@ -259,48 +259,72 @@ class Bootstrapper:
        process.wait()
        
    def refresh_app_source(self):
-        """Refresh app source files. Skips if already exists to save time."""
-        # Optimization: If app/main.py exists, skip update to improve startup speed.
-        # The user can delete the 'runtime' folder to force an update.
-        if (self.app_path / "main.py").exists():
-            log("App already exists. Skipping update.")
-            return True
-
-        if self.ui: self.ui.set_status("Updating app files...")
+        """
+        Incrementally sync the bundled app sources into the runtime folder.
+
+        Walks self.source_path and copies a file into self.app_path only when
+        it is missing there or its size or content differs. Files that exist
+        only in the destination are never deleted and settings.json is never
+        copied, so user settings and generated files survive an update.
+
+        Returns:
+            bool: True when the sync finished, False if any error was raised
+            (the error is logged).
+        """
+        if self.ui: self.ui.set_status("Checking for updates...")
        
        try:
-            # Preserve settings.json if it exists
-            settings_path = self.app_path / "settings.json"
-            temp_settings = None
-            if settings_path.exists():
-                try:
-                    temp_settings = settings_path.read_bytes()
-                except:
-                    log("Failed to backup settings.json, it involves risk of data loss.")
+            # 1. Ensure destination exists
+            self.app_path.mkdir(parents=True, exist_ok=True)
                
-            if self.app_path.exists():
-                shutil.rmtree(self.app_path, ignore_errors=True)
+            # 2. Walk source and sync
+            # source_path is the temporary bundled folder
+            # app_path is the persistent runtime folder
+            #
+            # Directory names the previous copytree-based refresh ignored.
+            # os.walk reports directories in `dirs`, never in `files`, so they
+            # have to be pruned from the walk itself; a per-file name check
+            # cannot keep .git, venv, build output, etc. out of the runtime folder.
+            ignored_dirs = {'__pycache__', '.git', 'venv', 'build', 'dist', 'runtime'}
+            ignored_files = {'__pycache__', '.git', 'settings.json'}
            
-            shutil.copytree(
-                self.source_path,
-                self.app_path,
-                ignore=shutil.ignore_patterns(
-                    '__pycache__', '*.pyc', '.git', 'venv', 
-                    'build', 'dist', '*.egg-info', 'runtime'
-                )
-            )
+            changes_made = 0
            
-            # Restore settings.json
-            if temp_settings:
-                try:
-                    settings_path.write_bytes(temp_settings)
-                    log("Restored settings.json")
-                except:
-                    log("Failed to restore settings.json")
+            for src_dir, dirs, files in os.walk(self.source_path):
+                # Slice assignment mutates the list os.walk holds, which is
+                # what stops it from descending into the ignored directories.
+                dirs[:] = [
+                    d for d in dirs
+                    if d not in ignored_dirs and not d.endswith('.egg-info')
+                ]
+
+                # Mirror the directory layout relative to the source root
+                rel_path = Path(src_dir).relative_to(self.source_path)
+                dst_dir = self.app_path / rel_path
+                dst_dir.mkdir(parents=True, exist_ok=True)
+
+                for file in files:
+                    # Skip ignored files (settings.json belongs to the user)
+                    if file in ignored_files or file.endswith(('.pyc', '.egg-info')):
+                        continue
+
+                    src_file = Path(src_dir) / file
+                    dst_file = dst_dir / file
+
+                    # Cheapest checks first: existence, then size, and only for
+                    # same-sized files a full byte-for-byte comparison.
+                    should_copy = (
+                        not dst_file.exists()
+                        or src_file.stat().st_size != dst_file.stat().st_size
+                        or src_file.read_bytes() != dst_file.read_bytes()
+                    )
+
+                    if should_copy:
+                        shutil.copy2(src_file, dst_file)
+                        changes_made += 1
+                        if self.ui: self.ui.set_detail(f"Updated: {file}")
+
+            # 3. No cleanup pass: files present only in the destination are kept
+            # so that generated user files (logs, etc) are not deleted.
+
+            if changes_made > 0:
+                log(f"Update complete. {changes_made} files changed.")
+            else:
+                log("App is up to date.")
                
            return True
        except Exception as e:
            log(f"Error refreshing app source: {e}")
+            # No delete-and-recopy fallback: a failed sync most likely means a
+            # permissions or locked-file problem, which a full recopy would hit too.
            return False
        
    def run_app(self):
@@ -118,13 +118,14 @@ class DownloadWorker(QThread):

 class TranscriptionWorker(QThread):
+    """Thread that runs one transcription and emits the resulting text via `finished`."""
    finished = Signal(str)
-    def __init__(self, transcriber, audio_data, is_file=False, parent=None):
+    def __init__(self, transcriber, audio_data, is_file=False, parent=None, task_override=None):
+        # task_override is forwarded to transcriber.transcribe() as `task`;
+        # None leaves the choice of task to the transcriber.
        super().__init__(parent)
        self.transcriber = transcriber
        self.audio_data = audio_data
        self.is_file = is_file
+        self.task_override = task_override
    def run(self):
-        text = self.transcriber.transcribe(self.audio_data, is_file=self.is_file)
+        # QThread entry point: transcribe, then hand the text to listeners.
+        text = self.transcriber.transcribe(self.audio_data, is_file=self.is_file, task=self.task_override)
        self.finished.emit(text)

 class WhisperApp(QObject):
@@ -166,13 +167,18 @@ class WhisperApp(QObject):
        self.tray.transcribe_file_requested.connect(self.transcribe_file)
        
        # Init Tooltip
-        hotkey = self.config.get("hotkey")
-        self.tray.setToolTip(f"Whisper Voice - Press {hotkey} to Record")
+        from src.utils.formatters import format_hotkey
+        self.format_hotkey = format_hotkey # Store ref
+        
+        hk1 = self.format_hotkey(self.config.get("hotkey"))
+        hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
+        self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")
        
        # 3. Logic Components Placeholders
        self.audio_engine = None
        self.transcriber = None
-        self.hotkey_manager = None
+        self.hk_transcribe = None
+        self.hk_translate = None
        self.overlay_root = None
        
        # 4. Start Loader
@@ -266,9 +272,16 @@ class WhisperApp(QObject):
        self.audio_engine.set_visualizer_callback(self.bridge.update_amplitude)
        self.audio_engine.set_silence_callback(self.on_silence_detected)
        self.transcriber = WhisperTranscriber()
-        self.hotkey_manager = HotkeyManager()
-        self.hotkey_manager.triggered.connect(self.toggle_recording)
-        self.hotkey_manager.start()
+        
+        # Dual Hotkey Managers
+        self.hk_transcribe = HotkeyManager(config_key="hotkey")
+        self.hk_transcribe.triggered.connect(lambda: self.toggle_recording(task_override="transcribe"))
+        self.hk_transcribe.start()
+        
+        self.hk_translate = HotkeyManager(config_key="hotkey_translate")
+        self.hk_translate.triggered.connect(lambda: self.toggle_recording(task_override="translate"))
+        self.hk_translate.start()
+        
        self.bridge.update_status("Ready")

    def run(self):
@@ -286,7 +299,8 @@ class WhisperApp(QObject):
            except: pass
            self.bridge.stats_worker.stop()
        
-        if self.hotkey_manager: self.hotkey_manager.stop()
+        if self.hk_transcribe: self.hk_transcribe.stop()
+        if self.hk_translate: self.hk_translate.stop()
        
        # Close all QML windows to ensure bindings stop before Python objects die
        if self.overlay_root: 
@@ -361,10 +375,14 @@ class WhisperApp(QObject):
        print(f"Setting Changed: {key} = {value}")
        
        # 1. Hotkey Reload
-        if key == "hotkey":
-            if self.hotkey_manager: self.hotkey_manager.reload_hotkey()
+        if key in ["hotkey", "hotkey_translate"]:
+            if self.hk_transcribe: self.hk_transcribe.reload_hotkey()
+            if self.hk_translate: self.hk_translate.reload_hotkey()
+            
            if self.tray:
-                self.tray.setToolTip(f"Whisper Voice - Press {value} to Record")
+                hk1 = self.format_hotkey(self.config.get("hotkey"))
+                hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
+                self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")

        # 2. AI Model Reload (Heavy)
        if key in ["model_size", "compute_device", "compute_type"]:
@@ -467,6 +485,8 @@ class WhisperApp(QObject):
        file_path, _ = QFileDialog.getOpenFileName(None, "Select Audio", "", "Audio (*.mp3 *.wav *.flac *.m4a *.ogg)")
        if file_path:
            self.bridge.update_status("Thinking...")
+            # Files use the default configured task usually, or we could ask? 
+            # Default to config setting for files.
            self.worker = TranscriptionWorker(self.transcriber, file_path, is_file=True, parent=self)
            self.worker.finished.connect(self.on_transcription_done)
            self.worker.start()
@@ -474,10 +494,13 @@ class WhisperApp(QObject):
    @Slot()
    def on_silence_detected(self):
        from PySide6.QtCore import QMetaObject, Qt
+        # Queue a toggle_recording() call with no task_override. When a recording
+        # is in progress, toggle_recording stops it and transcribes with the task
+        # stored at the start of that recording (current_recording_task), so the
+        # mode chosen by the hotkey is kept.
        QMetaObject.invokeMethod(self, "toggle_recording", Qt.QueuedConnection)

-    @Slot()
-    def toggle_recording(self):
+    @Slot() # Modified to allow lambda override
+    def toggle_recording(self, task_override=None):
        if not self.audio_engine: return
        
        # Prevent starting a new recording while we are still transcribing the last one
@@ -485,23 +508,36 @@ class WhisperApp(QObject):
            logging.warning("Ignored toggle request: Transcription in progress.")
            return

+        # Determine which task we are entering
+        if task_override:
+            intended_task = task_override
+        else:
+            intended_task = self.config.get("task")
+
        if self.audio_engine.recording:
+            # STOP RECORDING
            self.bridge.update_status("Thinking...")
            self.bridge.isRecording = False
            self.bridge.isProcessing = True # Start Processing
            audio_data = self.audio_engine.stop_recording()
-            self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self)
+            
+            # Use the task that started this session, or the override if provided (though usually override is for starting)
+            final_task = getattr(self, "current_recording_task", self.config.get("task"))
+            
+            self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self, task_override=final_task)
            self.worker.finished.connect(self.on_transcription_done)
            self.worker.start()
        else:
-            self.bridge.update_status("Recording")
+            # START RECORDING
+            self.current_recording_task = intended_task
+            self.bridge.update_status(f"Recording ({intended_task})...") 
            self.bridge.isRecording = True
            self.audio_engine.start_recording()

    @Slot(bool)
    def on_ui_toggle_request(self, state):
        if state != self.audio_engine.recording:
-            self.toggle_recording()
+            # No task_override is passed, so a recording started from the UI
+            # uses the configured "task" setting.
+            self.toggle_recording() # Default behavior for UI clicks

    @Slot(str)
    def on_transcription_done(self, text: str):
@@ -514,8 +550,8 @@ class WhisperApp(QObject):

    @Slot(bool)
    def on_hotkeys_enabled_toggle(self, state):
-        if self.hotkey_manager:
-            self.hotkey_manager.set_enabled(state)
+        # Forward the enabled flag to both hotkey managers. Either may still be
+        # None, because __init__ only creates them as placeholders.
+        if self.hk_transcribe: self.hk_transcribe.set_enabled(state)
+        if self.hk_translate: self.hk_translate.set_enabled(state)

    @Slot(str)
    def on_download_requested(self, size):
@@ -16,6 +16,7 @@ from src.core.paths import get_base_path
 # Default Configuration
 DEFAULT_SETTINGS = {
    "hotkey": "f8",
+    "hotkey_translate": "f10",
    "model_size": "small",
    "input_device": None,       # Device ID (int) or Name (str), None = Default
    "save_recordings": False,   # Save .wav files for debugging
@@ -38,6 +39,7 @@ DEFAULT_SETTINGS = {
    
    # AI - Advanced
    "language": "auto",         # "auto" or ISO code
+    "task": "transcribe",       # "transcribe" or "translate" (to English)
    "compute_device": "auto",   # "auto", "cuda", "cpu"
    "compute_type": "int8",     # "int8", "float16", "float32"
    "beam_size": 5,
@@ -30,15 +30,16 @@ class HotkeyManager(QObject):
    
    triggered = Signal()

-    def __init__(self, hotkey: str = "f8"):
+    def __init__(self, config_key: str = "hotkey"):
        """
        Initialize the HotkeyManager.

        Args:
-            hotkey (str): The global hotkey string description. Default: "f8".
+            config_key (str): The configuration key to look up (e.g. "hotkey").
        """
        super().__init__()
-        self.hotkey = hotkey
+        self.config_key = config_key
+        self.hotkey = "f8" # Placeholder
        self.is_listening = False
        self._enabled = True

@@ -58,9 +59,9 @@ class HotkeyManager(QObject):
            
        from src.core.config import ConfigManager
        config = ConfigManager()
-        self.hotkey = config.get("hotkey")
+        self.hotkey = config.get(self.config_key)
        
-        logging.info(f"Registering global hotkey: {self.hotkey}")
+        logging.info(f"Registering global hotkey ({self.config_key}): {self.hotkey}")
        try:
            # We don't suppress=True here because we want the app to see keys during recording 
            # (Wait, actually if we are recording we WANT keyboard to see it, 
@@ -0,0 +1,120 @@
+"""
+Supported Languages Module
+==========================
+Full list of languages supported by OpenAI Whisper.
+Maps ISO codes to display names.
+"""
+
+# Maps language codes to the display names returned by get_language_names()
+# (in this insertion order). "auto" is a sentinel rather than a language
+# code: it is the fallback returned by get_code_by_name() for unknown names.
+LANGUAGES = {
+    "auto": "Auto Detect",
+    "af": "Afrikaans",
+    "sq": "Albanian",
+    "am": "Amharic",
+    "ar": "Arabic",
+    "hy": "Armenian",
+    "as": "Assamese",
+    "az": "Azerbaijani",
+    "ba": "Bashkir",
+    "eu": "Basque",
+    "be": "Belarusian",
+    "bn": "Bengali",
+    "bs": "Bosnian",
+    "br": "Breton",
+    "bg": "Bulgarian",
+    "my": "Burmese",
+    "ca": "Catalan",
+    "zh": "Chinese",
+    "hr": "Croatian",
+    "cs": "Czech",
+    "da": "Danish",
+    "nl": "Dutch",
+    "en": "English",
+    "et": "Estonian",
+    "fo": "Faroese",
+    "fi": "Finnish",
+    "fr": "French",
+    "gl": "Galician",
+    "ka": "Georgian",
+    "de": "German",
+    "el": "Greek",
+    "gu": "Gujarati",
+    "ht": "Haitian",
+    "ha": "Hausa",
+    "haw": "Hawaiian",
+    "he": "Hebrew",
+    "hi": "Hindi",
+    "hu": "Hungarian",
+    "is": "Icelandic",
+    "id": "Indonesian",
+    "it": "Italian",
+    "ja": "Japanese",
+    "jw": "Javanese",
+    "kn": "Kannada",
+    "kk": "Kazakh",
+    "km": "Khmer",
+    "ko": "Korean",
+    "lo": "Lao",
+    "la": "Latin",
+    "lv": "Latvian",
+    "ln": "Lingala",
+    "lt": "Lithuanian",
+    "lb": "Luxembourgish",
+    "mk": "Macedonian",
+    "mg": "Malagasy",
+    "ms": "Malay",
+    "ml": "Malayalam",
+    "mt": "Maltese",
+    "mi": "Maori",
+    "mr": "Marathi",
+    "mn": "Mongolian",
+    "ne": "Nepali",
+    "no": "Norwegian",
+    "oc": "Occitan",
+    "pa": "Punjabi",
+    "ps": "Pashto",
+    "fa": "Persian",
+    "pl": "Polish",
+    "pt": "Portuguese",
+    "ro": "Romanian",
+    "ru": "Russian",
+    "sa": "Sanskrit",
+    "sr": "Serbian",
+    "sn": "Shona",
+    "sd": "Sindhi",
+    "si": "Sinhala",
+    "sk": "Slovak",
+    "sl": "Slovenian",
+    "so": "Somali",
+    "es": "Spanish",
+    "su": "Sundanese",
+    "sw": "Swahili",
+    "sv": "Swedish",
+    "tl": "Tagalog",
+    "tg": "Tajik",
+    "ta": "Tamil",
+    "tt": "Tatar",
+    "te": "Telugu",
+    "th": "Thai",
+    "bo": "Tibetan",
+    "tr": "Turkish",
+    "tk": "Turkmen",
+    "uk": "Ukrainian",
+    "ur": "Urdu",
+    "uz": "Uzbek",
+    "vi": "Vietnamese",
+    "cy": "Welsh",
+    "yi": "Yiddish",
+    "yo": "Yoruba",
+}
+
+def get_language_names():
+    """Return every display name in LANGUAGES as a new list, in dict order."""
+    return [*LANGUAGES.values()]
+
+def get_code_by_name(name):
+    """Return the code whose display name equals `name`, or "auto" if none does."""
+    matching_codes = (code for code, label in LANGUAGES.items() if label == name)
+    return next(matching_codes, "auto")
+
+def get_name_by_code(code):
+    """Return the display name for `code`, or "Auto Detect" for an unknown code."""
+    try:
+        return LANGUAGES[code]
+    except KeyError:
+        return "Auto Detect"
@@ -74,11 +74,11 @@ class WhisperTranscriber:
            logging.error(f"Failed to load model: {e}")
            self.model = None

-    def transcribe(self, audio_data, is_file: bool = False) -> str:
+    def transcribe(self, audio_data, is_file: bool = False, task: Optional[str] = None) -> str:
        """
        Transcribe audio data.
        """
-        logging.info(f"Starting transcription... (is_file={is_file})")
+        logging.info(f"Starting transcription... (is_file={is_file}, task={task})")
        
        # Ensure model is loaded
        if not self.model:
@@ -91,6 +91,10 @@ class WhisperTranscriber:
            beam_size = int(self.config.get("beam_size"))
            best_of = int(self.config.get("best_of"))
            vad = False if is_file else self.config.get("vad_filter")
+            language = self.config.get("language")
+            
+            # Use task override if provided, otherwise config
+            final_task = task if task else self.config.get("task")
            
            # Transcribe
            segments, info = self.model.transcribe(
@@ -98,6 +102,8 @@ class WhisperTranscriber:
                beam_size=beam_size,
                best_of=best_of,
                vad_filter=vad,
+                task=final_task,
+                language=language if language != "auto" else None,
                vad_parameters=dict(min_silence_duration_ms=500),
                condition_on_previous_text=self.config.get("condition_on_previous_text"),
                without_timestamps=True
@@ -245,6 +245,26 @@ class UIBridge(QObject):

    # --- Methods called from QML ---
    
+    @Slot(result=list)
+    def get_supported_languages(self):
+        """Return the language display names provided by src.core.languages."""
+        from src.core.languages import get_language_names
+        language_names = get_language_names()
+        return language_names
+
+    @Slot(str)
+    def set_language_by_name(self, name):
+        """
+        Store the language selected by display name and announce the change.
+
+        The name is converted to its code with get_code_by_name(), saved under
+        the "language" setting, and broadcast through settingChanged.
+        """
+        from src.core.languages import get_code_by_name
+        from src.core.config import ConfigManager
+        language_code = get_code_by_name(name)
+        config = ConfigManager()
+        config.set("language", language_code)
+        self.settingChanged.emit("language", language_code)
+        
+    @Slot(result=str)
+    def get_current_language_name(self):
+        """Return the display name for the currently configured "language" setting."""
+        from src.core.languages import get_name_by_code
+        from src.core.config import ConfigManager
+        return get_name_by_code(ConfigManager().get("language"))
+
    @Slot(str, result='QVariant')
    def getSetting(self, key):
        from src.core.config import ConfigManager
@@ -100,7 +100,7 @@ ComboBox {
    popup: Popup {
        y: control.height - 1
        width: control.width
-        implicitHeight: contentItem.implicitHeight
+        implicitHeight: Math.min(contentItem.implicitHeight, 300)
        padding: 5

        contentItem: ListView {
@@ -25,7 +25,7 @@ Rectangle {
    
    Text {
        anchors.centerIn: parent
-        text: control.recording ? "Listening..." : (control.currentSequence || "None")
+        text: control.recording ? "Listening..." : (formatSequence(control.currentSequence) || "None")
        color: control.recording ? SettingsStyle.accent : (control.currentSequence ? "#ffffff" : "#808080")
        font.family: "JetBrains Mono"
        font.pixelSize: 13
@@ -72,6 +72,23 @@ Rectangle {
        if (!activeFocus) control.recording = false
    }

+    // Pretty-prints a stored hotkey sequence for display,
+    // e.g. "ctrl+f9" -> "Ctrl + F9". Returns "" for an empty sequence.
+    function formatSequence(seq) {
+        if (!seq) return ""
+        return seq.split("+").map(function (token) {
+            // Upper-casing the first character covers the modifiers
+            // (ctrl -> Ctrl, alt -> Alt, shift -> Shift, win -> Win, esc -> Esc)
+            // as well as ordinary keys (f8 -> F8, space -> Space).
+            return token.charAt(0).toUpperCase() + token.slice(1)
+        }).join(" + ")
+    }
+
    function getKeyName(key, text) {
        // F-Keys
        if (key >= Qt.Key_F1 && key <= Qt.Key_F35) return "f" + (key - Qt.Key_F1 + 1)
@@ -314,15 +314,25 @@ Window {
                                    spacing: 0
                                    
                                    ModernSettingsItem {
-                                        label: "Global Hotkey"
-                                        description: "Press to record a new shortcut (e.g. Ctrl+Space)"
+                                        label: "Global Hotkey (Transcribe)"
+                                        description: "Press to record a new shortcut (e.g. F9)"
                                        control: ModernKeySequenceRecorder {
-                                            Layout.preferredWidth: 200
+                                            implicitWidth: 240
                                            currentSequence: ui.getSetting("hotkey")
                                            onSequenceChanged: (seq) => ui.setSetting("hotkey", seq)
                                        }
                                    }

+                                    ModernSettingsItem {
+                                        label: "Global Hotkey (Translate)"
+                                        description: "Press to record a new shortcut (e.g. F10)"
+                                        control: ModernKeySequenceRecorder {
+                                            implicitWidth: 240
+                                            currentSequence: ui.getSetting("hotkey_translate")
+                                            onSequenceChanged: (seq) => ui.setSetting("hotkey_translate", seq)
+                                        }
+                                    }
+                                    
                                    ModernSettingsItem {
                                        label: "Run on Startup"
                                        description: "Automatically launch when you log in"
@@ -742,15 +752,17 @@ Window {
                                    
                                    ModernSettingsItem {
                                        label: "Language"
-                                        description: "Force language or Auto-detect"
+                                        description: "Spoken language to transcribe"
                                        control: ModernComboBox {
-                                            width: 140
-                                            model: ["auto", "en", "fr", "de", "es", "it", "ja", "zh", "ru"]
-                                            currentIndex: model.indexOf(ui.getSetting("language"))
-                                            onActivated: ui.setSetting("language", currentText)
+                                            Layout.preferredWidth: 200
+                                            model: ui.get_supported_languages()
+                                            currentIndex: model.indexOf(ui.get_current_language_name())
+                                            onActivated: (index) => ui.set_language_by_name(currentText)
                                        }
                                    }
                            
+                                    // Task selector removed as per user request (Hotkeys handle this now)
+                            
                                    ModernSettingsItem {
                                        label: "Compute Device"
                                        description: "Hardware acceleration (CUDA requires NVidia GPU)"
@@ -0,0 +1,32 @@
+"""
+Formatter Utilities
+===================
+Helper functions for text formatting.
+"""
+
+def format_hotkey(sequence: str) -> str:
+    """
+    Render a stored hotkey sequence (e.g. 'ctrl+alt+f9') as a readable
+    display string (e.g. 'Ctrl + Alt + F9').
+
+    Returns the string "None" when the sequence is empty or missing.
+    """
+    if not sequence:
+        return "None"
+
+    # Each '+'-separated token is trimmed and lower-cased, then its first
+    # character is upper-cased. That one rule yields the modifier names
+    # (ctrl -> Ctrl, esc -> Esc) and ordinary keys (f9 -> F9) alike.
+    tokens = (raw.strip().lower() for raw in sequence.split('+'))
+    return " + ".join(token[:1].upper() + token[1:] for token in tokens)