def refresh_app_source(self):
    """
    Sync the bundled app sources into the persistent runtime folder.

    Walks ``self.source_path`` and copies a file into the matching location
    under ``self.app_path`` only when it is missing there or its content
    differs. Nothing is deleted from the destination, so files that exist
    only there (logs, generated data) are left alone, and ``settings.json``
    is never copied so existing user settings are not overwritten.

    Returns:
        bool: True when the sync completed (even if nothing changed),
        False when an error occurred. Errors are logged, never raised.
    """
    import filecmp  # Local import: only this method needs it.

    # Directories that must not be mirrored. These are the patterns the
    # previous copytree-based implementation ignored; '*.egg-info' is
    # handled by suffix below.
    skipped_dirs = {'__pycache__', '.git', 'venv', 'build', 'dist', 'runtime'}
    # File names that are never copied ('.git' can also be a plain file).
    skipped_files = {'__pycache__', '.git', 'settings.json'}

    if self.ui:
        self.ui.set_status("Checking for updates...")

    try:
        # 1. Ensure the destination root exists.
        self.app_path.mkdir(parents=True, exist_ok=True)

        # 2. Walk the source tree and copy what is new or changed.
        changes_made = 0
        for src_dir, dirs, files in os.walk(self.source_path):
            # Prune in place: os.walk (top-down) only descends into the
            # names left in `dirs`, so ignored directories are never
            # visited, created, or copied.
            dirs[:] = [
                d for d in dirs
                if d not in skipped_dirs and not d.endswith('.egg-info')
            ]

            rel_path = Path(src_dir).relative_to(self.source_path)
            dst_dir = self.app_path / rel_path
            dst_dir.mkdir(parents=True, exist_ok=True)

            for file in files:
                if file in skipped_files or file.endswith('.pyc'):
                    continue

                src_file = Path(src_dir) / file
                dst_file = dst_dir / file

                # shallow=False forces a content comparison; filecmp
                # still short-circuits on a size mismatch and reads in
                # chunks instead of loading both files into memory.
                if dst_file.exists() and filecmp.cmp(src_file, dst_file, shallow=False):
                    continue

                shutil.copy2(src_file, dst_file)
                changes_made += 1
                if self.ui:
                    self.ui.set_detail(f"Updated: {file}")

        # 3. No cleanup pass: files present only in the destination are
        #    kept on purpose (they may be user-generated).
        if changes_made > 0:
            log(f"Update complete. {changes_made} files changed.")
        else:
            log("App is up to date.")
        return True

    except Exception as e:
        # Best-effort: report and let the caller decide. A destructive
        # re-copy would not help if the cause is e.g. a permission error.
        log(f"Error refreshing app source: {e}")
        return False
Start Loader @@ -266,9 +272,16 @@ class WhisperApp(QObject): self.audio_engine.set_visualizer_callback(self.bridge.update_amplitude) self.audio_engine.set_silence_callback(self.on_silence_detected) self.transcriber = WhisperTranscriber() - self.hotkey_manager = HotkeyManager() - self.hotkey_manager.triggered.connect(self.toggle_recording) - self.hotkey_manager.start() + + # Dual Hotkey Managers + self.hk_transcribe = HotkeyManager(config_key="hotkey") + self.hk_transcribe.triggered.connect(lambda: self.toggle_recording(task_override="transcribe")) + self.hk_transcribe.start() + + self.hk_translate = HotkeyManager(config_key="hotkey_translate") + self.hk_translate.triggered.connect(lambda: self.toggle_recording(task_override="translate")) + self.hk_translate.start() + self.bridge.update_status("Ready") def run(self): @@ -286,7 +299,8 @@ class WhisperApp(QObject): except: pass self.bridge.stats_worker.stop() - if self.hotkey_manager: self.hotkey_manager.stop() + if self.hk_transcribe: self.hk_transcribe.stop() + if self.hk_translate: self.hk_translate.stop() # Close all QML windows to ensure bindings stop before Python objects die if self.overlay_root: @@ -361,10 +375,14 @@ class WhisperApp(QObject): print(f"Setting Changed: {key} = {value}") # 1. Hotkey Reload - if key == "hotkey": - if self.hotkey_manager: self.hotkey_manager.reload_hotkey() + if key in ["hotkey", "hotkey_translate"]: + if self.hk_transcribe: self.hk_transcribe.reload_hotkey() + if self.hk_translate: self.hk_translate.reload_hotkey() + if self.tray: - self.tray.setToolTip(f"Whisper Voice - Press {value} to Record") + hk1 = self.format_hotkey(self.config.get("hotkey")) + hk2 = self.format_hotkey(self.config.get("hotkey_translate")) + self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}") # 2. 
AI Model Reload (Heavy) if key in ["model_size", "compute_device", "compute_type"]: @@ -467,6 +485,8 @@ class WhisperApp(QObject): file_path, _ = QFileDialog.getOpenFileName(None, "Select Audio", "", "Audio (*.mp3 *.wav *.flac *.m4a *.ogg)") if file_path: self.bridge.update_status("Thinking...") + # Files use the default configured task usually, or we could ask? + # Default to config setting for files. self.worker = TranscriptionWorker(self.transcriber, file_path, is_file=True, parent=self) self.worker.finished.connect(self.on_transcription_done) self.worker.start() @@ -474,10 +494,13 @@ class WhisperApp(QObject): @Slot() def on_silence_detected(self): from PySide6.QtCore import QMetaObject, Qt + # Silence detection always triggers the task that was active? + # Since silence stops recording, it just calls toggle_recording with no arg, using the stored current_task? + # Let's ensure toggle_recording handles no arg calls by stopping the CURRENT task. QMetaObject.invokeMethod(self, "toggle_recording", Qt.QueuedConnection) - @Slot() - def toggle_recording(self): + @Slot() # Modified to allow lambda override + def toggle_recording(self, task_override=None): if not self.audio_engine: return # Prevent starting a new recording while we are still transcribing the last one @@ -485,23 +508,36 @@ class WhisperApp(QObject): logging.warning("Ignored toggle request: Transcription in progress.") return + # Determine which task we are entering + if task_override: + intended_task = task_override + else: + intended_task = self.config.get("task") + if self.audio_engine.recording: + # STOP RECORDING self.bridge.update_status("Thinking...") self.bridge.isRecording = False self.bridge.isProcessing = True # Start Processing audio_data = self.audio_engine.stop_recording() - self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self) + + # Use the task that started this session, or the override if provided (though usually override is for starting) + final_task = 
getattr(self, "current_recording_task", self.config.get("task")) + + self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self, task_override=final_task) self.worker.finished.connect(self.on_transcription_done) self.worker.start() else: - self.bridge.update_status("Recording") + # START RECORDING + self.current_recording_task = intended_task + self.bridge.update_status(f"Recording ({intended_task})...") self.bridge.isRecording = True self.audio_engine.start_recording() @Slot(bool) def on_ui_toggle_request(self, state): if state != self.audio_engine.recording: - self.toggle_recording() + self.toggle_recording() # Default behavior for UI clicks @Slot(str) def on_transcription_done(self, text: str): @@ -514,8 +550,8 @@ class WhisperApp(QObject): @Slot(bool) def on_hotkeys_enabled_toggle(self, state): - if self.hotkey_manager: - self.hotkey_manager.set_enabled(state) + if self.hk_transcribe: self.hk_transcribe.set_enabled(state) + if self.hk_translate: self.hk_translate.set_enabled(state) @Slot(str) def on_download_requested(self, size): diff --git a/src/core/config.py b/src/core/config.py index 023a3d0..6ea3138 100644 --- a/src/core/config.py +++ b/src/core/config.py @@ -16,6 +16,7 @@ from src.core.paths import get_base_path # Default Configuration DEFAULT_SETTINGS = { "hotkey": "f8", + "hotkey_translate": "f10", "model_size": "small", "input_device": None, # Device ID (int) or Name (str), None = Default "save_recordings": False, # Save .wav files for debugging @@ -38,6 +39,7 @@ DEFAULT_SETTINGS = { # AI - Advanced "language": "auto", # "auto" or ISO code + "task": "transcribe", # "transcribe" or "translate" (to English) "compute_device": "auto", # "auto", "cuda", "cpu" "compute_type": "int8", # "int8", "float16", "float32" "beam_size": 5, diff --git a/src/core/hotkey_manager.py b/src/core/hotkey_manager.py index 6a8f3fb..7a2eded 100644 --- a/src/core/hotkey_manager.py +++ b/src/core/hotkey_manager.py @@ -30,15 +30,16 @@ class HotkeyManager(QObject): 
triggered = Signal() - def __init__(self, hotkey: str = "f8"): + def __init__(self, config_key: str = "hotkey"): """ Initialize the HotkeyManager. Args: - hotkey (str): The global hotkey string description. Default: "f8". + config_key (str): The configuration key to look up (e.g. "hotkey"). """ super().__init__() - self.hotkey = hotkey + self.config_key = config_key + self.hotkey = "f8" # Placeholder self.is_listening = False self._enabled = True @@ -58,9 +59,9 @@ class HotkeyManager(QObject): from src.core.config import ConfigManager config = ConfigManager() - self.hotkey = config.get("hotkey") + self.hotkey = config.get(self.config_key) - logging.info(f"Registering global hotkey: {self.hotkey}") + logging.info(f"Registering global hotkey ({self.config_key}): {self.hotkey}") try: # We don't suppress=True here because we want the app to see keys during recording # (Wait, actually if we are recording we WANT keyboard to see it, diff --git a/src/core/languages.py b/src/core/languages.py new file mode 100644 index 0000000..6c18792 --- /dev/null +++ b/src/core/languages.py @@ -0,0 +1,120 @@ +""" +Supported Languages Module +========================== +Full list of languages supported by OpenAI Whisper. +Maps ISO codes to display names. 
# Maps language code -> display name. "auto" is a pseudo-entry meaning
# "let the model detect the language".
LANGUAGES = {
    "auto": "Auto Detect",
    "af": "Afrikaans",
    "sq": "Albanian",
    "am": "Amharic",
    "ar": "Arabic",
    "hy": "Armenian",
    "as": "Assamese",
    "az": "Azerbaijani",
    "ba": "Bashkir",
    "eu": "Basque",
    "be": "Belarusian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "br": "Breton",
    "bg": "Bulgarian",
    "my": "Burmese",
    "ca": "Catalan",
    "zh": "Chinese",
    "hr": "Croatian",
    "cs": "Czech",
    "da": "Danish",
    "nl": "Dutch",
    "en": "English",
    "et": "Estonian",
    "fo": "Faroese",
    "fi": "Finnish",
    "fr": "French",
    "gl": "Galician",
    "ka": "Georgian",
    "de": "German",
    "el": "Greek",
    "gu": "Gujarati",
    "ht": "Haitian",
    "ha": "Hausa",
    "haw": "Hawaiian",
    "he": "Hebrew",
    "hi": "Hindi",
    "hu": "Hungarian",
    "is": "Icelandic",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "jw": "Javanese",
    "kn": "Kannada",
    "kk": "Kazakh",
    "km": "Khmer",
    "ko": "Korean",
    "lo": "Lao",
    "la": "Latin",
    "lv": "Latvian",
    "ln": "Lingala",
    "lt": "Lithuanian",
    "lb": "Luxembourgish",
    "mk": "Macedonian",
    "mg": "Malagasy",
    "ms": "Malay",
    "ml": "Malayalam",
    "mt": "Maltese",
    "mi": "Maori",
    "mr": "Marathi",
    "mn": "Mongolian",
    "ne": "Nepali",
    "no": "Norwegian",
    "oc": "Occitan",
    "pa": "Punjabi",
    "ps": "Pashto",
    "fa": "Persian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sa": "Sanskrit",
    "sr": "Serbian",
    "sn": "Shona",
    "sd": "Sindhi",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "es": "Spanish",
    "su": "Sundanese",
    "sw": "Swahili",
    "sv": "Swedish",
    "tl": "Tagalog",
    "tg": "Tajik",
    "ta": "Tamil",
    "tt": "Tatar",
    "te": "Telugu",
    "th": "Thai",
    "bo": "Tibetan",
    "tr": "Turkish",
    "tk": "Turkmen",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "cy": "Welsh",
    "yi": "Yiddish",
    "yo": "Yoruba",
}

# Reverse lookup (normalised display name -> code), built once at import
# so get_code_by_name() does not rescan LANGUAGES on every call.
_CODE_BY_NAME = {name.casefold(): code for code, name in LANGUAGES.items()}


def get_language_names():
    """Return the display names of all entries, in table order."""
    return list(LANGUAGES.values())


def get_code_by_name(name):
    """
    Return the language code for a display name.

    Matching ignores case and surrounding whitespace, so a value that
    differs only in those respects does not silently fall back to
    auto-detection.

    Args:
        name: Display name such as "English".

    Returns:
        str: The matching code, or "auto" when the name is unknown or is
        not a string.
    """
    if not isinstance(name, str):
        return "auto"
    return _CODE_BY_NAME.get(name.strip().casefold(), "auto")


def get_name_by_code(code):
    """Return the display name for a code, or "Auto Detect" if unknown."""
    return LANGUAGES.get(code, "Auto Detect")
src.core.config import ConfigManager + code = get_code_by_name(name) + ConfigManager().set("language", code) + self.settingChanged.emit("language", code) + + @Slot(result=str) + def get_current_language_name(self): + from src.core.languages import get_name_by_code + from src.core.config import ConfigManager + code = ConfigManager().get("language") + return get_name_by_code(code) + @Slot(str, result='QVariant') def getSetting(self, key): from src.core.config import ConfigManager diff --git a/src/ui/qml/ModernComboBox.qml b/src/ui/qml/ModernComboBox.qml index 0bb777d..dd906bc 100644 --- a/src/ui/qml/ModernComboBox.qml +++ b/src/ui/qml/ModernComboBox.qml @@ -100,7 +100,7 @@ ComboBox { popup: Popup { y: control.height - 1 width: control.width - implicitHeight: contentItem.implicitHeight + implicitHeight: Math.min(contentItem.implicitHeight, 300) padding: 5 contentItem: ListView { diff --git a/src/ui/qml/ModernKeySequenceRecorder.qml b/src/ui/qml/ModernKeySequenceRecorder.qml index e8c3cfe..a722a9e 100644 --- a/src/ui/qml/ModernKeySequenceRecorder.qml +++ b/src/ui/qml/ModernKeySequenceRecorder.qml @@ -25,7 +25,7 @@ Rectangle { Text { anchors.centerIn: parent - text: control.recording ? "Listening..." : (control.currentSequence || "None") + text: control.recording ? "Listening..." : (formatSequence(control.currentSequence) || "None") color: control.recording ? SettingsStyle.accent : (control.currentSequence ? "#ffffff" : "#808080") font.family: "JetBrains Mono" font.pixelSize: 13 @@ -72,6 +72,23 @@ Rectangle { if (!activeFocus) control.recording = false } + function formatSequence(seq) { + if (!seq) return "" + var parts = seq.split("+") + for (var i = 0; i < parts.length; i++) { + var p = parts[i] + // Standardize modifiers + if (p === "ctrl") parts[i] = "Ctrl" + else if (p === "alt") parts[i] = "Alt" + else if (p === "shift") parts[i] = "Shift" + else if (p === "win") parts[i] = "Win" + else if (p === "esc") parts[i] = "Esc" + // Capitalize F-keys and others (e.g. 
f8 -> F8, space -> Space) + else parts[i] = p.charAt(0).toUpperCase() + p.slice(1) + } + return parts.join(" + ") + } + function getKeyName(key, text) { // F-Keys if (key >= Qt.Key_F1 && key <= Qt.Key_F35) return "f" + (key - Qt.Key_F1 + 1) diff --git a/src/ui/qml/Settings.qml b/src/ui/qml/Settings.qml index b5b5523..03095fb 100644 --- a/src/ui/qml/Settings.qml +++ b/src/ui/qml/Settings.qml @@ -314,14 +314,24 @@ Window { spacing: 0 ModernSettingsItem { - label: "Global Hotkey" - description: "Press to record a new shortcut (e.g. Ctrl+Space)" + label: "Global Hotkey (Transcribe)" + description: "Press to record a new shortcut (e.g. F9)" control: ModernKeySequenceRecorder { - Layout.preferredWidth: 200 + implicitWidth: 240 currentSequence: ui.getSetting("hotkey") onSequenceChanged: (seq) => ui.setSetting("hotkey", seq) } } + + ModernSettingsItem { + label: "Global Hotkey (Translate)" + description: "Press to record a new shortcut (e.g. F10)" + control: ModernKeySequenceRecorder { + implicitWidth: 240 + currentSequence: ui.getSetting("hotkey_translate") + onSequenceChanged: (seq) => ui.setSetting("hotkey_translate", seq) + } + } ModernSettingsItem { label: "Run on Startup" @@ -742,15 +752,17 @@ Window { ModernSettingsItem { label: "Language" - description: "Force language or Auto-detect" + description: "Spoken language to transcribe" control: ModernComboBox { - width: 140 - model: ["auto", "en", "fr", "de", "es", "it", "ja", "zh", "ru"] - currentIndex: model.indexOf(ui.getSetting("language")) - onActivated: ui.setSetting("language", currentText) + Layout.preferredWidth: 200 + model: ui.get_supported_languages() + currentIndex: model.indexOf(ui.get_current_language_name()) + onActivated: (index) => ui.set_language_by_name(currentText) } } - + + // Task selector removed as per user request (Hotkeys handle this now) + ModernSettingsItem { label: "Compute Device" description: "Hardware acceleration (CUDA requires NVidia GPU)" diff --git a/src/utils/formatters.py 
"""
Formatter Utilities
===================
Helper functions for text formatting.
"""


def format_hotkey(sequence: str) -> str:
    """
    Format a hotkey sequence for display.

    Splits the sequence on '+', trims and lower-cases each part, then
    capitalises its first letter, e.g. 'ctrl+alt+f9' -> 'Ctrl + Alt + F9'.
    Empty parts (as in 'ctrl+' or a whitespace-only string) are dropped so
    the result never contains dangling separators.

    Args:
        sequence: Hotkey string with parts separated by '+'. May be empty
            or None.

    Returns:
        The formatted sequence, or "None" when there is nothing to show.
    """
    if not sequence:
        return "None"

    labels = []
    for raw_part in sequence.split('+'):
        key = raw_part.strip().lower()
        if not key:
            # Skip blanks left by a leading/trailing/doubled '+'.
            continue
        # Upper-casing the first letter covers modifiers (ctrl -> Ctrl),
        # F-keys (f8 -> F8) and named keys (space -> Space) alike.
        labels.append(key[0].upper() + key[1:])

    if not labels:
        return "None"
    return " + ".join(labels)