"""Application entry point for the Whisper Voice desktop app.

Wires together the QML user interface (loader, overlay and settings
windows), the system tray, the audio engine, the transcriber and the global
hotkeys, then runs the Qt event loop.
"""

import sys
import threading
import logging
import os

# Add the application directory to sys.path to ensure 'src' is findable
# This is critical for the embedded Python environment in the portable build
app_dir = os.path.dirname(os.path.abspath(__file__))
if app_dir not in sys.path:
    sys.path.insert(0, app_dir)

from PySide6.QtWidgets import QApplication, QFileDialog, QMessageBox
from PySide6.QtCore import QMetaObject, QObject, Slot, Signal, QThread, Qt, QUrl
from PySide6.QtQml import QQmlApplicationEngine
from PySide6.QtQuickControls2 import QQuickStyle
from PySide6.QtGui import QGuiApplication, QIcon

from src.ui.bridge import UIBridge
from src.ui.tray import SystemTray
from src.core.audio_engine import AudioEngine
from src.core.transcriber import WhisperTranscriber
from src.core.hotkey_manager import HotkeyManager
from src.core.config import ConfigManager
from src.utils.injector import InputInjector
from src.core.paths import get_models_path, get_bundle_path
from src.utils.window_hook import WindowHook

from PySide6.QtGui import QSurfaceFormat

# Configure GPU Surface for Alpha/Transparency (Critical for Blur)
surface_fmt = QSurfaceFormat()
surface_fmt.setAlphaBufferSize(8)
QSurfaceFormat.setDefaultFormat(surface_fmt)

# Configure High DPI behavior for crisp UI
os.environ["QT_ENABLE_HIGHDPI_SCALING"] = "1"
os.environ["QT_AUTOSCREENSCALEFACTOR"] = "1"

# Detect resolution without creating QApplication (Fixes crash)
try:
    import ctypes

    user32 = ctypes.windll.user32
    # Get physical screen width (unscaled)
    # SetProcessDPIAware is needed to get the true resolution
    user32.SetProcessDPIAware()
    width = user32.GetSystemMetrics(0)

    # Base scale centers around 1920 width.
    # At 3840 (4k), res_scale is 2.0. If we want it ~40% smaller, we multiply by 0.6 = 1.2
    res_scale = width / 1920
    if width >= 3840:
        res_scale *= 0.6  # Make it significantly smaller at 4k as requested

    os.environ["QT_SCALE_FACTOR"] = str(max(1.0, res_scale))
except Exception:
    # Best effort only: ctypes.windll does not exist outside Windows, in which
    # case QT_SCALE_FACTOR is simply left unset. Nothing is logged here on
    # purpose: calling the module-level logging helpers before basicConfig()
    # below would install a default configuration first.
    pass


# Configure Logging
class QmlLoggingHandler(logging.Handler, QObject):
    """Logging handler that forwards formatted records to the UI bridge.

    Each record is formatted and emitted through ``sig_log``, which is
    connected to ``bridge.append_log``.
    """

    sig_log = Signal(str)

    def __init__(self, bridge):
        """Create the handler and connect it to ``bridge.append_log``."""
        logging.Handler.__init__(self)
        QObject.__init__(self)
        self.bridge = bridge
        self.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        self.sig_log.connect(self.bridge.append_log)

    def emit(self, record):
        """Format ``record`` and send it through ``sig_log``."""
        try:
            self.sig_log.emit(self.format(record))
        except RuntimeError:
            # Presumably raised when the underlying Qt object has already been
            # destroyed (e.g. logging during teardown); drop the record rather
            # than raise out of a logging call.
            pass


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# Silence shutdown-related tracebacks from Qt/PySide6 signals
def _silent_shutdown_hook(exc_type, exc_value, exc_tb):
    """``sys.excepthook`` replacement that hides shutdown noise.

    RuntimeError, SystemError and KeyboardInterrupt are suppressed completely,
    as are tracebacks whose text mentions both 'bridge.py' and '@Slot'. Every
    other exception is passed to the default ``sys.__excepthook__``.

    NOTE(review): the type check is not limited to shutdown, so a genuine
    RuntimeError raised at any time is hidden as well.
    """
    # During Python shutdown, some QObject signals may try to call dead slots.
    # Ignore these specific tracebacks when they occur in bridge.py.
    import traceback

    if exc_type in (RuntimeError, SystemError, KeyboardInterrupt):
        return  # Suppress completely

    tb_str = ''.join(traceback.format_exception(exc_type, exc_value, exc_tb))
    if 'bridge.py' in tb_str and '@Slot' in tb_str:
        return  # Suppress bridge signal tracebacks

    # For all other exceptions, print normally
    sys.__excepthook__(exc_type, exc_value, exc_tb)


sys.excepthook = _silent_shutdown_hook


class DownloadWorker(QThread):
    """Background worker for model downloads.

    Emits ``finished`` on success and ``error`` (with the exception text) on
    failure. ``progress`` is declared but never emitted by ``run``.
    """

    progress = Signal(int)
    finished = Signal()
    error = Signal(str)

    def __init__(self, model_name="small", parent=None):
        """Remember which model to download (defaults to "small")."""
        super().__init__(parent)
        self.model_name = model_name

    def run(self):
        """Download the model into ``<models path>/faster-whisper-<name>``."""
        try:
            from faster_whisper import download_model

            model_path = get_models_path()
            # Download to a specific subdirectory to keep things clean and predictable
            # This matches the logic in transcriber.py which looks for this specific path
            dest_dir = model_path / f"faster-whisper-{self.model_name}"
            logging.info(f"Downloading Model '{self.model_name}' to {dest_dir}...")

            # Ensure parent exists
            model_path.mkdir(parents=True, exist_ok=True)

            # output_dir in download_model specifies where the model files are saved
            download_model(self.model_name, output_dir=str(dest_dir))
            self.finished.emit()
        except Exception as e:
            logging.error(f"Download failed: {e}")
            self.error.emit(str(e))


class TranscriptionWorker(QThread):
    """Runs one transcription off the GUI thread.

    ``finished`` carries the transcribed text. It is always emitted, with an
    empty string when the transcription raised.
    """

    finished = Signal(str)

    def __init__(self, transcriber, audio_data, is_file=False, parent=None, task_override=None):
        """Store the transcriber, its input and the task to run.

        ``audio_data`` is passed to ``transcriber.transcribe`` unchanged;
        ``is_file`` and ``task_override`` are forwarded as its ``is_file`` and
        ``task`` keyword arguments.
        """
        super().__init__(parent)
        self.transcriber = transcriber
        self.audio_data = audio_data
        self.is_file = is_file
        self.task_override = task_override

    def run(self):
        """Transcribe and emit the result through ``finished``."""
        try:
            text = self.transcriber.transcribe(
                self.audio_data, is_file=self.is_file, task=self.task_override
            )
        except Exception as e:
            # Without this the signal would never fire, and the application
            # would stay in its "processing" state and ignore every later
            # recording toggle.
            logging.error(f"Transcription failed: {e}")
            text = ""
        self.finished.emit(text)


class WhisperApp(QObject):
    """Top-level application object.

    Owns the QApplication, the QML engine, the UI bridge, the tray icon and,
    once the startup download has finished, the audio engine, transcriber and
    hotkey managers.
    """

    # Unscaled overlay window size in pixels; multiplied by the "ui_scale" setting.
    OVERLAY_BASE_WIDTH = 460
    OVERLAY_BASE_HEIGHT = 180
    # Gap in pixels between the overlay and the edge of the available screen area.
    OVERLAY_MARGIN = 15

    def __init__(self):
        """Build the UI shell and start the startup model download."""
        super().__init__()

        # Force a style that supports full customization
        QQuickStyle.setStyle("Basic")

        self.qt_app = QApplication(sys.argv)
        self.qt_app.setQuitOnLastWindowClosed(False)

        # Set application-wide window icon (shows in taskbar for all windows)
        icon_path = get_bundle_path() / "assets" / "icon.ico"
        if icon_path.exists():
            self.qt_app.setWindowIcon(QIcon(str(icon_path)))

        self.config = ConfigManager()

        # 1. Initialize QML Engine & Bridge
        self.engine = QQmlApplicationEngine()
        self.bridge = UIBridge()

        # 0. Attach Logging Handler
        logging.getLogger().addHandler(QmlLoggingHandler(self.bridge))

        # Connect toggle recording signal
        self.bridge.toggleRecordingRequested.connect(self.toggle_recording)
        self.bridge.isRecordingChanged.connect(self.on_ui_toggle_request)
        self.bridge.settingChanged.connect(self.on_settings_changed)
        self.bridge.hotkeysEnabledChanged.connect(self.on_hotkeys_enabled_toggle)
        self.bridge.downloadRequested.connect(self.on_download_requested)

        self.engine.rootContext().setContextProperty("ui", self.bridge)

        # 2. Tray setup
        self.tray = SystemTray()
        self.tray.quit_requested.connect(self.quit_app)
        self.tray.settings_requested.connect(self.open_settings)
        self.tray.transcribe_file_requested.connect(self.transcribe_file)

        # Init Tooltip
        from src.utils.formatters import format_hotkey
        self.format_hotkey = format_hotkey  # Store ref
        hk1 = self.format_hotkey(self.config.get("hotkey"))
        hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
        self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")

        # 3. Logic Components Placeholders
        self.audio_engine = None
        self.transcriber = None
        self.hk_transcribe = None
        self.hk_translate = None
        self.overlay_root = None
        # Created lazily later on; initialised here so they can be tested with
        # a plain truthiness check instead of hasattr().
        self.settings_root = None
        self.window_hook = None
        self.worker = None
        self.download_worker = None

        # 4. Start Loader
        loader_qml = get_bundle_path() / "src/ui/qml/Loader.qml"
        self.engine.load(QUrl.fromLocalFile(str(loader_qml)))
        self.loader_root = self.engine.rootObjects()[0]
        self.loader_root.setProperty("color", "transparent")

        self.loader_worker = DownloadWorker()
        self.loader_worker.progress.connect(self.on_loader_progress)
        self.loader_worker.finished.connect(self.on_loader_done)
        # A failed startup download must not leave the loader window up forever.
        self.loader_worker.error.connect(self.on_loader_error)
        self.loader_worker.start()

        # Preload audio devices in background to avoid settings lag
        threading.Thread(target=self.bridge.preload_audio_devices, daemon=True).start()

    def on_loader_progress(self, percent):
        """Mirror the loader's progress value onto the bridge."""
        self.bridge.downloadProgress = percent

    @Slot(str)
    def on_loader_error(self, err):
        """Continue startup after the startup model download failed.

        The failure is logged and startup proceeds. A missing model can still
        be fetched later through ``on_download_requested``.
        """
        logging.error(f"Startup model download failed: {err}")
        self.on_loader_done()

    def on_loader_done(self):
        """Finish startup: close the loader, start the backend, show the overlay.

        Runs at most once; later calls return immediately.
        """
        if getattr(self, "_loader_handled", False):
            return
        self._loader_handled = True

        logging.info("Model verification complete.")

        # Close Loader Window
        if self.loader_root:
            self.loader_root.close()

        # Init Backend
        self.init_logic()

        # Show Overlay (Ensure we don't load multiple times)
        overlay_qml = get_bundle_path() / "src/ui/qml/Overlay.qml"
        self.engine.load(QUrl.fromLocalFile(str(overlay_qml)))
        self.overlay_root = self.engine.rootObjects()[-1]
        self.overlay_root.setProperty("color", "transparent")
        self.center_overlay()

        # Preload Settings (Invisible)
        logging.info("Preloading Settings window...")
        self.open_settings()
        if self.settings_root:
            self.settings_root.setVisible(False)

        # Install Low-Level Window Hook for Transparent Hit Test
        try:
            hwnd = self.overlay_root.winId()
            # Initial scale from config
            scale = float(self.config.get("ui_scale"))
            # Current Overlay Dimensions
            win_w = int(self.OVERLAY_BASE_WIDTH * scale)
            win_h = int(self.OVERLAY_BASE_HEIGHT * scale)

            self.window_hook = WindowHook(hwnd, win_w, win_h, initial_scale=scale)
            self.window_hook.install()
            # Initial state: Disabled because we start inactive
            self.window_hook.set_enabled(False)
        except Exception as e:
            logging.error(f"Failed to install WindowHook: {e}")

    def center_overlay(self):
        """Calculates and sets the Overlay position above the taskbar."""
        screen = QGuiApplication.primaryScreen()
        if not screen or not self.overlay_root:
            return

        geom = screen.availableGeometry()
        w = self.overlay_root.width()
        h = self.overlay_root.height()

        self.overlay_root.setX(geom.x() + (geom.width() - w) // 2)
        self.overlay_root.setY(geom.bottom() - h - self.OVERLAY_MARGIN)

    def init_logic(self):
        """Create the audio engine, transcriber and hotkey managers (once)."""
        if getattr(self, "_logic_initialized", False):
            return
        self._logic_initialized = True

        logging.info("Initializing Core Logic...")
        self.audio_engine = AudioEngine()
        self.audio_engine.set_visualizer_callback(self.bridge.update_amplitude)
        self.audio_engine.set_silence_callback(self.on_silence_detected)

        self.transcriber = WhisperTranscriber()

        # Dual Hotkey Managers
        self.hk_transcribe = HotkeyManager(config_key="hotkey")
        self.hk_transcribe.triggered.connect(
            lambda: self.toggle_recording(task_override="transcribe")
        )
        self.hk_transcribe.start()

        self.hk_translate = HotkeyManager(config_key="hotkey_translate")
        self.hk_translate.triggered.connect(
            lambda: self.toggle_recording(task_override="translate")
        )
        self.hk_translate.start()

        self.bridge.update_status("Ready")

    def run(self):
        """Run the Qt event loop and exit the process with its return code."""
        sys.exit(self.qt_app.exec())

    @Slot()
    def quit_app(self):
        """Stop workers and hotkeys, close every window, then quit Qt."""
        logging.info("Shutting down...")

        # [CRITICAL] Stop the StatsWorker FIRST before any UI objects are touched.
        # This prevents signal emissions to a dying UIBridge object.
        if hasattr(self, 'bridge') and hasattr(self.bridge, 'stats_worker'):
            try:
                self.bridge.stats_worker.stats_ready.disconnect(self.bridge.update_stats_callback)
            except (AttributeError, RuntimeError, TypeError):
                # Nothing to disconnect (missing attribute or not connected).
                pass
            self.bridge.stats_worker.stop()

        if self.hk_transcribe:
            self.hk_transcribe.stop()
        if self.hk_translate:
            self.hk_translate.stop()

        # Close all QML windows to ensure bindings stop before Python objects die
        for window in (self.overlay_root, self.loader_root, self.settings_root):
            if window:
                window.close()
                window.deleteLater()

        if self.loader_worker and self.loader_worker.isRunning():
            logging.info("Waiting for loader to finish...")
            self.loader_worker.quit()
            self.loader_worker.wait(1000)

        if self.worker and self.worker.isRunning():
            logging.info("Waiting for transcription to finish...")
            self.worker.quit()
            self.worker.wait(2000)

        self.qt_app.quit()

    @Slot()
    def open_settings(self):
        """Show the settings window, loading and centering it on first use."""
        if self.settings_root is None:
            logging.info("Loading Settings window for the first time...")
            settings_qml = get_bundle_path() / "src/ui/qml/Settings.qml"
            self.engine.load(QUrl.fromLocalFile(str(settings_qml)))
            self.settings_root = self.engine.rootObjects()[-1]
            self.settings_root.setProperty("color", "transparent")

            # The loaded window object is kept and re-shown on later calls
            # instead of being loaded again.

            # Center on screen
            screen = QGuiApplication.primaryScreen()
            if screen:
                geom = screen.availableGeometry()
                self.settings_root.setX(
                    geom.x() + (geom.width() - self.settings_root.width()) // 2
                )
                self.settings_root.setY(
                    geom.y() + (geom.height() - self.settings_root.height()) // 2
                )

        self.settings_root.setVisible(True)
        self.settings_root.requestActivate()

    @Slot()
    def init_settings_preload(self):
        """Preloads settings window to avoid lag on first open.

        Currently only logs (and returns early if the window is already
        loaded); the actual preload happens in ``on_loader_done``.
        """
        # Check if already loaded
        if self.settings_root:
            return

        logging.info("Preloading Settings QML...")

    @Slot(str, 'QVariant')
    def on_settings_changed(self, key, value):
        """
        React to settings changes in real-time.
        Some settings require immediate action (reloading model, moving window).
        """
        print(f"Setting Changed: {key} = {value}")

        # 1. Hotkey Reload
        if key in ["hotkey", "hotkey_translate"]:
            if self.hk_transcribe:
                self.hk_transcribe.reload_hotkey()
            if self.hk_translate:
                self.hk_translate.reload_hotkey()

            if self.tray:
                hk1 = self.format_hotkey(self.config.get("hotkey"))
                hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
                self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")

        # 2. AI Model Reload (Heavy)
        if key in ["model_size", "compute_device", "compute_type"]:
            size = self.config.get("model_size")
            # Notify UI to check if the new selected model is downloaded
            self.bridge.notifyModelStatesChanged()

            if self.transcriber is None:
                # Settings can change before init_logic() has created the transcriber.
                logging.info("Transcriber not initialized yet. Skipping model reload.")
            elif self.transcriber.model_exists(size):
                logging.info(f"Model '{size}' exists. Reloading engine...")
                threading.Thread(target=self.transcriber.load_model, daemon=True).start()
            else:
                logging.info(f"Model '{size}' not found. Waiting for manual download.")

        # 3. Window Positioning
        if key in ["overlay_position", "overlay_offset_x", "overlay_offset_y", "ui_scale"]:
            self.reposition_overlay()

        # 4. Run on Startup
        if key == "run_on_startup":
            # This class does not define handle_startup_shortcut itself; look
            # it up defensively so the setting change cannot raise AttributeError.
            startup_handler = getattr(self, "handle_startup_shortcut", None)
            if callable(startup_handler):
                startup_handler(value)
            else:
                logging.warning("No startup shortcut handler available; 'run_on_startup' ignored.")

        # 5. Input Device: nothing to do here; no action is taken for that key.

    def reposition_overlay(self):
        """Calculates and sets the Overlay position based on user settings.

        Resizes the overlay window to the base size multiplied by "ui_scale",
        anchors it according to "overlay_position" and then applies the
        configured x/y offsets. An unrecognized position anchors at (0, 0).
        """
        screen = QGuiApplication.primaryScreen()
        if not screen or not self.overlay_root:
            return

        # UI Scale is applied to the content in QML; it is needed here to size
        # and place the window.
        scale = float(self.config.get("ui_scale"))

        # Get Geometry
        geom = screen.availableGeometry()

        # Resize the window to fit the scaled content so it is not clipped.
        win_w = int(self.OVERLAY_BASE_WIDTH * scale)
        win_h = int(self.OVERLAY_BASE_HEIGHT * scale)
        self.overlay_root.setWidth(win_w)
        self.overlay_root.setHeight(win_h)

        pos_mode = self.config.get("overlay_position")
        offset_x = int(self.config.get("overlay_offset_x"))
        offset_y = int(self.config.get("overlay_offset_y"))

        margin = self.OVERLAY_MARGIN
        left = geom.left() + margin
        center = geom.x() + (geom.width() - win_w) // 2
        right = geom.right() - win_w - margin
        top = geom.top() + margin
        bottom = geom.bottom() - win_h - margin

        anchors = {
            "Bottom Center": (center, bottom),
            "Top Center": (center, top),
            "Bottom Right": (right, bottom),
            "Top Right": (right, top),
            "Bottom Left": (left, bottom),
            "Top Left": (left, top),
        }
        x, y = anchors.get(pos_mode, (0, 0))

        # Apply Offsets
        self.overlay_root.setX(x + offset_x)
        self.overlay_root.setY(y + offset_y)

    @Slot()
    def transcribe_file(self):
        """Ask the user for an audio file and transcribe it in the background."""
        if self.transcriber is None:
            # Reachable from the tray before the startup loader has finished.
            logging.warning("Ignored file transcription request: transcriber not ready.")
            return

        file_path, _ = QFileDialog.getOpenFileName(
            None, "Select Audio", "", "Audio (*.mp3 *.wav *.flac *.m4a *.ogg)"
        )
        if file_path:
            self.bridge.update_status("Thinking...")
            # No task override is passed for files, so the worker forwards task=None.
            self.worker = TranscriptionWorker(
                self.transcriber, file_path, is_file=True, parent=self
            )
            self.worker.finished.connect(self.on_transcription_done)
            self.worker.start()

    @Slot()
    def on_silence_detected(self):
        """Silence callback of the audio engine: stop the current recording.

        ``toggle_recording`` is invoked through a queued connection with no
        argument, so it runs later from this object's event loop rather than
        directly inside the callback.
        """
        QMetaObject.invokeMethod(self, "toggle_recording", Qt.QueuedConnection)

    @Slot()  # Modified to allow lambda override
    def toggle_recording(self, task_override=None):
        """Start a recording, or stop the running one and transcribe it.

        ``task_override`` selects the task for a recording that is about to
        start; when omitted the configured "task" is used. Stopping always
        uses the task the recording was started with. Requests are ignored
        while the backend is not initialized or a transcription is running.
        """
        if not self.audio_engine:
            return

        # Prevent starting a new recording while we are still transcribing the last one
        if self.bridge.isProcessing:
            logging.warning("Ignored toggle request: Transcription in progress.")
            return

        # Determine which task we are entering
        intended_task = task_override if task_override else self.config.get("task")

        if self.audio_engine.recording:
            # STOP RECORDING
            self.bridge.update_status("Thinking...")
            self.bridge.isRecording = False
            self.bridge.isProcessing = True  # Start Processing
            audio_data = self.audio_engine.stop_recording()

            # Use the task that started this session
            final_task = getattr(self, "current_recording_task", self.config.get("task"))

            self.worker = TranscriptionWorker(
                self.transcriber, audio_data, parent=self, task_override=final_task
            )
            self.worker.finished.connect(self.on_transcription_done)
            self.worker.start()
        else:
            # START RECORDING
            self.current_recording_task = intended_task
            self.bridge.update_status(f"Recording ({intended_task})...")
            self.bridge.isRecording = True
            self.audio_engine.start_recording()

    @Slot(str)
    def on_transcription_done(self, text: str):
        """Reset the UI state and inject the transcribed text, if any."""
        self.bridge.update_status("Ready")
        self.bridge.isProcessing = False  # End Processing
        if text:
            method = self.config.get("input_method")
            speed = int(self.config.get("typing_speed"))
            InputInjector.inject_text(text, method, speed)

    @Slot(bool)
    def on_hotkeys_enabled_toggle(self, state):
        """Enable or disable both hotkey managers."""
        if self.hk_transcribe:
            self.hk_transcribe.set_enabled(state)
        if self.hk_translate:
            self.hk_translate.set_enabled(state)

    @Slot(str)
    def on_download_requested(self, size):
        """Start downloading model ``size`` unless a download is already running."""
        if self.bridge.isDownloading:
            return

        self.bridge.update_status("Downloading...")
        self.bridge.isDownloading = True

        self.download_worker = DownloadWorker(size, parent=self)
        self.download_worker.finished.connect(self.on_download_finished)
        self.download_worker.error.connect(self.on_download_error)
        self.download_worker.start()

    def on_download_finished(self):
        """Clear the download state and load the freshly downloaded model."""
        self.bridge.isDownloading = False
        self.bridge.update_status("Ready")
        self.bridge.notifyModelStatesChanged()  # Refresh UI markers

        # Automatically load it now that it's here
        if self.transcriber is not None:
            threading.Thread(target=self.transcriber.load_model, daemon=True).start()

    def on_download_error(self, err):
        """Clear the download state and report the failure."""
        self.bridge.isDownloading = False
        self.bridge.update_status("Error")
        logging.error(f"Download Error: {err}")

    # An earlier, second definition of on_ui_toggle_request (which called
    # toggle_recording when the UI state differed from the audio engine) was
    # silently overridden by the definition below and never ran; it has been
    # removed. UI toggles arrive through bridge.toggleRecordingRequested.
    @Slot(bool)
    def on_ui_toggle_request(self, is_recording):
        """Called when recording state changes."""
        # Update Window Hook to allow clicking if active
        is_active = is_recording or self.bridge.isProcessing
        if self.window_hook is not None:
            self.window_hook.set_enabled(is_active)

    @Slot(bool)
    def on_processing_changed(self, is_processing):
        """Called when the processing state changes; updates the window hook."""
        is_active = self.bridge.isRecording or is_processing
        if self.window_hook is not None:
            self.window_hook.set_enabled(is_active)


if __name__ == "__main__":
    app = WhisperApp()

    # Connect extra signal for processing state
    app.bridge.isProcessingChanged.connect(app.on_processing_changed)

    sys.exit(app.run())