diff --git a/build.spec b/build.spec index 63d0752..bd7fd54 100644 --- a/build.spec +++ b/build.spec @@ -8,6 +8,7 @@ # NO heavy dependencies (torch, PySide6, etc.) are bundled. import os +import sys import glob block_cipher = None @@ -61,7 +62,7 @@ a = Analysis( 'psutil', 'pynvml', 'pystray', 'PIL', 'Pillow', 'darkdetect', 'huggingface_hub', 'requests', 'tqdm', 'onnxruntime', 'av', - 'tkinter', 'matplotlib', 'notebook', 'IPython', + 'matplotlib', 'notebook', 'IPython', ], win_no_prefer_redirects=False, win_private_assemblies=False, @@ -91,5 +92,5 @@ exe = EXE( target_arch=None, codesign_identity=None, entitlements_file=None, - icon='assets/icon.ico', + icon='assets/icon.ico' if sys.platform == 'win32' else None, ) diff --git a/main.py b/main.py index cdd8548..b0974bb 100644 --- a/main.py +++ b/main.py @@ -9,6 +9,14 @@ app_dir = os.path.dirname(os.path.abspath(__file__)) if app_dir not in sys.path: sys.path.insert(0, app_dir) +# ----------------------------------------------------------------------------- +# LINUX: Force XWayland (X11) for reliable window positioning & overlay behavior. +# Our input stack (evdev, UInput, wl-copy) is compositor-agnostic so this is safe. +# Native Wayland lacks app-controlled window positioning which the overlay needs. +# ----------------------------------------------------------------------------- +if sys.platform == 'linux' and os.environ.get('WAYLAND_DISPLAY'): + os.environ.setdefault('QT_QPA_PLATFORM', 'xcb') + # ----------------------------------------------------------------------------- # WINDOWS DLL FIX (CRITICAL for Portable CUDA) # Python 3.8+ on Windows requires explicit DLL directory addition. @@ -16,20 +24,32 @@ if app_dir not in sys.path: if os.name == 'nt' and hasattr(os, 'add_dll_directory'): try: from pathlib import Path - # Scan sys.path for site-packages + _candidate_dirs = set() + + # 1. 
From sys.path (scan ALL site-packages, not just the first) for p in sys.path: path_obj = Path(p) if path_obj.name == 'site-packages' and path_obj.exists(): - nvidia_path = path_obj / "nvidia" - if nvidia_path.exists(): - for subdir in nvidia_path.iterdir(): - # Add 'bin' folder from each nvidia stub (cublas, cudnn, etc.) - bin_path = subdir / "bin" - if bin_path.exists(): - os.add_dll_directory(str(bin_path)) - # Also try adding site-packages itself just in case - # os.add_dll_directory(str(path_obj)) - break + _candidate_dirs.add(str(path_obj.resolve())) + + # 2. Relative to the Python executable (critical for embedded Python) + _exe_dir = Path(sys.executable).parent + for _sp in [_exe_dir / "Lib" / "site-packages", _exe_dir / "lib" / "site-packages"]: + if _sp.exists(): + _candidate_dirs.add(str(_sp.resolve())) + + # 3. Scan all candidates for nvidia DLL directories + for _sp_str in _candidate_dirs: + nvidia_path = Path(_sp_str) / "nvidia" + if nvidia_path.exists(): + for subdir in nvidia_path.iterdir(): + bin_path = subdir / "bin" + if bin_path.exists(): + os.add_dll_directory(str(bin_path)) + # Also add to PATH as fallback - some libraries + # (e.g. 
CTranslate2) load DLLs lazily via LoadLibrary + # which may not respect os.add_dll_directory() + os.environ['PATH'] = str(bin_path) + os.pathsep + os.environ.get('PATH', '') except Exception: pass # ----------------------------------------------------------------------------- @@ -49,7 +69,8 @@ from src.core.hotkey_manager import HotkeyManager from src.core.config import ConfigManager from src.utils.injector import InputInjector from src.core.paths import get_models_path, get_bundle_path -from src.utils.window_hook import WindowHook +if os.name == 'nt': + from src.utils.window_hook import WindowHook from PySide6.QtGui import QSurfaceFormat @@ -63,37 +84,52 @@ os.environ["QT_ENABLE_HIGHDPI_SCALING"] = "1" os.environ["QT_AUTOSCREENSCALEFACTOR"] = "1" # Detect resolution without creating QApplication (Fixes crash) -try: - import ctypes - user32 = ctypes.windll.user32 - # Get physical screen width (unscaled) - # SetProcessDPIAware is needed to get the true resolution - user32.SetProcessDPIAware() - width = user32.GetSystemMetrics(0) - # Base scale centers around 1920 width. - # At 3840 (4k), res_scale is 2.0. If we want it ~40% smaller, we multiply by 0.6 = 1.2 - res_scale = (width / 1920) - if width >= 3840: - res_scale *= 0.6 # Make it significantly smaller at 4k as requested - - os.environ["QT_SCALE_FACTOR"] = str(max(1.0, res_scale)) -except: - pass +if os.name == 'nt': + try: + import ctypes + user32 = ctypes.windll.user32 + # Get physical screen width (unscaled) + # SetProcessDPIAware is needed to get the true resolution + user32.SetProcessDPIAware() + width = user32.GetSystemMetrics(0) + # Base scale centers around 1920 width. + # At 3840 (4k), res_scale is 2.0. 
If we want it ~40% smaller, we multiply by 0.6 = 1.2 + res_scale = (width / 1920) + if width >= 3840: + res_scale *= 0.6 # Make it significantly smaller at 4k as requested -# Detect Windows "Reduce Motion" preference -try: - import ctypes - SPI_GETCLIENTAREAANIMATION = 0x1042 - animation_enabled = ctypes.c_bool(True) - ctypes.windll.user32.SystemParametersInfoW( - SPI_GETCLIENTAREAANIMATION, 0, - ctypes.byref(animation_enabled), 0 - ) - if not animation_enabled.value: - ConfigManager().data["reduce_motion"] = True - ConfigManager().save() -except Exception: - pass + os.environ["QT_SCALE_FACTOR"] = str(max(1.0, res_scale)) + except: + pass +# On Linux, Qt handles DPI automatically via QT_ENABLE_HIGHDPI_SCALING + +# Detect "Reduce Motion" preference +if os.name == 'nt': + try: + import ctypes + SPI_GETCLIENTAREAANIMATION = 0x1042 + animation_enabled = ctypes.c_bool(True) + ctypes.windll.user32.SystemParametersInfoW( + SPI_GETCLIENTAREAANIMATION, 0, + ctypes.byref(animation_enabled), 0 + ) + if not animation_enabled.value: + ConfigManager().data["reduce_motion"] = True + ConfigManager().save() + except Exception: + pass +elif sys.platform == 'linux': + try: + import subprocess as _sp + result = _sp.run( + ['gsettings', 'get', 'org.gnome.desktop.interface', 'enable-animations'], + capture_output=True, text=True, timeout=2 + ) + if result.stdout.strip() == 'false': + ConfigManager().data["reduce_motion"] = True + ConfigManager().save() + except Exception: + pass # Configure Logging class QmlLoggingHandler(logging.Handler, QObject): @@ -140,23 +176,33 @@ class DownloadWorker(QThread): def run(self): try: import requests - from tqdm import tqdm + import shutil model_path = get_models_path() - # Determine what to download dest_dir = model_path / f"faster-whisper-{self.model_name}" repo_id = f"Systran/faster-whisper-{self.model_name}" - files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"] + required_files = ["config.json", "model.bin", "tokenizer.json", 
"vocabulary.json"] base_url = f"https://huggingface.co/{repo_id}/resolve/main" - dest_dir.mkdir(parents=True, exist_ok=True) + # Skip if already complete + if dest_dir.exists() and all((dest_dir / f).exists() for f in required_files): + logging.info(f"Model {self.model_name} already downloaded.") + self.finished.emit() + return + + # Download to a temp dir first, move on success + tmp_dir = model_path / f".tmp-faster-whisper-{self.model_name}" + if tmp_dir.exists(): + shutil.rmtree(tmp_dir) + tmp_dir.mkdir(parents=True, exist_ok=True) + logging.info(f"Downloading {self.model_name} to {dest_dir}...") - + # 1. Calculate Total Size total_size = 0 file_sizes = {} - + with requests.Session() as s: - for fname in files: + for fname in required_files: url = f"{base_url}/{fname}" head = s.head(url, allow_redirects=True) if head.status_code == 200: @@ -164,42 +210,48 @@ class DownloadWorker(QThread): file_sizes[fname] = size total_size += size else: - # Fallback for vocabulary.json vs vocabulary.txt - if fname == "vocabulary.json": - # Try .txt? Or just skip if not found? - # Faster-whisper usually has vocabulary.json - pass - + logging.warning(f"HEAD failed for {fname}: HTTP {head.status_code}") + + # Abort if any required file is unavailable + missing = [f for f in required_files if f not in file_sizes] + if missing: + shutil.rmtree(tmp_dir, ignore_errors=True) + raise RuntimeError(f"Required model files unavailable: {missing}") + # 2. Download loop downloaded_bytes = 0 - + with requests.Session() as s: - for fname in files: - if fname not in file_sizes: continue - + for fname in required_files: url = f"{base_url}/{fname}" - dest_file = dest_dir / fname - - # Resume check? - # Simpler to just overwrite for reliability unless we want complex resume logic. - # We'll overwrite. 
- + dest_file = tmp_dir / fname + resp = s.get(url, stream=True) resp.raise_for_status() - + with open(dest_file, 'wb') as f: for chunk in resp.iter_content(chunk_size=8192): if chunk: f.write(chunk) downloaded_bytes += len(chunk) - - # Emit Progress if total_size > 0: pct = int((downloaded_bytes / total_size) * 100) self.progress.emit(pct) + # 3. Validate all files present and non-empty + for fname in required_files: + fpath = tmp_dir / fname + if not fpath.exists() or fpath.stat().st_size == 0: + shutil.rmtree(tmp_dir, ignore_errors=True) + raise RuntimeError(f"Download incomplete: {fname} missing or empty") + + # 4. Swap in the completed download (rmtree + rename, so not atomic when dest already exists) + if dest_dir.exists(): + shutil.rmtree(dest_dir) + tmp_dir.rename(dest_dir) + self.finished.emit() - + except Exception as e: logging.error(f"Download failed: {e}") self.error.emit(str(e)) @@ -311,7 +363,8 @@ class WhisperApp(QObject): self.bridge.llmDownloadRequested.connect(self.on_llm_download_requested) self.engine.rootContext().setContextProperty("ui", self.bridge) - + self.engine.rootContext().setContextProperty("isLinux", sys.platform == 'linux') + # 2. 
Tray setup self.tray = SystemTray() self.tray.quit_requested.connect(self.quit_app) @@ -374,7 +427,7 @@ class WhisperApp(QObject): self.overlay_root.setProperty("color", "transparent") self.center_overlay() - + # Preload Settings (Invisible) logging.info("Preloading Settings window...") self.open_settings() @@ -382,23 +435,27 @@ class WhisperApp(QObject): self.settings_root.setVisible(False) # Install Low-Level Window Hook for Transparent Hit Test - try: - from src.utils.window_hook import WindowHook - hwnd = self.overlay_root.winId() - # Initial scale from config - scale = float(self.config.get("ui_scale")) - - # Current Overlay Dimensions - win_w = int(460 * scale) - win_h = int(180 * scale) - - self.window_hook = WindowHook(hwnd, win_w, win_h, initial_scale=scale) - self.window_hook.install() - - # Initial state: Disabled because we start inactive - self.window_hook.set_enabled(False) - except Exception as e: - logging.error(f"Failed to install WindowHook: {e}") + if os.name == 'nt': + try: + from src.utils.window_hook import WindowHook + hwnd = self.overlay_root.winId() + # Initial scale from config + scale = float(self.config.get("ui_scale")) + + # Current Overlay Dimensions + win_w = int(460 * scale) + win_h = int(180 * scale) + + self.window_hook = WindowHook(hwnd, win_w, win_h, initial_scale=scale) + self.window_hook.install() + + # Initial state: Disabled because we start inactive + self.window_hook.set_enabled(False) + except Exception as e: + logging.error(f"Failed to install WindowHook: {e}") + else: + # Non-Windows (this else also runs on macOS, not just Linux): use Qt flag for click-through overlay + self.overlay_root.setFlag(Qt.WindowTransparentForInput, True) def center_overlay(self): """Calculates and sets the Overlay position above the taskbar.""" @@ -812,19 +869,23 @@ class WhisperApp(QObject): self.bridge.update_status("Error") logging.error(f"Download Error: {err}") + def _update_overlay_state(self, is_active): + """Update overlay input handling (window hook / click-through flag) based on active state.""" + if 
hasattr(self, 'window_hook'): + self.window_hook.set_enabled(is_active) + elif sys.platform == 'linux' and self.overlay_root: + self.overlay_root.setFlag(Qt.WindowTransparentForInput, not is_active) + @Slot(bool) def on_ui_toggle_request(self, is_recording): """Called when recording state changes.""" - # Update Window Hook to allow clicking if active is_active = is_recording or self.bridge.isProcessing - if hasattr(self, 'window_hook'): - self.window_hook.set_enabled(is_active) - + self._update_overlay_state(is_active) + @Slot(bool) def on_processing_changed(self, is_processing): is_active = self.bridge.isRecording or is_processing - if hasattr(self, 'window_hook'): - self.window_hook.set_enabled(is_active) + self._update_overlay_state(is_active) if __name__ == "__main__": import sys