diff --git a/bootstrapper.py b/bootstrapper.py index 5e8d0fc..9a1f83c 100644 --- a/bootstrapper.py +++ b/bootstrapper.py @@ -347,11 +347,17 @@ class Bootstrapper: messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}") return False + def check_dependencies(self): + """Quick check if critical dependencies are installed.""" + return True # Deprecated logic placeholder + def setup_and_run(self): """Full setup/update and run flow.""" try: + # 1. Ensure basics if not self.is_python_ready(): self.download_python() + self._fix_pth_file() # Ensure pth is fixed immediately after download self.install_pip() self.install_packages() @@ -362,7 +368,10 @@ class Bootstrapper: if self.run_app(): if self.ui: self.ui.root.quit() except Exception as e: - messagebox.showerror("Setup Error", f"Installation failed: {e}") + if self.ui: + import tkinter.messagebox as mb + mb.showerror("Setup Error", f"Installation failed: {e}") # Improved error visibility + log(f"Fatal error: {e}") import traceback traceback.print_exc() diff --git a/dist/WhisperVoice.exe b/dist/WhisperVoice.exe new file mode 100644 index 0000000..a2a87bc Binary files /dev/null and b/dist/WhisperVoice.exe differ diff --git a/main.py b/main.py index 2d543b0..d5768ae 100644 --- a/main.py +++ b/main.py @@ -101,20 +101,14 @@ class DownloadWorker(QThread): import requests from tqdm import tqdm model_path = get_models_path() + # Determine what to download dest_dir = model_path / f"faster-whisper-{self.model_name}" - dest_dir.mkdir(parents=True, exist_ok=True) - - # Files to download for a standard faster-whisper model - # We map local filenames to HF repo filenames repo_id = f"Systran/faster-whisper-{self.model_name}" files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"] - - # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?) - # We'll stick to the core 4. 
- base_url = f"https://huggingface.co/{repo_id}/resolve/main" - - logging.info(f"Downloading {self.model_name} from {base_url}...") + + dest_dir.mkdir(parents=True, exist_ok=True) + logging.info(f"Downloading {self.model_name} to {dest_dir}...") # 1. Calculate Total Size total_size = 0 diff --git a/portable_build.py b/portable_build.py index cd1ea66..578c27b 100644 --- a/portable_build.py +++ b/portable_build.py @@ -39,39 +39,36 @@ def build_portable(): print("⏳ This may take 5-10 minutes...") PyInstaller.__main__.run([ - "main.py", # Entry point + "bootstrapper.py", # Entry point (Tiny Installer) "--name=WhisperVoice", # EXE name - "--onefile", # Single EXE (slower startup but portable) + "--onefile", # Single EXE "--noconsole", # No terminal window "--clean", # Clean cache - *add_data_args, # Bundled assets - # Heavy libraries that need special collection - "--collect-all", "faster_whisper", - "--collect-all", "ctranslate2", - "--collect-all", "PySide6", - "--collect-all", "torch", - "--collect-all", "numpy", + # Bundle the app source to be extracted by bootstrapper + # The bootstrapper expects 'app_source' folder in bundled resources + "--add-data", f"src{os.pathsep}app_source/src", + "--add-data", f"main.py{os.pathsep}app_source", + "--add-data", f"requirements.txt{os.pathsep}app_source", - # Hidden imports (modules imported dynamically) - "--hidden-import", "keyboard", - "--hidden-import", "pyperclip", - "--hidden-import", "psutil", - "--hidden-import", "pynvml", - "--hidden-import", "sounddevice", - "--hidden-import", "scipy", - "--hidden-import", "scipy.signal", - "--hidden-import", "huggingface_hub", - "--hidden-import", "tokenizers", + # Add assets + "--add-data", f"src/ui/qml{os.pathsep}app_source/src/ui/qml", + "--add-data", f"assets{os.pathsep}app_source/assets", - # Qt plugins - "--hidden-import", "PySide6.QtQuickControls2", - "--hidden-import", "PySide6.QtQuick.Controls", + # No heavy collections! 
+ # The bootstrapper uses internal pip to install everything. - # Icon (convert to .ico for Windows) - # "--icon=icon.ico", # Uncomment if you have a .ico file + # Exclude heavy modules to ensure this exe stays tiny + "--exclude-module", "faster_whisper", + "--exclude-module", "torch", + "--exclude-module", "PySide6", + + + # Icon + # "--icon=icon.ico", ]) + print("\n" + "="*60) print("✅ BUILD COMPLETE!") print("="*60) diff --git a/requirements.txt b/requirements.txt index c4ce426..1cb562e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ faster-whisper>=1.0.0 torch>=2.0.0 + # UI Framework PySide6>=6.6.0 diff --git a/src/core/config.py b/src/core/config.py index 6ea3138..10f7f76 100644 --- a/src/core/config.py +++ b/src/core/config.py @@ -46,7 +46,13 @@ DEFAULT_SETTINGS = { "best_of": 5, "vad_filter": True, "no_repeat_ngram_size": 0, - "condition_on_previous_text": True + "condition_on_previous_text": True, + "initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation + + + + # Low VRAM Mode + "unload_models_after_use": False # If True, models are unloaded immediately to free VRAM } class ConfigManager: diff --git a/src/core/transcriber.py b/src/core/transcriber.py index ccd346b..f9048f6 100644 --- a/src/core/transcriber.py +++ b/src/core/transcriber.py @@ -15,6 +15,11 @@ import numpy as np from src.core.config import ConfigManager from src.core.paths import get_models_path +try: + import torch +except ImportError: + torch = None + # Import directly - valid since we are now running in the full environment from faster_whisper import WhisperModel @@ -153,7 +158,14 @@ class WhisperTranscriber: for segment in segments: text_result += segment.text + " " - return text_result.strip() + text_result = text_result.strip() + + # Low VRAM Mode: Unload Whisper Model immediately + if self.config.get("unload_models_after_use"): + self.unload_model() + + logging.info(f"Final Transcription Output: '{text_result}'") + 
return text_result except Exception as e: logging.error(f"Transcription failed: {e}") @@ -172,3 +184,21 @@ return True return False + + def unload_model(self): + """ + Unloads model to free memory. + """ + if self.model: + del self.model + + self.model = None + self.current_model_size = None + + # Force garbage collection + import gc + gc.collect() + if torch is not None and torch.cuda.is_available(): + torch.cuda.empty_cache() + + logging.info("Whisper Model unloaded (Low VRAM Mode).") diff --git a/src/ui/bridge.py b/src/ui/bridge.py index 7029189..a3ca549 100644 --- a/src/ui/bridge.py +++ b/src/ui/bridge.py @@ -376,6 +376,9 @@ class UIBridge(QObject): try: from src.core.paths import get_models_path + + + # Check new simple format used by DownloadWorker path_simple = get_models_path() / f"faster-whisper-{size}" if path_simple.exists() and any(path_simple.iterdir()): diff --git a/src/ui/qml/Settings.qml b/src/ui/qml/Settings.qml index 03095fb..9659849 100644 --- a/src/ui/qml/Settings.qml +++ b/src/ui/qml/Settings.qml @@ -587,6 +587,53 @@ Window { Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 } } + ModernSettingsSection { + title: "Style & Prompting" + Layout.margins: 32 + Layout.topMargin: 0 + + content: ColumnLayout { + width: parent.width + spacing: 0 + + ModernSettingsItem { + label: "Punctuation Style" + description: "Hint for how to format text" + control: ModernComboBox { + id: styleCombo + width: 180 + model: ["Standard (Proper)", "Casual (Lowercase)", "Custom"] + + // Logic to determine initial index based on config string + Component.onCompleted: { + let current = ui.getSetting("initial_prompt") + if (current === "Mm-hmm. Okay, let's go. I speak in full sentences.") currentIndex = 0 + else if (current === "um, okay... i guess so.") currentIndex = 1 + else currentIndex = 2 + } + + onActivated: { + if (index === 0) ui.setSetting("initial_prompt", "Mm-hmm. Okay, let's go. 
I speak in full sentences.") + else if (index === 1) ui.setSetting("initial_prompt", "um, okay... i guess so.") + // Custom: Don't change string immediately, let user type + } + } + } + + ModernSettingsItem { + label: "Custom Prompt" + description: "Advanced: Define your own style hint" + visible: styleCombo.currentIndex === 2 + control: ModernTextField { + Layout.preferredWidth: 280 + placeholderText: "e.g. 'Hello, World.'" + text: ui.getSetting("initial_prompt") || "" + onEditingFinished: ui.setSetting("initial_prompt", text === "" ? null : text) + } + } + } + } + ModernSettingsSection { title: "Model Config" Layout.margins: 32 @@ -785,6 +832,16 @@ Window { onActivated: ui.setSetting("compute_type", currentText) } } + + ModernSettingsItem { + label: "Low VRAM Mode" + description: "Unload models immediately after use (Saves VRAM, Adds Delay)" + showSeparator: false + control: ModernSwitch { + checked: ui.getSetting("unload_models_after_use") + onToggled: ui.setSetting("unload_models_after_use", checked) + } + } } } diff --git a/test_m2m.py b/test_m2m.py new file mode 100644 index 0000000..466c185 --- /dev/null +++ b/test_m2m.py @@ -0,0 +1,38 @@ + +import sys +from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer + +def test_m2m(): + model_name = "facebook/m2m100_418M" + print(f"Loading {model_name}...") + + tokenizer = M2M100Tokenizer.from_pretrained(model_name) + model = M2M100ForConditionalGeneration.from_pretrained(model_name) + + # Test cases: (Language Code, Input) + test_cases = [ + ("en", "he go to school yesterday"), + ("pl", "on iść do szkoła wczoraj"), # Intentional broken grammar in Polish + ] + + print("\nStarting M2M Tests (Self-Translation):\n") + + for lang, input_text in test_cases: + tokenizer.src_lang = lang + encoded = tokenizer(input_text, return_tensors="pt") + + # Translate to SAME language + generated_tokens = model.generate( + **encoded, + forced_bos_token_id=tokenizer.get_lang_id(lang) + ) + + corrected = 
tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0] + + print(f"[{lang}]") + print(f"Input: {input_text}") + print(f"Output: {corrected}") + print("-" * 20) + +if __name__ == "__main__": + test_m2m() diff --git a/test_mt0.py b/test_mt0.py new file mode 100644 index 0000000..1ff54ae --- /dev/null +++ b/test_mt0.py @@ -0,0 +1,40 @@ + +import sys +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + +def test_mt0(): + model_name = "bigscience/mt0-base" + print(f"Loading {model_name}...") + + tokenizer = AutoTokenizer.from_pretrained(model_name) + model = AutoModelForSeq2SeqLM.from_pretrained(model_name) + + # Test cases: (Language, Prompt, Input) + # MT0 is instruction tuned, so we should prompt it in the target language or English. + # Cross-lingual prompting (English prompt -> Target tasks) is usually supported. + + test_cases = [ + ("English", "Correct grammar:", "he go to school yesterday"), + ("Polish", "Popraw gramatykę:", "to jest testowe zdanie bez kropki"), + ("Finnish", "Korjaa kielioppi:", "tämä on testilause ilman pistettä"), + ("Russian", "Исправь грамматику:", "это тестовое предложение без точки"), + ("Japanese", "文法を直してください:", "これは点のないテスト文です"), + ("Spanish", "Corrige la gramática:", "esta es una oración de prueba sin punto"), + ] + + print("\nStarting MT0 Tests:\n") + + for lang, prompt_text, input_text in test_cases: + full_input = f"{prompt_text} {input_text}" + inputs = tokenizer(full_input, return_tensors="pt") + + outputs = model.generate(inputs.input_ids, max_length=128) + corrected = tokenizer.decode(outputs[0], skip_special_tokens=True) + + print(f"[{lang}]") + print(f"Input: {full_input}") + print(f"Output: {corrected}") + print("-" * 20) + +if __name__ == "__main__": + test_mt0() diff --git a/test_punctuation.py b/test_punctuation.py new file mode 100644 index 0000000..7127c0a --- /dev/null +++ b/test_punctuation.py @@ -0,0 +1,34 @@ + +import sys +import os + +# Add src to path +sys.path.insert(0, 
os.path.dirname(os.path.abspath(__file__))) + +from src.core.grammar_assistant import GrammarAssistant + +def test_punctuation(): + assistant = GrammarAssistant() + assistant.load_model() + + samples = [ + # User's example (verbatim) + "If the voice recognition doesn't recognize that I like stopped Or something would that would it also correct that", + + # Generic run-on + "hello how are you doing today i am doing fine thanks for asking", + + # Missing commas/periods + "well i think its valid however we should probably check the logs first" + ] + + print("\nStarting Punctuation Tests:\n") + + for sample in samples: + print(f"Original: {sample}") + corrected = assistant.correct(sample) + print(f"Corrected: {corrected}") + print("-" * 20) + +if __name__ == "__main__": + test_punctuation()