Release v1.0.2: Implemented Style Prompting & Removed Grammar Correction

- Removed M2M100 Grammar Correction model completely to reduce bloat/complexity. - Implemented 'Style Prompting' in Settings -> AI Engine to handle punctuation natively via Whisper. - Added Style Presets: Standard (Default), Casual, and Custom. - Optimized Build: Bootstrapper no longer requires transformers/sentencepiece. - Fixed 'torch' NameError in Low VRAM mode. - Fixed Bootstrapper missing dependency detection. - Updated UI to reflect removed features. - Included compiled v1.0.2 Executable in dist/.
2026-01-25 13:42:06 +02:00
parent 03f46ee1e3
commit 84f10092e9
12 changed files with 246 additions and 37 deletions
@@ -347,11 +347,17 @@ class Bootstrapper:
            messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}")
            return False

+    def check_dependencies(self):
+        """Quick check if critical dependencies are installed."""
+        return True # Deprecated logic placeholder
+
    def setup_and_run(self):
        """Full setup/update and run flow."""
        try:
+            # 1. Ensure basics
            if not self.is_python_ready():
                self.download_python()
+                self._fix_pth_file() # Ensure pth is fixed immediately after download
                self.install_pip()
                self.install_packages()
            
@@ -362,7 +368,10 @@ class Bootstrapper:
            if self.run_app():
                if self.ui: self.ui.root.quit()
        except Exception as e:
-            messagebox.showerror("Setup Error", f"Installation failed: {e}")
+            if self.ui:
+                import tkinter.messagebox as mb
+                mb.showerror("Setup Error", f"Installation failed: {e}") # Improved error visibility
+            log(f"Fatal error: {e}")
            import traceback
            traceback.print_exc()

@@ -101,20 +101,14 @@ class DownloadWorker(QThread):
            import requests
            from tqdm import tqdm
            model_path = get_models_path()
+            # Determine what to download
            dest_dir = model_path / f"faster-whisper-{self.model_name}"
-            dest_dir.mkdir(parents=True, exist_ok=True)
-            
-            # Files to download for a standard faster-whisper model
-            # We map local filenames to HF repo filenames
            repo_id = f"Systran/faster-whisper-{self.model_name}"
            files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
-            
-            # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
-            # We'll stick to the core 4.
-            
            base_url = f"https://huggingface.co/{repo_id}/resolve/main"

-            logging.info(f"Downloading {self.model_name} from {base_url}...")
+            dest_dir.mkdir(parents=True, exist_ok=True)
+            logging.info(f"Downloading {self.model_name} to {dest_dir}...")
            
            # 1. Calculate Total Size
            total_size = 0
@@ -39,39 +39,36 @@ def build_portable():
    print("⏳ This may take 5-10 minutes...")
    
    PyInstaller.__main__.run([
-        "main.py",                       # Entry point
+        "bootstrapper.py",               # Entry point (Tiny Installer)
        "--name=WhisperVoice",           # EXE name
-        "--onefile",                     # Single EXE (slower startup but portable)
+        "--onefile",                     # Single EXE
        "--noconsole",                   # No terminal window
        "--clean",                       # Clean cache
-        *add_data_args,                  # Bundled assets
        
-        # Heavy libraries that need special collection
-        "--collect-all", "faster_whisper",
-        "--collect-all", "ctranslate2",
-        "--collect-all", "PySide6",
-        "--collect-all", "torch",
-        "--collect-all", "numpy",
+        # Bundle the app source to be extracted by bootstrapper
+        # The bootstrapper expects 'app_source' folder in bundled resources
+        "--add-data", f"src{os.pathsep}app_source/src",
+        "--add-data", f"main.py{os.pathsep}app_source",
+        "--add-data", f"requirements.txt{os.pathsep}app_source",
        
-        # Hidden imports (modules imported dynamically)
-        "--hidden-import", "keyboard",
-        "--hidden-import", "pyperclip",
-        "--hidden-import", "psutil",
-        "--hidden-import", "pynvml",
-        "--hidden-import", "sounddevice",
-        "--hidden-import", "scipy",
-        "--hidden-import", "scipy.signal",
-        "--hidden-import", "huggingface_hub",
-        "--hidden-import", "tokenizers",
+        # Add assets
+        "--add-data", f"src/ui/qml{os.pathsep}app_source/src/ui/qml",
+        "--add-data", f"assets{os.pathsep}app_source/assets",
        
-        # Qt plugins
-        "--hidden-import", "PySide6.QtQuickControls2",
-        "--hidden-import", "PySide6.QtQuick.Controls",
+        # No heavy collections! 
+        # The bootstrapper uses internal pip to install everything.
        
-        # Icon (convert to .ico for Windows)
-        # "--icon=icon.ico",  # Uncomment if you have a .ico file
+        # Exclude heavy modules to ensure this exe stays tiny
+        "--exclude-module", "faster_whisper",
+        "--exclude-module", "torch",
+        "--exclude-module", "PySide6",
+
+        
+        # Icon
+        # "--icon=icon.ico",
    ])

+
    print("\n" + "="*60)
    print("✅ BUILD COMPLETE!")
    print("="*60)
@@ -5,6 +5,7 @@
 faster-whisper>=1.0.0
 torch>=2.0.0

+
 # UI Framework
 PySide6>=6.6.0

@@ -46,7 +46,13 @@ DEFAULT_SETTINGS = {
    "best_of": 5,
    "vad_filter": True,
    "no_repeat_ngram_size": 0,
-    "condition_on_previous_text": True
+    "condition_on_previous_text": True,
+    "initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation
+    
+
+    
+    # Low VRAM Mode
+    "unload_models_after_use": False # If True, models are unloaded immediately to free VRAM
 }

 class ConfigManager:
@@ -15,6 +15,11 @@ import numpy as np
 from src.core.config import ConfigManager
 from src.core.paths import get_models_path

+try:
+    import torch
+except ImportError:
+    torch = None
+
 # Import directly - valid since we are now running in the full environment
 from faster_whisper import WhisperModel

@@ -153,7 +158,14 @@ class WhisperTranscriber:
            for segment in segments:
                text_result += segment.text + " "
                
-            return text_result.strip()
+            text_result = text_result.strip()
+            
+            # Low VRAM Mode: Unload Whisper Model immediately
+            if self.config.get("unload_models_after_use"):
+                self.unload_model()
+
+            logging.info(f"Final Transcription Output: '{text_result}'")
+            return text_result
            
        except Exception as e:
            logging.error(f"Transcription failed: {e}")
@@ -172,3 +184,21 @@ class WhisperTranscriber:
            return True
            
        return False
+
+    def unload_model(self):
+        """
+        Unloads model to free memory.
+        """
+        if self.model:
+            del self.model
+        
+        self.model = None
+        self.current_model_size = None
+        
+        # Force garbage collection
+        import gc
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            
+        logging.info("Whisper Model unloaded (Low VRAM Mode).")
@@ -376,6 +376,9 @@ class UIBridge(QObject):
        
        try:
            from src.core.paths import get_models_path
+            
+
+
            # Check new simple format used by DownloadWorker
            path_simple = get_models_path() / f"faster-whisper-{size}"
            if path_simple.exists() and any(path_simple.iterdir()):
@@ -587,6 +587,53 @@ Window {
                                Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 }
                            }

+                                    ModernSettingsSection {
+                                title: "Style & Prompting"
+                                Layout.margins: 32
+                                Layout.topMargin: 0
+                                
+                                content: ColumnLayout {
+                                    width: parent.width
+                                    spacing: 0
+                                    
+                                    ModernSettingsItem {
+                                        label: "Punctuation Style"
+                                        description: "Hint for how to format text"
+                                        control: ModernComboBox {
+                                            id: styleCombo
+                                            width: 180
+                                            model: ["Standard (Proper)", "Casual (Lowercase)", "Custom"]
+                                            
+                                            // Logic to determine initial index based on config string
+                                            Component.onCompleted: {
+                                                let current = ui.getSetting("initial_prompt")
+                                                if (current === "Mm-hmm. Okay, let's go. I speak in full sentences.") currentIndex = 0
+                                                else if (current === "um, okay... i guess so.") currentIndex = 1
+                                                else currentIndex = 2
+                                            }
+
+                                            onActivated: {
+                                                if (index === 0) ui.setSetting("initial_prompt", "Mm-hmm. Okay, let's go. I speak in full sentences.")
+                                                else if (index === 1) ui.setSetting("initial_prompt", "um, okay... i guess so.")
+                                                // Custom: Don't change string immediately, let user type
+                                            }
+                                        }
+                                    }
+
+                                    ModernSettingsItem {
+                                        label: "Custom Prompt"
+                                        description: "Advanced: Define your own style hint"
+                                        visible: styleCombo.currentIndex === 2
+                                        control: ModernTextField {
+                                            Layout.preferredWidth: 280
+                                            placeholderText: "e.g. 'Hello, World.'"
+                                            text: ui.getSetting("initial_prompt") || ""
+                                            onEditingFinished: ui.setSetting("initial_prompt", text === "" ? null : text)
+                                        }
+                                    }
+                                }
+                            }
+
                            ModernSettingsSection {
                                title: "Model Config"
                                Layout.margins: 32
@@ -785,6 +832,16 @@ Window {
                                            onActivated: ui.setSetting("compute_type", currentText)
                                        }
                                    }
+                                    
+                                    ModernSettingsItem {
+                                        label: "Low VRAM Mode"
+                                        description: "Unload models immediately after use (Saves VRAM, Adds Delay)"
+                                        showSeparator: false
+                                        control: ModernSwitch {
+                                            checked: ui.getSetting("unload_models_after_use")
+                                            onToggled: ui.setSetting("unload_models_after_use", checked)
+                                        }
+                                    }
                                }
                            }

@@ -0,0 +1,38 @@
+
+import sys
+from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
+
+def test_m2m():
+    model_name = "facebook/m2m100_418M"
+    print(f"Loading {model_name}...")
+    
+    tokenizer = M2M100Tokenizer.from_pretrained(model_name)
+    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
+    
+    # Test cases: (Language Code, Input)
+    test_cases = [
+        ("en", "he go to school yesterday"),
+        ("pl", "on iść do szkoła wczoraj"), # Intentional broken grammar in Polish
+    ]
+    
+    print("\nStarting M2M Tests (Self-Translation):\n")
+    
+    for lang, input_text in test_cases:
+        tokenizer.src_lang = lang
+        encoded = tokenizer(input_text, return_tensors="pt")
+        
+        # Translate to SAME language
+        generated_tokens = model.generate(
+            **encoded, 
+            forced_bos_token_id=tokenizer.get_lang_id(lang)
+        )
+        
+        corrected = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        
+        print(f"[{lang}]")
+        print(f"Input:    {input_text}")
+        print(f"Output:   {corrected}")
+        print("-" * 20)
+
+if __name__ == "__main__":
+    test_m2m()
@@ -0,0 +1,40 @@
+
+import sys
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+def test_mt0():
+    model_name = "bigscience/mt0-base"
+    print(f"Loading {model_name}...")
+    
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    
+    # Test cases: (Language, Prompt, Input)
+    # MT0 is instruction tuned, so we should prompt it in the target language or English.
+    # Cross-lingual prompting (English prompt -> Target tasks) is usually supported.
+    
+    test_cases = [
+        ("English", "Correct grammar:", "he go to school yesterday"),
+        ("Polish", "Popraw gramatykę:", "to jest testowe zdanie bez kropki"),
+        ("Finnish", "Korjaa kielioppi:", "tämä on testilause ilman pistettä"),
+        ("Russian", "Исправь грамматику:", "это тестовое предложение без точки"),
+        ("Japanese", "文法を直してください:", "これは点のないテスト文です"),
+        ("Spanish", "Corrige la gramática:", "esta es una oración de prueba sin punto"),
+    ]
+    
+    print("\nStarting MT0 Tests:\n")
+    
+    for lang, prompt_text, input_text in test_cases:
+        full_input = f"{prompt_text} {input_text}"
+        inputs = tokenizer(full_input, return_tensors="pt")
+        
+        outputs = model.generate(inputs.input_ids, max_length=128)
+        corrected = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        
+        print(f"[{lang}]")
+        print(f"Input:    {full_input}")
+        print(f"Output:   {corrected}")
+        print("-" * 20)
+
+if __name__ == "__main__":
+    test_mt0()
@@ -0,0 +1,34 @@
+
+import sys
+import os
+
+# Add src to path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from src.core.grammar_assistant import GrammarAssistant
+
+def test_punctuation():
+    assistant = GrammarAssistant()
+    assistant.load_model()
+    
+    samples = [
+        # User's example (verbatim)
+        "If the voice recognition doesn't recognize that I like stopped Or something would that would it also correct that",
+        
+        # Generic run-on
+        "hello how are you doing today i am doing fine thanks for asking",
+        
+        # Missing commas/periods
+        "well i think its valid however we should probably check the logs first"
+    ]
+    
+    print("\nStarting Punctuation Tests:\n")
+    
+    for sample in samples:
+        print(f"Original: {sample}")
+        corrected = assistant.correct(sample)
+        print(f"Corrected: {corrected}")
+        print("-" * 20)
+
+if __name__ == "__main__":
+    test_punctuation()