Release v1.0.2: Implemented Style Prompting & Removed Grammar Correction

- Removed the M2M100 Grammar Correction model completely to reduce bloat/complexity.
- Implemented 'Style Prompting' in Settings -> AI Engine to handle punctuation natively via Whisper.
- Added Style Presets: Standard (Default), Casual, and Custom.
- Optimized Build: Bootstrapper no longer requires transformers/sentencepiece.
- Fixed 'torch' NameError in Low VRAM mode.
- Fixed Bootstrapper missing dependency detection.
- Updated UI to reflect removed features.
- Included compiled v1.0.2 Executable in dist/.
2026-01-25 13:42:06 +02:00
parent 03f46ee1e3
commit 84f10092e9
12 changed files with 246 additions and 37 deletions
@@ -347,11 +347,17 @@ class Bootstrapper:
            messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}")
            return False
    def check_dependencies(self):
        """Report whether critical dependencies are installed.

        Currently a stub: no inspection is performed and True is always
        returned, so callers cannot use it to detect a missing package.
        """
        return True # Deprecated logic placeholder
    def setup_and_run(self):
        """Full setup/update and run flow."""
        try:
            # 1. Ensure basics
            if not self.is_python_ready():
                self.download_python()
                self._fix_pth_file() # Ensure pth is fixed immediately after download
                self.install_pip()
                self.install_packages()
@@ -362,7 +368,10 @@ class Bootstrapper:
            if self.run_app():
                if self.ui: self.ui.root.quit()
        except Exception as e:
-            messagebox.showerror("Setup Error", f"Installation failed: {e}")
+            if self.ui:
                import tkinter.messagebox as mb
                mb.showerror("Setup Error", f"Installation failed: {e}") # Improved error visibility
            log(f"Fatal error: {e}")
            import traceback
            traceback.print_exc()
@@ -101,20 +101,14 @@ class DownloadWorker(QThread):
            import requests
            from tqdm import tqdm
            model_path = get_models_path()
            # Determine what to download
            dest_dir = model_path / f"faster-whisper-{self.model_name}"
            dest_dir.mkdir(parents=True, exist_ok=True)
            # Files to download for a standard faster-whisper model
            # We map local filenames to HF repo filenames
            repo_id = f"Systran/faster-whisper-{self.model_name}"
            files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
            # Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
            # We'll stick to the core 4.
            base_url = f"https://huggingface.co/{repo_id}/resolve/main"
-            
+
-            logging.info(f"Downloading {self.model_name} from {base_url}...")
+            dest_dir.mkdir(parents=True, exist_ok=True)
            logging.info(f"Downloading {self.model_name} to {dest_dir}...")
            # 1. Calculate Total Size
            total_size = 0
@@ -39,39 +39,36 @@ def build_portable():
    print("⏳ This may take 5-10 minutes...")
    PyInstaller.__main__.run([
-        "main.py",                       # Entry point
+        "bootstrapper.py",               # Entry point (Tiny Installer)
        "--name=WhisperVoice",           # EXE name
-        "--onefile",                     # Single EXE (slower startup but portable)
+        "--onefile",                     # Single EXE
        "--noconsole",                   # No terminal window
        "--clean",                       # Clean cache
        *add_data_args,                  # Bundled assets
-        # Heavy libraries that need special collection
+        # Bundle the app source to be extracted by bootstrapper
-        "--collect-all", "faster_whisper",
+        # The bootstrapper expects 'app_source' folder in bundled resources
-        "--collect-all", "ctranslate2",
+        "--add-data", f"src{os.pathsep}app_source/src",
-        "--collect-all", "PySide6",
+        "--add-data", f"main.py{os.pathsep}app_source",
-        "--collect-all", "torch",
+        "--add-data", f"requirements.txt{os.pathsep}app_source",
        "--collect-all", "numpy",
-        # Hidden imports (modules imported dynamically)
+        # Add assets
-        "--hidden-import", "keyboard",
+        "--add-data", f"src/ui/qml{os.pathsep}app_source/src/ui/qml",
-        "--hidden-import", "pyperclip",
+        "--add-data", f"assets{os.pathsep}app_source/assets",
        "--hidden-import", "psutil",
        "--hidden-import", "pynvml",
        "--hidden-import", "sounddevice",
        "--hidden-import", "scipy",
        "--hidden-import", "scipy.signal",
        "--hidden-import", "huggingface_hub",
        "--hidden-import", "tokenizers",
-        # Qt plugins
+        # No heavy collections! 
-        "--hidden-import", "PySide6.QtQuickControls2",
+        # The bootstrapper uses internal pip to install everything.
        "--hidden-import", "PySide6.QtQuick.Controls",
-        # Icon (convert to .ico for Windows)
+        # Exclude heavy modules to ensure this exe stays tiny
-        # "--icon=icon.ico",  # Uncomment if you have a .ico file
+        "--exclude-module", "faster_whisper",
        "--exclude-module", "torch",
        "--exclude-module", "PySide6",
        # Icon
        # "--icon=icon.ico",
    ])
    print("\n" + "="*60)
    print("✅ BUILD COMPLETE!")
    print("="*60)
@@ -5,6 +5,7 @@
 faster-whisper>=1.0.0
 torch>=2.0.0
 # UI Framework
 PySide6>=6.6.0
@@ -46,7 +46,13 @@ DEFAULT_SETTINGS = {
    "best_of": 5,
    "vad_filter": True,
    "no_repeat_ngram_size": 0,
-    "condition_on_previous_text": True
+    "condition_on_previous_text": True,
    "initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation
    # Low VRAM Mode
    "unload_models_after_use": False # If True, models are unloaded immediately to free VRAM
 }
 class ConfigManager:
@@ -15,6 +15,11 @@ import numpy as np
 from src.core.config import ConfigManager
 from src.core.paths import get_models_path
 try:
    import torch
 except ImportError:
    torch = None
 # Import directly - valid since we are now running in the full environment
 from faster_whisper import WhisperModel
@@ -153,7 +158,14 @@ class WhisperTranscriber:
            for segment in segments:
                text_result += segment.text + " "
-            return text_result.strip()
+            text_result = text_result.strip()
            # Low VRAM Mode: Unload Whisper Model immediately
            if self.config.get("unload_models_after_use"):
                self.unload_model()
            logging.info(f"Final Transcription Output: '{text_result}'")
            return text_result
        except Exception as e:
            logging.error(f"Transcription failed: {e}")
@@ -172,3 +184,21 @@ class WhisperTranscriber:
            return True
        return False
    def unload_model(self):
        """
        Release the loaded Whisper model so its memory can be reclaimed.

        Clears ``self.model`` and ``self.current_model_size``, forces a
        garbage-collection pass and, when torch is importable and CUDA is
        available, empties torch's CUDA cache. Safe to call when no model is
        loaded and when torch is not installed.
        """
        # Rebinding drops this object's reference to the model; no separate
        # `del` is needed before assigning None.
        self.model = None
        self.current_model_size = None

        # Force garbage collection
        import gc
        gc.collect()

        # torch is an optional import in this module (it is set to None when
        # the import fails), so check for None before touching torch.cuda.
        if torch is not None and torch.cuda.is_available():
            torch.cuda.empty_cache()

        logging.info("Whisper Model unloaded (Low VRAM Mode).")
@@ -376,6 +376,9 @@ class UIBridge(QObject):
        try:
            from src.core.paths import get_models_path
            # Check new simple format used by DownloadWorker
            path_simple = get_models_path() / f"faster-whisper-{size}"
            if path_simple.exists() and any(path_simple.iterdir()):
@@ -587,6 +587,53 @@ Window {
                                Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 }
                            }
                                    // Settings section that maps a punctuation-style preset (or a
                                    // free-form custom string) onto the "initial_prompt" setting.
                                    ModernSettingsSection {
                                title: "Style & Prompting"
                                Layout.margins: 32
                                Layout.topMargin: 0
                                content: ColumnLayout {
                                    width: parent.width
                                    spacing: 0
                                    ModernSettingsItem {
                                        label: "Punctuation Style"
                                        description: "Hint for how to format text"
                                        control: ModernComboBox {
                                            id: styleCombo
                                            width: 180
                                            model: ["Standard (Proper)", "Casual (Lowercase)", "Custom"]
                                            // Logic to determine initial index based on config string.
                                            // Presets are recognised by exact string equality; the
                                            // index-0 text is the same string used as the
                                            // "initial_prompt" default in DEFAULT_SETTINGS, so the two
                                            // must be kept in sync. Anything else (including an unset
                                            // value) selects "Custom".
                                            Component.onCompleted: {
                                                let current = ui.getSetting("initial_prompt")
                                                if (current === "Mm-hmm. Okay, let's go. I speak in full sentences.") currentIndex = 0
                                                else if (current === "um, okay... i guess so.") currentIndex = 1
                                                else currentIndex = 2
                                            }
                                            // Selecting a preset writes its prompt string immediately.
                                            onActivated: {
                                                if (index === 0) ui.setSetting("initial_prompt", "Mm-hmm. Okay, let's go. I speak in full sentences.")
                                                else if (index === 1) ui.setSetting("initial_prompt", "um, okay... i guess so.")
                                                // Custom: Don't change string immediately, let user type
                                            }
                                        }
                                    }
                                    // Only shown while the "Custom" entry (index 2) is selected.
                                    ModernSettingsItem {
                                        label: "Custom Prompt"
                                        description: "Advanced: Define your own style hint"
                                        visible: styleCombo.currentIndex === 2
                                        control: ModernTextField {
                                            Layout.preferredWidth: 280
                                            placeholderText: "e.g. 'Hello, World.'"
                                            // Falls back to an empty string when the setting is unset/falsy.
                                            text: ui.getSetting("initial_prompt") || ""
                                            // An emptied field is stored as null rather than "".
                                            onEditingFinished: ui.setSetting("initial_prompt", text === "" ? null : text)
                                        }
                                    }
                                }
                            }
                            ModernSettingsSection {
                                title: "Model Config"
                                Layout.margins: 32
@@ -785,6 +832,16 @@ Window {
                                            onActivated: ui.setSetting("compute_type", currentText)
                                        }
                                    }
                                    // Toggle for the "unload_models_after_use" setting, the same key
                                    // the transcriber reads after a transcription to decide whether
                                    // to unload the Whisper model.
                                    ModernSettingsItem {
                                        label: "Low VRAM Mode"
                                        description: "Unload models immediately after use (Saves VRAM, Adds Delay)"
                                        showSeparator: false
                                        control: ModernSwitch {
                                            checked: ui.getSetting("unload_models_after_use")
                                            // Write the new switch state back to the setting on every toggle.
                                            onToggled: ui.setSetting("unload_models_after_use", checked)
                                        }
                                    }
                                }
                            }
@@ -0,0 +1,38 @@
 import sys
 from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
 def test_m2m():
    """Run sample sentences through M2M100 with source == target language.

    Loads the facebook/m2m100_418M tokenizer and model, "translates" each
    sample into its own language, and prints the input next to the output.
    """
    checkpoint = "facebook/m2m100_418M"
    print(f"Loading {checkpoint}...")
    tok = M2M100Tokenizer.from_pretrained(checkpoint)
    net = M2M100ForConditionalGeneration.from_pretrained(checkpoint)
    divider = "-" * 20
    # Each sample is (language code, sentence).
    samples = (
        ("en", "he go to school yesterday"),
        # The Polish sentence is intentionally ungrammatical.
        ("pl", "on iść do szkoła wczoraj"),
    )
    print("\nStarting M2M Tests (Self-Translation):\n")
    for code, sentence in samples:
        tok.src_lang = code
        batch = tok(sentence, return_tensors="pt")
        # Force decoding to begin in the same language as the input.
        target_id = tok.get_lang_id(code)
        out_ids = net.generate(**batch, forced_bos_token_id=target_id)
        decoded = tok.batch_decode(out_ids, skip_special_tokens=True)
        result = decoded[0]
        print(f"[{code}]")
        print(f"Input:    {sentence}")
        print(f"Output:   {result}")
        print(divider)
 if __name__ == "__main__":
    test_m2m()
@@ -0,0 +1,40 @@
 import sys
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 def test_mt0():
    """Prompt bigscience/mt0-base with grammar-correction instructions.

    Loads the tokenizer and model, sends one instruction-prefixed sentence
    per language, and prints each full prompt next to the decoded output.
    """
    checkpoint = "bigscience/mt0-base"
    print(f"Loading {checkpoint}...")
    tok = AutoTokenizer.from_pretrained(checkpoint)
    net = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    divider = "-" * 20
    # Each sample is (language label, instruction, sentence).
    # MT0 is instruction tuned, so the instruction is written in the target
    # language or in English; cross-lingual prompting (English prompt ->
    # target task) is usually supported.
    samples = (
        ("English", "Correct grammar:", "he go to school yesterday"),
        ("Polish", "Popraw gramatykę:", "to jest testowe zdanie bez kropki"),
        ("Finnish", "Korjaa kielioppi:", "tämä on testilause ilman pistettä"),
        ("Russian", "Исправь грамматику:", "это тестовое предложение без точки"),
        ("Japanese", "文法を直してください:", "これは点のないテスト文です"),
        ("Spanish", "Corrige la gramática:", "esta es una oración de prueba sin punto"),
    )
    print("\nStarting MT0 Tests:\n")
    for label, instruction, sentence in samples:
        prompt = f"{instruction} {sentence}"
        input_ids = tok(prompt, return_tensors="pt").input_ids
        generated = net.generate(input_ids, max_length=128)
        answer = tok.decode(generated[0], skip_special_tokens=True)
        print(f"[{label}]")
        print(f"Input:    {prompt}")
        print(f"Output:   {answer}")
        print(divider)
 if __name__ == "__main__":
    test_mt0()
@@ -0,0 +1,34 @@
 import sys
 import os
 # Add src to path
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from src.core.grammar_assistant import GrammarAssistant
 def test_punctuation():
    """Feed unpunctuated sample sentences to GrammarAssistant and print results.

    Builds a GrammarAssistant, loads its model, then prints each sample
    followed by whatever ``correct`` returns for it.
    """
    helper = GrammarAssistant()
    helper.load_model()
    divider = "-" * 20
    sentences = (
        # User's example (verbatim)
        "If the voice recognition doesn't recognize that I like stopped Or something would that would it also correct that",
        # Generic run-on
        "hello how are you doing today i am doing fine thanks for asking",
        # Missing commas/periods
        "well i think its valid however we should probably check the logs first",
    )
    print("\nStarting Punctuation Tests:\n")
    for sentence in sentences:
        print(f"Original: {sentence}")
        fixed = helper.correct(sentence)
        print(f"Corrected: {fixed}")
        print(divider)
 if __name__ == "__main__":
    test_punctuation()