Release v1.0.2: Implemented Style Prompting & Removed Grammar Correction

- Removed M2M100 Grammar Correction model completely to reduce bloat/complexity.
- Implemented 'Style Prompting' in Settings -> AI Engine to handle punctuation natively via Whisper.
- Added Style Presets: Standard (Default), Casual, and Custom.
- Optimized Build: Bootstrapper no longer requires transformers/sentencepiece.
- Fixed 'torch' NameError in Low VRAM mode.
- Fixed Bootstrapper missing dependency detection.
- Updated UI to reflect removed features.
- Included compiled v1.0.2 Executable in dist/.
This commit is contained in:
Your Name
2026-01-25 13:42:06 +02:00
parent 03f46ee1e3
commit 84f10092e9
12 changed files with 246 additions and 37 deletions

View File

@@ -347,11 +347,17 @@ class Bootstrapper:
messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}")
return False
def check_dependencies(self):
    """Report whether the critical dependencies are installed.

    Placeholder for deprecated logic: no inspection is performed and the
    check always reports success.
    """
    dependencies_ready = True
    return dependencies_ready
def setup_and_run(self):
"""Full setup/update and run flow."""
try:
# 1. Ensure basics
if not self.is_python_ready():
self.download_python()
self._fix_pth_file() # Ensure pth is fixed immediately after download
self.install_pip()
self.install_packages()
@@ -362,7 +368,10 @@ class Bootstrapper:
if self.run_app():
if self.ui: self.ui.root.quit()
except Exception as e:
messagebox.showerror("Setup Error", f"Installation failed: {e}")
if self.ui:
import tkinter.messagebox as mb
mb.showerror("Setup Error", f"Installation failed: {e}") # Improved error visibility
log(f"Fatal error: {e}")
import traceback
traceback.print_exc()

BIN
dist/WhisperVoice.exe vendored Normal file

Binary file not shown.

12
main.py
View File

@@ -101,20 +101,14 @@ class DownloadWorker(QThread):
import requests
from tqdm import tqdm
model_path = get_models_path()
# Determine what to download
dest_dir = model_path / f"faster-whisper-{self.model_name}"
dest_dir.mkdir(parents=True, exist_ok=True)
# Files to download for a standard faster-whisper model
# We map local filenames to HF repo filenames
repo_id = f"Systran/faster-whisper-{self.model_name}"
files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
# Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
# We'll stick to the core 4.
base_url = f"https://huggingface.co/{repo_id}/resolve/main"
logging.info(f"Downloading {self.model_name} from {base_url}...")
dest_dir.mkdir(parents=True, exist_ok=True)
logging.info(f"Downloading {self.model_name} to {dest_dir}...")
# 1. Calculate Total Size
total_size = 0

View File

@@ -39,39 +39,36 @@ def build_portable():
print("⏳ This may take 5-10 minutes...")
PyInstaller.__main__.run([
"main.py", # Entry point
"bootstrapper.py", # Entry point (Tiny Installer)
"--name=WhisperVoice", # EXE name
"--onefile", # Single EXE (slower startup but portable)
"--onefile", # Single EXE
"--noconsole", # No terminal window
"--clean", # Clean cache
*add_data_args, # Bundled assets
# Heavy libraries that need special collection
"--collect-all", "faster_whisper",
"--collect-all", "ctranslate2",
"--collect-all", "PySide6",
"--collect-all", "torch",
"--collect-all", "numpy",
# Bundle the app source to be extracted by bootstrapper
# The bootstrapper expects 'app_source' folder in bundled resources
"--add-data", f"src{os.pathsep}app_source/src",
"--add-data", f"main.py{os.pathsep}app_source",
"--add-data", f"requirements.txt{os.pathsep}app_source",
# Hidden imports (modules imported dynamically)
"--hidden-import", "keyboard",
"--hidden-import", "pyperclip",
"--hidden-import", "psutil",
"--hidden-import", "pynvml",
"--hidden-import", "sounddevice",
"--hidden-import", "scipy",
"--hidden-import", "scipy.signal",
"--hidden-import", "huggingface_hub",
"--hidden-import", "tokenizers",
# Add assets
"--add-data", f"src/ui/qml{os.pathsep}app_source/src/ui/qml",
"--add-data", f"assets{os.pathsep}app_source/assets",
# Qt plugins
"--hidden-import", "PySide6.QtQuickControls2",
"--hidden-import", "PySide6.QtQuick.Controls",
# No heavy collections!
# The bootstrapper uses internal pip to install everything.
# Icon (convert to .ico for Windows)
# "--icon=icon.ico", # Uncomment if you have a .ico file
# Exclude heavy modules to ensure this exe stays tiny
"--exclude-module", "faster_whisper",
"--exclude-module", "torch",
"--exclude-module", "PySide6",
# Icon
# "--icon=icon.ico",
])
print("\n" + "="*60)
print("✅ BUILD COMPLETE!")
print("="*60)

View File

@@ -5,6 +5,7 @@
faster-whisper>=1.0.0
torch>=2.0.0
# UI Framework
PySide6>=6.6.0

View File

@@ -46,7 +46,13 @@ DEFAULT_SETTINGS = {
"best_of": 5,
"vad_filter": True,
"no_repeat_ngram_size": 0,
"condition_on_previous_text": True
"condition_on_previous_text": True,
"initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation
# Low VRAM Mode
"unload_models_after_use": False # If True, models are unloaded immediately to free VRAM
}
class ConfigManager:

View File

@@ -15,6 +15,11 @@ import numpy as np
from src.core.config import ConfigManager
from src.core.paths import get_models_path
try:
import torch
except ImportError:
torch = None
# Import directly - valid since we are now running in the full environment
from faster_whisper import WhisperModel
@@ -153,7 +158,14 @@ class WhisperTranscriber:
for segment in segments:
text_result += segment.text + " "
return text_result.strip()
text_result = text_result.strip()
# Low VRAM Mode: Unload Whisper Model immediately
if self.config.get("unload_models_after_use"):
self.unload_model()
logging.info(f"Final Transcription Output: '{text_result}'")
return text_result
except Exception as e:
logging.error(f"Transcription failed: {e}")
@@ -172,3 +184,21 @@ class WhisperTranscriber:
return True
return False
def unload_model(self):
    """Unload the Whisper model to free memory (Low VRAM Mode).

    Does nothing when no model is currently loaded. Otherwise the model
    reference is dropped, ``current_model_size`` is reset, a garbage
    collection pass is forced, and the CUDA allocator cache is released
    when torch is importable and CUDA is available.
    """
    if not self.model:
        return

    # Dropping the only reference is enough; no separate `del` is needed.
    self.model = None
    self.current_model_size = None

    # Force garbage collection so the model's memory is reclaimed now
    # rather than at some later collection.
    import gc
    gc.collect()

    # `torch` is an optional import in this module and is None when the
    # package is missing, so guard before touching `torch.cuda`.
    if torch is not None and torch.cuda.is_available():
        torch.cuda.empty_cache()

    logging.info("Whisper Model unloaded (Low VRAM Mode).")

View File

@@ -376,6 +376,9 @@ class UIBridge(QObject):
try:
from src.core.paths import get_models_path
# Check new simple format used by DownloadWorker
path_simple = get_models_path() / f"faster-whisper-{size}"
if path_simple.exists() and any(path_simple.iterdir()):

View File

@@ -587,6 +587,53 @@ Window {
Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 }
}
// Settings section that edits the "initial_prompt" setting: a preset picker
// plus a free-text field that is only shown while the "Custom" entry is selected.
ModernSettingsSection {
title: "Style & Prompting"
Layout.margins: 32
Layout.topMargin: 0
content: ColumnLayout {
width: parent.width
spacing: 0
ModernSettingsItem {
label: "Punctuation Style"
description: "Hint for how to format text"
control: ModernComboBox {
id: styleCombo
width: 180
model: ["Standard (Proper)", "Casual (Lowercase)", "Custom"]
// Pick the initial index from the stored prompt string: an exact match with
// one of the two preset strings selects that preset; any other value
// (including an empty or missing prompt) selects "Custom" (index 2).
// NOTE(review): the preset strings are repeated in onActivated below and the
// index-0 string appears to mirror the "initial_prompt" default in
// DEFAULT_SETTINGS - keep them in sync.
Component.onCompleted: {
let current = ui.getSetting("initial_prompt")
if (current === "Mm-hmm. Okay, let's go. I speak in full sentences.") currentIndex = 0
else if (current === "um, okay... i guess so.") currentIndex = 1
else currentIndex = 2
}
// Selecting a preset writes its prompt string to the setting; selecting
// "Custom" writes nothing here.
onActivated: {
if (index === 0) ui.setSetting("initial_prompt", "Mm-hmm. Okay, let's go. I speak in full sentences.")
else if (index === 1) ui.setSetting("initial_prompt", "um, okay... i guess so.")
// Custom: Don't change string immediately, let user type
}
}
}
ModernSettingsItem {
label: "Custom Prompt"
description: "Advanced: Define your own style hint"
// Shown only while the "Custom" entry (index 2) is selected in styleCombo.
visible: styleCombo.currentIndex === 2
control: ModernTextField {
Layout.preferredWidth: 280
placeholderText: "e.g. 'Hello, World.'"
// Falls back to an empty string when the stored prompt is falsy.
text: ui.getSetting("initial_prompt") || ""
// An empty field is stored as null rather than as an empty string.
onEditingFinished: ui.setSetting("initial_prompt", text === "" ? null : text)
}
}
}
}
ModernSettingsSection {
title: "Model Config"
Layout.margins: 32
@@ -785,6 +832,16 @@ Window {
onActivated: ui.setSetting("compute_type", currentText)
}
}
// Switch row bound to the "unload_models_after_use" setting (Low VRAM Mode).
ModernSettingsItem {
label: "Low VRAM Mode"
description: "Unload models immediately after use (Saves VRAM, Adds Delay)"
showSeparator: false
control: ModernSwitch {
// State is read from the stored setting; every toggle writes the new value back.
checked: ui.getSetting("unload_models_after_use")
onToggled: ui.setSetting("unload_models_after_use", checked)
}
}
}
}

38
test_m2m.py Normal file
View File

@@ -0,0 +1,38 @@
import sys
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
def test_m2m():
    """Print M2M100 "self-translation" output for a few broken sentences.

    Loads the ``facebook/m2m100_418M`` checkpoint, then translates each
    sample into its own source language and prints the input next to the
    generated text.
    """
    checkpoint = "facebook/m2m100_418M"
    print(f"Loading {checkpoint}...")

    tok = M2M100Tokenizer.from_pretrained(checkpoint)
    translator = M2M100ForConditionalGeneration.from_pretrained(checkpoint)

    # Each sample is a (language code, sentence) pair.
    samples = (
        ("en", "he go to school yesterday"),
        ("pl", "on iść do szkoła wczoraj"),  # Intentionally broken Polish grammar
    )

    print("\nStarting M2M Tests (Self-Translation):\n")

    for code, sentence in samples:
        tok.src_lang = code
        batch = tok(sentence, return_tensors="pt")

        # Force the target language to be the SAME as the source language.
        output_ids = translator.generate(
            **batch,
            forced_bos_token_id=tok.get_lang_id(code),
        )
        decoded = tok.batch_decode(output_ids, skip_special_tokens=True)

        report = (
            f"[{code}]",
            f"Input: {sentence}",
            f"Output: {decoded[0]}",
            "-" * 20,
        )
        for line in report:
            print(line)
if __name__ == "__main__":
test_m2m()

40
test_mt0.py Normal file
View File

@@ -0,0 +1,40 @@
import sys
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
def test_mt0():
    """Print MT0 output for grammar-correction prompts in several languages.

    Loads the ``bigscience/mt0-base`` checkpoint, joins each instruction
    with its sample sentence, generates up to 128 tokens and prints the
    full prompt next to the decoded output.
    """
    checkpoint = "bigscience/mt0-base"
    print(f"Loading {checkpoint}...")

    tok = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # Each entry is (language label, instruction, sentence).
    # MT0 is instruction tuned, so the instruction is written in the target
    # language (English prompts for other languages are usually supported too).
    prompts = (
        ("English", "Correct grammar:", "he go to school yesterday"),
        ("Polish", "Popraw gramatykę:", "to jest testowe zdanie bez kropki"),
        ("Finnish", "Korjaa kielioppi:", "tämä on testilause ilman pistettä"),
        ("Russian", "Исправь грамматику:", "это тестовое предложение без точки"),
        ("Japanese", "文法を直してください:", "これは点のないテスト文です"),
        ("Spanish", "Corrige la gramática:", "esta es una oración de prueba sin punto"),
    )

    print("\nStarting MT0 Tests:\n")

    for label, instruction, sentence in prompts:
        request = f"{instruction} {sentence}"
        encoded = tok(request, return_tensors="pt")
        generated = seq2seq.generate(encoded.input_ids, max_length=128)
        answer = tok.decode(generated[0], skip_special_tokens=True)

        report = (
            f"[{label}]",
            f"Input: {request}",
            f"Output: {answer}",
            "-" * 20,
        )
        for line in report:
            print(line)
if __name__ == "__main__":
test_mt0()

34
test_punctuation.py Normal file
View File

@@ -0,0 +1,34 @@
import sys
import os
# Add src to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from src.core.grammar_assistant import GrammarAssistant
def test_punctuation():
    """Print GrammarAssistant corrections for a few poorly punctuated samples.

    Creates a ``GrammarAssistant``, loads its model, then prints every
    sample followed by the corrected text.
    """
    corrector = GrammarAssistant()
    corrector.load_model()

    raw_texts = (
        # User's example (verbatim)
        "If the voice recognition doesn't recognize that I like stopped Or something would that would it also correct that",
        # Generic run-on
        "hello how are you doing today i am doing fine thanks for asking",
        # Missing commas/periods
        "well i think its valid however we should probably check the logs first",
    )

    print("\nStarting Punctuation Tests:\n")

    for raw in raw_texts:
        # The original text is printed before the model is called.
        print(f"Original: {raw}")
        fixed = corrector.correct(raw)
        print(f"Corrected: {fixed}")
        print("-" * 20)
if __name__ == "__main__":
test_punctuation()