Release v1.0.2: Implemented Style Prompting & Removed Grammar Correction
- Removed the M2M100 Grammar Correction model completely to reduce bloat and complexity.
- Implemented 'Style Prompting' in Settings -> AI Engine so that punctuation is handled natively by Whisper.
- Added Style Presets: Standard (Default), Casual, and Custom.
- Optimized the build: the Bootstrapper no longer requires transformers/sentencepiece.
- Fixed the 'torch' NameError in Low VRAM mode.
- Fixed the Bootstrapper's missing-dependency detection.
- Updated the UI to reflect the removed features.
- Included the compiled v1.0.2 executable in dist/.
This commit is contained in:
@@ -347,11 +347,17 @@ class Bootstrapper:
|
|||||||
messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}")
|
messagebox.showerror("WhisperVoice Error", f"Failed to launch app: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def check_dependencies(self):
    """Report whether the critical dependencies are installed.

    This is a deprecated placeholder: nothing is inspected and the
    answer is unconditionally ``True``.
    """
    dependencies_ok = True  # Deprecated logic placeholder
    return dependencies_ok
|
||||||
|
|
||||||
def setup_and_run(self):
|
def setup_and_run(self):
|
||||||
"""Full setup/update and run flow."""
|
"""Full setup/update and run flow."""
|
||||||
try:
|
try:
|
||||||
|
# 1. Ensure basics
|
||||||
if not self.is_python_ready():
|
if not self.is_python_ready():
|
||||||
self.download_python()
|
self.download_python()
|
||||||
|
self._fix_pth_file() # Ensure pth is fixed immediately after download
|
||||||
self.install_pip()
|
self.install_pip()
|
||||||
self.install_packages()
|
self.install_packages()
|
||||||
|
|
||||||
@@ -362,7 +368,10 @@ class Bootstrapper:
|
|||||||
if self.run_app():
|
if self.run_app():
|
||||||
if self.ui: self.ui.root.quit()
|
if self.ui: self.ui.root.quit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
messagebox.showerror("Setup Error", f"Installation failed: {e}")
|
if self.ui:
|
||||||
|
import tkinter.messagebox as mb
|
||||||
|
mb.showerror("Setup Error", f"Installation failed: {e}") # Improved error visibility
|
||||||
|
log(f"Fatal error: {e}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
|
|||||||
BIN
dist/WhisperVoice.exe
vendored
Normal file
BIN
dist/WhisperVoice.exe
vendored
Normal file
Binary file not shown.
14
main.py
14
main.py
@@ -101,20 +101,14 @@ class DownloadWorker(QThread):
|
|||||||
import requests
|
import requests
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
model_path = get_models_path()
|
model_path = get_models_path()
|
||||||
|
# Determine what to download
|
||||||
dest_dir = model_path / f"faster-whisper-{self.model_name}"
|
dest_dir = model_path / f"faster-whisper-{self.model_name}"
|
||||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# Files to download for a standard faster-whisper model
|
|
||||||
# We map local filenames to HF repo filenames
|
|
||||||
repo_id = f"Systran/faster-whisper-{self.model_name}"
|
repo_id = f"Systran/faster-whisper-{self.model_name}"
|
||||||
files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
|
files = ["config.json", "model.bin", "tokenizer.json", "vocabulary.json"]
|
||||||
|
|
||||||
# Check if Preprocessor config exists (sometimes it does, usually optional for whisper?)
|
|
||||||
# We'll stick to the core 4.
|
|
||||||
|
|
||||||
base_url = f"https://huggingface.co/{repo_id}/resolve/main"
|
base_url = f"https://huggingface.co/{repo_id}/resolve/main"
|
||||||
|
|
||||||
logging.info(f"Downloading {self.model_name} from {base_url}...")
|
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
logging.info(f"Downloading {self.model_name} to {dest_dir}...")
|
||||||
|
|
||||||
# 1. Calculate Total Size
|
# 1. Calculate Total Size
|
||||||
total_size = 0
|
total_size = 0
|
||||||
|
|||||||
@@ -39,39 +39,36 @@ def build_portable():
|
|||||||
print("⏳ This may take 5-10 minutes...")
|
print("⏳ This may take 5-10 minutes...")
|
||||||
|
|
||||||
PyInstaller.__main__.run([
|
PyInstaller.__main__.run([
|
||||||
"main.py", # Entry point
|
"bootstrapper.py", # Entry point (Tiny Installer)
|
||||||
"--name=WhisperVoice", # EXE name
|
"--name=WhisperVoice", # EXE name
|
||||||
"--onefile", # Single EXE (slower startup but portable)
|
"--onefile", # Single EXE
|
||||||
"--noconsole", # No terminal window
|
"--noconsole", # No terminal window
|
||||||
"--clean", # Clean cache
|
"--clean", # Clean cache
|
||||||
*add_data_args, # Bundled assets
|
|
||||||
|
|
||||||
# Heavy libraries that need special collection
|
# Bundle the app source to be extracted by bootstrapper
|
||||||
"--collect-all", "faster_whisper",
|
# The bootstrapper expects 'app_source' folder in bundled resources
|
||||||
"--collect-all", "ctranslate2",
|
"--add-data", f"src{os.pathsep}app_source/src",
|
||||||
"--collect-all", "PySide6",
|
"--add-data", f"main.py{os.pathsep}app_source",
|
||||||
"--collect-all", "torch",
|
"--add-data", f"requirements.txt{os.pathsep}app_source",
|
||||||
"--collect-all", "numpy",
|
|
||||||
|
|
||||||
# Hidden imports (modules imported dynamically)
|
# Add assets
|
||||||
"--hidden-import", "keyboard",
|
"--add-data", f"src/ui/qml{os.pathsep}app_source/src/ui/qml",
|
||||||
"--hidden-import", "pyperclip",
|
"--add-data", f"assets{os.pathsep}app_source/assets",
|
||||||
"--hidden-import", "psutil",
|
|
||||||
"--hidden-import", "pynvml",
|
|
||||||
"--hidden-import", "sounddevice",
|
|
||||||
"--hidden-import", "scipy",
|
|
||||||
"--hidden-import", "scipy.signal",
|
|
||||||
"--hidden-import", "huggingface_hub",
|
|
||||||
"--hidden-import", "tokenizers",
|
|
||||||
|
|
||||||
# Qt plugins
|
# No heavy collections!
|
||||||
"--hidden-import", "PySide6.QtQuickControls2",
|
# The bootstrapper uses internal pip to install everything.
|
||||||
"--hidden-import", "PySide6.QtQuick.Controls",
|
|
||||||
|
|
||||||
# Icon (convert to .ico for Windows)
|
# Exclude heavy modules to ensure this exe stays tiny
|
||||||
# "--icon=icon.ico", # Uncomment if you have a .ico file
|
"--exclude-module", "faster_whisper",
|
||||||
|
"--exclude-module", "torch",
|
||||||
|
"--exclude-module", "PySide6",
|
||||||
|
|
||||||
|
|
||||||
|
# Icon
|
||||||
|
# "--icon=icon.ico",
|
||||||
])
|
])
|
||||||
|
|
||||||
|
|
||||||
print("\n" + "="*60)
|
print("\n" + "="*60)
|
||||||
print("✅ BUILD COMPLETE!")
|
print("✅ BUILD COMPLETE!")
|
||||||
print("="*60)
|
print("="*60)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
faster-whisper>=1.0.0
|
faster-whisper>=1.0.0
|
||||||
torch>=2.0.0
|
torch>=2.0.0
|
||||||
|
|
||||||
|
|
||||||
# UI Framework
|
# UI Framework
|
||||||
PySide6>=6.6.0
|
PySide6>=6.6.0
|
||||||
|
|
||||||
|
|||||||
@@ -46,7 +46,13 @@ DEFAULT_SETTINGS = {
|
|||||||
"best_of": 5,
|
"best_of": 5,
|
||||||
"vad_filter": True,
|
"vad_filter": True,
|
||||||
"no_repeat_ngram_size": 0,
|
"no_repeat_ngram_size": 0,
|
||||||
"condition_on_previous_text": True
|
"condition_on_previous_text": True,
|
||||||
|
"initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Low VRAM Mode
|
||||||
|
"unload_models_after_use": False # If True, models are unloaded immediately to free VRAM
|
||||||
}
|
}
|
||||||
|
|
||||||
class ConfigManager:
|
class ConfigManager:
|
||||||
|
|||||||
@@ -15,6 +15,11 @@ import numpy as np
|
|||||||
from src.core.config import ConfigManager
|
from src.core.config import ConfigManager
|
||||||
from src.core.paths import get_models_path
|
from src.core.paths import get_models_path
|
||||||
|
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
except ImportError:
|
||||||
|
torch = None
|
||||||
|
|
||||||
# Import directly - valid since we are now running in the full environment
|
# Import directly - valid since we are now running in the full environment
|
||||||
from faster_whisper import WhisperModel
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
@@ -153,7 +158,14 @@ class WhisperTranscriber:
|
|||||||
for segment in segments:
|
for segment in segments:
|
||||||
text_result += segment.text + " "
|
text_result += segment.text + " "
|
||||||
|
|
||||||
return text_result.strip()
|
text_result = text_result.strip()
|
||||||
|
|
||||||
|
# Low VRAM Mode: Unload Whisper Model immediately
|
||||||
|
if self.config.get("unload_models_after_use"):
|
||||||
|
self.unload_model()
|
||||||
|
|
||||||
|
logging.info(f"Final Transcription Output: '{text_result}'")
|
||||||
|
return text_result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Transcription failed: {e}")
|
logging.error(f"Transcription failed: {e}")
|
||||||
@@ -172,3 +184,21 @@ class WhisperTranscriber:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def unload_model(self):
    """
    Unloads model to free memory.

    Drops the reference held in ``self.model``, resets
    ``self.current_model_size``, forces a garbage-collection pass and,
    when PyTorch is importable and reports CUDA as available, empties
    the CUDA cache. Calling it while no model is loaded is harmless.
    """
    # Rebinding to None releases this object's reference to the model;
    # a separate ``del`` beforehand is not needed.
    self.model = None
    self.current_model_size = None

    # Force garbage collection
    import gc
    gc.collect()

    # ``torch`` is None when the guarded import at the top of the module
    # failed, so it must be checked before touching ``torch.cuda``.
    if torch is not None and torch.cuda.is_available():
        torch.cuda.empty_cache()

    logging.info("Whisper Model unloaded (Low VRAM Mode).")
|
||||||
|
|||||||
@@ -376,6 +376,9 @@ class UIBridge(QObject):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from src.core.paths import get_models_path
|
from src.core.paths import get_models_path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Check new simple format used by DownloadWorker
|
# Check new simple format used by DownloadWorker
|
||||||
path_simple = get_models_path() / f"faster-whisper-{size}"
|
path_simple = get_models_path() / f"faster-whisper-{size}"
|
||||||
if path_simple.exists() and any(path_simple.iterdir()):
|
if path_simple.exists() and any(path_simple.iterdir()):
|
||||||
|
|||||||
@@ -587,6 +587,53 @@ Window {
|
|||||||
Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 }
|
Text { text: "Model configuration and performance"; color: SettingsStyle.textSecondary; font.family: mainFont; font.pixelSize: 14 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ModernSettingsSection {
|
||||||
|
title: "Style & Prompting"
|
||||||
|
Layout.margins: 32
|
||||||
|
Layout.topMargin: 0
|
||||||
|
|
||||||
|
content: ColumnLayout {
|
||||||
|
width: parent.width
|
||||||
|
spacing: 0
|
||||||
|
|
||||||
|
ModernSettingsItem {
|
||||||
|
label: "Punctuation Style"
|
||||||
|
description: "Hint for how to format text"
|
||||||
|
control: ModernComboBox {
|
||||||
|
id: styleCombo
|
||||||
|
width: 180
|
||||||
|
model: ["Standard (Proper)", "Casual (Lowercase)", "Custom"]
|
||||||
|
|
||||||
|
// Logic to determine initial index based on config string
|
||||||
|
Component.onCompleted: {
|
||||||
|
let current = ui.getSetting("initial_prompt")
|
||||||
|
if (current === "Mm-hmm. Okay, let's go. I speak in full sentences.") currentIndex = 0
|
||||||
|
else if (current === "um, okay... i guess so.") currentIndex = 1
|
||||||
|
else currentIndex = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
onActivated: {
|
||||||
|
if (index === 0) ui.setSetting("initial_prompt", "Mm-hmm. Okay, let's go. I speak in full sentences.")
|
||||||
|
else if (index === 1) ui.setSetting("initial_prompt", "um, okay... i guess so.")
|
||||||
|
// Custom: Don't change string immediately, let user type
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ModernSettingsItem {
|
||||||
|
label: "Custom Prompt"
|
||||||
|
description: "Advanced: Define your own style hint"
|
||||||
|
visible: styleCombo.currentIndex === 2
|
||||||
|
control: ModernTextField {
|
||||||
|
Layout.preferredWidth: 280
|
||||||
|
placeholderText: "e.g. 'Hello, World.'"
|
||||||
|
text: ui.getSetting("initial_prompt") || ""
|
||||||
|
onEditingFinished: ui.setSetting("initial_prompt", text === "" ? null : text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ModernSettingsSection {
|
ModernSettingsSection {
|
||||||
title: "Model Config"
|
title: "Model Config"
|
||||||
Layout.margins: 32
|
Layout.margins: 32
|
||||||
@@ -785,6 +832,16 @@ Window {
|
|||||||
onActivated: ui.setSetting("compute_type", currentText)
|
onActivated: ui.setSetting("compute_type", currentText)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ModernSettingsItem {
|
||||||
|
label: "Low VRAM Mode"
|
||||||
|
description: "Unload models immediately after use (Saves VRAM, Adds Delay)"
|
||||||
|
showSeparator: false
|
||||||
|
control: ModernSwitch {
|
||||||
|
checked: ui.getSetting("unload_models_after_use")
|
||||||
|
onToggled: ui.setSetting("unload_models_after_use", checked)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
38
test_m2m.py
Normal file
38
test_m2m.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
import sys
|
||||||
|
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
||||||
|
|
||||||
|
def test_m2m():
    """Run ungrammatical sentences through M2M100 as a same-language translation.

    Loads the ``facebook/m2m100_418M`` tokenizer and model, then for each
    sample sets the tokenizer's source language and forces the first
    generated token to that same language's id, printing the input and
    the decoded output.
    """
    checkpoint = "facebook/m2m100_418M"
    print(f"Loading {checkpoint}...")

    tok = M2M100Tokenizer.from_pretrained(checkpoint)
    translator = M2M100ForConditionalGeneration.from_pretrained(checkpoint)

    # Each sample is (language code, sentence).
    samples = (
        ("en", "he go to school yesterday"),
        ("pl", "on iść do szkoła wczoraj"),  # Intentionally broken Polish grammar
    )

    print("\nStarting M2M Tests (Self-Translation):\n")

    divider = "-" * 20
    for code, sentence in samples:
        tok.src_lang = code
        batch = tok(sentence, return_tensors="pt")

        # Target language equals source language.
        target_id = tok.get_lang_id(code)
        output_ids = translator.generate(**batch, forced_bos_token_id=target_id)

        decoded = tok.batch_decode(output_ids, skip_special_tokens=True)
        result = decoded[0]

        report = (f"[{code}]", f"Input: {sentence}", f"Output: {result}", divider)
        for line in report:
            print(line)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_m2m()
|
||||||
40
test_mt0.py
Normal file
40
test_mt0.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
|
||||||
|
import sys
|
||||||
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
||||||
|
|
||||||
|
def test_mt0():
    """Send grammar-correction prompts in several languages to MT0 and print the replies.

    Loads the ``bigscience/mt0-base`` tokenizer and seq2seq model, joins
    each instruction with its sentence, generates with ``max_length=128``
    and prints the prompt alongside the decoded output.
    """
    checkpoint = "bigscience/mt0-base"
    print(f"Loading {checkpoint}...")

    tok = AutoTokenizer.from_pretrained(checkpoint)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

    # Each entry is (language label, instruction, sentence).
    # Per the original author's note: MT0 is instruction tuned, so the
    # instruction is written in the target language (or English);
    # cross-lingual prompting is expected to usually work.
    prompts = (
        ("English", "Correct grammar:", "he go to school yesterday"),
        ("Polish", "Popraw gramatykę:", "to jest testowe zdanie bez kropki"),
        ("Finnish", "Korjaa kielioppi:", "tämä on testilause ilman pistettä"),
        ("Russian", "Исправь грамматику:", "это тестовое предложение без точки"),
        ("Japanese", "文法を直してください:", "これは点のないテスト文です"),
        ("Spanish", "Corrige la gramática:", "esta es una oración de prueba sin punto"),
    )

    print("\nStarting MT0 Tests:\n")

    divider = "-" * 20
    for label, instruction, sentence in prompts:
        query = f"{instruction} {sentence}"
        encoded = tok(query, return_tensors="pt")

        generated = seq2seq.generate(encoded.input_ids, max_length=128)
        first_sequence = generated[0]
        answer = tok.decode(first_sequence, skip_special_tokens=True)

        report = (f"[{label}]", f"Input: {query}", f"Output: {answer}", divider)
        for line in report:
            print(line)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_mt0()
|
||||||
34
test_punctuation.py
Normal file
34
test_punctuation.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Add src to path
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from src.core.grammar_assistant import GrammarAssistant
|
||||||
|
|
||||||
|
def test_punctuation():
    """Feed unpunctuated sample sentences to ``GrammarAssistant`` and print the results.

    Creates the assistant, loads its model, and for every sample prints
    the original text followed by whatever ``correct`` returns.
    """
    corrector = GrammarAssistant()
    corrector.load_model()

    run_on_sentences = (
        # User's example (verbatim)
        "If the voice recognition doesn't recognize that I like stopped Or something would that would it also correct that",
        # Generic run-on
        "hello how are you doing today i am doing fine thanks for asking",
        # Missing commas/periods
        "well i think its valid however we should probably check the logs first",
    )

    print("\nStarting Punctuation Tests:\n")

    divider = "-" * 20
    for text in run_on_sentences:
        # The original is printed before correction runs, so it is shown
        # even if the call to correct() fails.
        print(f"Original: {text}")
        fixed = corrector.correct(text)
        print(f"Corrected: {fixed}")
        print(divider)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_punctuation()
|
||||||
Reference in New Issue
Block a user