v1.0.1 Feature Update and Polish

Full Changelog:

[New Features]
- Added Native Translation Mode:
  - The Whisper model now supports translating speech from any supported language into English
  - Added 'task' and 'language' parameters to Transcriber core
- Dual Hotkey Support:
  - Added separate Global Hotkeys for Transcribe (default F8) and Translate (default F10)
  - Both hotkeys are fully customizable in Settings
  - Engine dynamically switches modes based on which key is pressed

[UI/UX Improvements]
- Settings Window:
  - Widened Hotkey Input fields (240px) to accommodate long combinations
  - Added pretty-printing for hotkey sequences (e.g. 'ctrl+f9' is displayed as 'Ctrl + F9')
  - Replaced the language-code dropdown with full language names (98 languages plus Auto Detect)
  - Made Language Dropdown scrollable (max height 300px) to prevent screen overflow
  - Removed redundant 'Task' selector (replaced by dedicated hotkeys)
- System Tray:
  - Tooltip now displays both Transcribe and Translate hotkeys
  - Tooltip hotkeys are formatted readably

[Core & Performance]
- Bootstrapper:
  - Implemented Smart Incremental Sync
  - Now compares file size, then file contents byte-for-byte, before copying files
  - Drastically reduces startup time for subsequent runs
  - Preserves user settings.json during updates
- Backend:
  - Fixed HotkeyManager to support dynamic configuration keys
  - Fixed Language Lock: selecting a language now correctly forces the model to use it
  - Refactored bridge/main connection for language list handling
This commit is contained in:
Your Name
2026-01-24 18:29:10 +02:00
parent f184eb0037
commit 4b84a27a67
11 changed files with 342 additions and 72 deletions

View File

@@ -259,48 +259,72 @@ class Bootstrapper:
process.wait()
def refresh_app_source(self):
"""Refresh app source files. Skips if already exists to save time."""
# Optimization: If app/main.py exists, skip update to improve startup speed.
# The user can delete the 'runtime' folder to force an update.
if (self.app_path / "main.py").exists():
log("App already exists. Skipping update.")
return True
if self.ui: self.ui.set_status("Updating app files...")
"""
Smartly updates app source files by only copying changed files.
Preserves user settings and reduces disk I/O.
"""
if self.ui: self.ui.set_status("Checking for updates...")
try:
# Preserve settings.json if it exists
settings_path = self.app_path / "settings.json"
temp_settings = None
if settings_path.exists():
try:
temp_settings = settings_path.read_bytes()
except:
log("Failed to backup settings.json, it involves risk of data loss.")
# 1. Ensure destination exists
if not self.app_path.exists():
self.app_path.mkdir(parents=True, exist_ok=True)
if self.app_path.exists():
shutil.rmtree(self.app_path, ignore_errors=True)
# 2. Walk source and sync
# source_path is the temporary bundled folder
# app_path is the persistent runtime folder
shutil.copytree(
self.source_path,
self.app_path,
ignore=shutil.ignore_patterns(
'__pycache__', '*.pyc', '.git', 'venv',
'build', 'dist', '*.egg-info', 'runtime'
)
)
changes_made = 0
# Restore settings.json
if temp_settings:
try:
settings_path.write_bytes(temp_settings)
log("Restored settings.json")
except:
log("Failed to restore settings.json")
for src_dir, dirs, files in os.walk(self.source_path):
# Determine relative path from source root
rel_path = Path(src_dir).relative_to(self.source_path)
dst_dir = self.app_path / rel_path
# Ensure directory exists
if not dst_dir.exists():
dst_dir.mkdir(parents=True, exist_ok=True)
for file in files:
# Skip ignored files
if file in ['__pycache__', '.git', 'settings.json'] or file.endswith('.pyc'):
continue
src_file = Path(src_dir) / file
dst_file = dst_dir / file
# Check if update needed
should_copy = False
if not dst_file.exists():
should_copy = True
else:
# Compare size first (fast)
if src_file.stat().st_size != dst_file.stat().st_size:
should_copy = True
else:
# Compare content (slower but accurate)
# Only read if size matches to verify diff
if src_file.read_bytes() != dst_file.read_bytes():
should_copy = True
if should_copy:
shutil.copy2(src_file, dst_file)
changes_made += 1
if self.ui: self.ui.set_detail(f"Updated: {file}")
# 3. Cleanup logic (Optional: remove files in dest that are not in source)
# For now, we only add/update to prevent deleting generated user files (logs, etc)
if changes_made > 0:
log(f"Update complete. {changes_made} files changed.")
else:
log("App is up to date.")
return True
except Exception as e:
log(f"Error refreshing app source: {e}")
# Fallback to nuclear option if sync fails completely?
# No, 'smart_sync' failing might mean permissions, nuclear wouldn't help.
return False
def run_app(self):

74
main.py
View File

@@ -118,13 +118,14 @@ class DownloadWorker(QThread):
class TranscriptionWorker(QThread):
finished = Signal(str)
def __init__(self, transcriber, audio_data, is_file=False, parent=None):
def __init__(self, transcriber, audio_data, is_file=False, parent=None, task_override=None):
super().__init__(parent)
self.transcriber = transcriber
self.audio_data = audio_data
self.is_file = is_file
self.task_override = task_override
def run(self):
text = self.transcriber.transcribe(self.audio_data, is_file=self.is_file)
text = self.transcriber.transcribe(self.audio_data, is_file=self.is_file, task=self.task_override)
self.finished.emit(text)
class WhisperApp(QObject):
@@ -166,13 +167,18 @@ class WhisperApp(QObject):
self.tray.transcribe_file_requested.connect(self.transcribe_file)
# Init Tooltip
hotkey = self.config.get("hotkey")
self.tray.setToolTip(f"Whisper Voice - Press {hotkey} to Record")
from src.utils.formatters import format_hotkey
self.format_hotkey = format_hotkey # Store ref
hk1 = self.format_hotkey(self.config.get("hotkey"))
hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")
# 3. Logic Components Placeholders
self.audio_engine = None
self.transcriber = None
self.hotkey_manager = None
self.hk_transcribe = None
self.hk_translate = None
self.overlay_root = None
# 4. Start Loader
@@ -266,9 +272,16 @@ class WhisperApp(QObject):
self.audio_engine.set_visualizer_callback(self.bridge.update_amplitude)
self.audio_engine.set_silence_callback(self.on_silence_detected)
self.transcriber = WhisperTranscriber()
self.hotkey_manager = HotkeyManager()
self.hotkey_manager.triggered.connect(self.toggle_recording)
self.hotkey_manager.start()
# Dual Hotkey Managers
self.hk_transcribe = HotkeyManager(config_key="hotkey")
self.hk_transcribe.triggered.connect(lambda: self.toggle_recording(task_override="transcribe"))
self.hk_transcribe.start()
self.hk_translate = HotkeyManager(config_key="hotkey_translate")
self.hk_translate.triggered.connect(lambda: self.toggle_recording(task_override="translate"))
self.hk_translate.start()
self.bridge.update_status("Ready")
def run(self):
@@ -286,7 +299,8 @@ class WhisperApp(QObject):
except: pass
self.bridge.stats_worker.stop()
if self.hotkey_manager: self.hotkey_manager.stop()
if self.hk_transcribe: self.hk_transcribe.stop()
if self.hk_translate: self.hk_translate.stop()
# Close all QML windows to ensure bindings stop before Python objects die
if self.overlay_root:
@@ -361,10 +375,14 @@ class WhisperApp(QObject):
print(f"Setting Changed: {key} = {value}")
# 1. Hotkey Reload
if key == "hotkey":
if self.hotkey_manager: self.hotkey_manager.reload_hotkey()
if key in ["hotkey", "hotkey_translate"]:
if self.hk_transcribe: self.hk_transcribe.reload_hotkey()
if self.hk_translate: self.hk_translate.reload_hotkey()
if self.tray:
self.tray.setToolTip(f"Whisper Voice - Press {value} to Record")
hk1 = self.format_hotkey(self.config.get("hotkey"))
hk2 = self.format_hotkey(self.config.get("hotkey_translate"))
self.tray.setToolTip(f"Whisper Voice\nTranscribe: {hk1}\nTranslate: {hk2}")
# 2. AI Model Reload (Heavy)
if key in ["model_size", "compute_device", "compute_type"]:
@@ -467,6 +485,8 @@ class WhisperApp(QObject):
file_path, _ = QFileDialog.getOpenFileName(None, "Select Audio", "", "Audio (*.mp3 *.wav *.flac *.m4a *.ogg)")
if file_path:
self.bridge.update_status("Thinking...")
# Files use the default configured task usually, or we could ask?
# Default to config setting for files.
self.worker = TranscriptionWorker(self.transcriber, file_path, is_file=True, parent=self)
self.worker.finished.connect(self.on_transcription_done)
self.worker.start()
@@ -474,10 +494,13 @@ class WhisperApp(QObject):
@Slot()
def on_silence_detected(self):
from PySide6.QtCore import QMetaObject, Qt
# Silence detection always triggers the task that was active?
# Since silence stops recording, it just calls toggle_recording with no arg, using the stored current_task?
# Let's ensure toggle_recording handles no arg calls by stopping the CURRENT task.
QMetaObject.invokeMethod(self, "toggle_recording", Qt.QueuedConnection)
@Slot()
def toggle_recording(self):
@Slot() # Modified to allow lambda override
def toggle_recording(self, task_override=None):
if not self.audio_engine: return
# Prevent starting a new recording while we are still transcribing the last one
@@ -485,23 +508,36 @@ class WhisperApp(QObject):
logging.warning("Ignored toggle request: Transcription in progress.")
return
# Determine which task we are entering
if task_override:
intended_task = task_override
else:
intended_task = self.config.get("task")
if self.audio_engine.recording:
# STOP RECORDING
self.bridge.update_status("Thinking...")
self.bridge.isRecording = False
self.bridge.isProcessing = True # Start Processing
audio_data = self.audio_engine.stop_recording()
self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self)
# Use the task that started this session, or the override if provided (though usually override is for starting)
final_task = getattr(self, "current_recording_task", self.config.get("task"))
self.worker = TranscriptionWorker(self.transcriber, audio_data, parent=self, task_override=final_task)
self.worker.finished.connect(self.on_transcription_done)
self.worker.start()
else:
self.bridge.update_status("Recording")
# START RECORDING
self.current_recording_task = intended_task
self.bridge.update_status(f"Recording ({intended_task})...")
self.bridge.isRecording = True
self.audio_engine.start_recording()
@Slot(bool)
def on_ui_toggle_request(self, state):
if state != self.audio_engine.recording:
self.toggle_recording()
self.toggle_recording() # Default behavior for UI clicks
@Slot(str)
def on_transcription_done(self, text: str):
@@ -514,8 +550,8 @@ class WhisperApp(QObject):
@Slot(bool)
def on_hotkeys_enabled_toggle(self, state):
if self.hotkey_manager:
self.hotkey_manager.set_enabled(state)
if self.hk_transcribe: self.hk_transcribe.set_enabled(state)
if self.hk_translate: self.hk_translate.set_enabled(state)
@Slot(str)
def on_download_requested(self, size):

View File

@@ -16,6 +16,7 @@ from src.core.paths import get_base_path
# Default Configuration
DEFAULT_SETTINGS = {
"hotkey": "f8",
"hotkey_translate": "f10",
"model_size": "small",
"input_device": None, # Device ID (int) or Name (str), None = Default
"save_recordings": False, # Save .wav files for debugging
@@ -38,6 +39,7 @@ DEFAULT_SETTINGS = {
# AI - Advanced
"language": "auto", # "auto" or ISO code
"task": "transcribe", # "transcribe" or "translate" (to English)
"compute_device": "auto", # "auto", "cuda", "cpu"
"compute_type": "int8", # "int8", "float16", "float32"
"beam_size": 5,

View File

@@ -30,15 +30,16 @@ class HotkeyManager(QObject):
triggered = Signal()
def __init__(self, hotkey: str = "f8"):
def __init__(self, config_key: str = "hotkey"):
"""
Initialize the HotkeyManager.
Args:
hotkey (str): The global hotkey string description. Default: "f8".
config_key (str): The configuration key to look up (e.g. "hotkey").
"""
super().__init__()
self.hotkey = hotkey
self.config_key = config_key
self.hotkey = "f8" # Placeholder
self.is_listening = False
self._enabled = True
@@ -58,9 +59,9 @@ class HotkeyManager(QObject):
from src.core.config import ConfigManager
config = ConfigManager()
self.hotkey = config.get("hotkey")
self.hotkey = config.get(self.config_key)
logging.info(f"Registering global hotkey: {self.hotkey}")
logging.info(f"Registering global hotkey ({self.config_key}): {self.hotkey}")
try:
# We don't suppress=True here because we want the app to see keys during recording
# (Wait, actually if we are recording we WANT keyboard to see it,

120
src/core/languages.py Normal file
View File

@@ -0,0 +1,120 @@
"""
Supported Languages Module
==========================
Full list of languages supported by OpenAI Whisper.
Maps ISO codes to display names.
"""
# Lookup table from language code to the display name shown to the user.
# get_language_names() returns the values in this declaration order, so the
# order here is the order of the resulting list ("auto" first).
# "auto" is not a real language: it is the fallback code returned by
# get_code_by_name() and its label is the fallback of get_name_by_code().
LANGUAGES = {
"auto": "Auto Detect",
"af": "Afrikaans",
"sq": "Albanian",
"am": "Amharic",
"ar": "Arabic",
"hy": "Armenian",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"eu": "Basque",
"be": "Belarusian",
"bn": "Bengali",
"bs": "Bosnian",
"br": "Breton",
"bg": "Bulgarian",
"my": "Burmese",
"ca": "Catalan",
"zh": "Chinese",
"hr": "Croatian",
"cs": "Czech",
"da": "Danish",
"nl": "Dutch",
"en": "English",
"et": "Estonian",
"fo": "Faroese",
"fi": "Finnish",
"fr": "French",
"gl": "Galician",
"ka": "Georgian",
"de": "German",
"el": "Greek",
"gu": "Gujarati",
"ht": "Haitian",
"ha": "Hausa",
"haw": "Hawaiian",
"he": "Hebrew",
"hi": "Hindi",
"hu": "Hungarian",
"is": "Icelandic",
"id": "Indonesian",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"kn": "Kannada",
"kk": "Kazakh",
"km": "Khmer",
"ko": "Korean",
"lo": "Lao",
"la": "Latin",
"lv": "Latvian",
"ln": "Lingala",
"lt": "Lithuanian",
"lb": "Luxembourgish",
"mk": "Macedonian",
"mg": "Malagasy",
"ms": "Malay",
"ml": "Malayalam",
"mt": "Maltese",
"mi": "Maori",
"mr": "Marathi",
"mn": "Mongolian",
"ne": "Nepali",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"ps": "Pashto",
"fa": "Persian",
"pl": "Polish",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sr": "Serbian",
"sn": "Shona",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"so": "Somali",
"es": "Spanish",
"su": "Sundanese",
"sw": "Swahili",
"sv": "Swedish",
"tl": "Tagalog",
"tg": "Tajik",
"ta": "Tamil",
"tt": "Tatar",
"te": "Telugu",
"th": "Thai",
"bo": "Tibetan",
"tr": "Turkish",
"tk": "Turkmen",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"cy": "Welsh",
"yi": "Yiddish",
"yo": "Yoruba",
}
def get_language_names():
    """Return a new list of every display name in LANGUAGES, in declaration order."""
    return [*LANGUAGES.values()]
def get_code_by_name(name):
    """
    Return the code of the first LANGUAGES entry whose display name equals *name*.

    The comparison is exact (case-sensitive). When no entry matches, the
    fallback code "auto" is returned.
    """
    matching_codes = (code for code, label in LANGUAGES.items() if label == name)
    return next(matching_codes, "auto")
def get_name_by_code(code):
    """Return the display name registered for *code*, or "Auto Detect" if the code is unknown."""
    try:
        return LANGUAGES[code]
    except KeyError:
        return "Auto Detect"

View File

@@ -74,11 +74,11 @@ class WhisperTranscriber:
logging.error(f"Failed to load model: {e}")
self.model = None
def transcribe(self, audio_data, is_file: bool = False) -> str:
def transcribe(self, audio_data, is_file: bool = False, task: Optional[str] = None) -> str:
"""
Transcribe audio data.
"""
logging.info(f"Starting transcription... (is_file={is_file})")
logging.info(f"Starting transcription... (is_file={is_file}, task={task})")
# Ensure model is loaded
if not self.model:
@@ -91,6 +91,10 @@ class WhisperTranscriber:
beam_size = int(self.config.get("beam_size"))
best_of = int(self.config.get("best_of"))
vad = False if is_file else self.config.get("vad_filter")
language = self.config.get("language")
# Use task override if provided, otherwise config
final_task = task if task else self.config.get("task")
# Transcribe
segments, info = self.model.transcribe(
@@ -98,6 +102,8 @@ class WhisperTranscriber:
beam_size=beam_size,
best_of=best_of,
vad_filter=vad,
task=final_task,
language=language if language != "auto" else None,
vad_parameters=dict(min_silence_duration_ms=500),
condition_on_previous_text=self.config.get("condition_on_previous_text"),
without_timestamps=True

View File

@@ -245,6 +245,26 @@ class UIBridge(QObject):
# --- Methods called from QML ---
@Slot(result=list)
def get_supported_languages(self):
from src.core.languages import get_language_names
return get_language_names()
@Slot(str)
def set_language_by_name(self, name):
from src.core.languages import get_code_by_name
from src.core.config import ConfigManager
code = get_code_by_name(name)
ConfigManager().set("language", code)
self.settingChanged.emit("language", code)
@Slot(result=str)
def get_current_language_name(self):
from src.core.languages import get_name_by_code
from src.core.config import ConfigManager
code = ConfigManager().get("language")
return get_name_by_code(code)
@Slot(str, result='QVariant')
def getSetting(self, key):
from src.core.config import ConfigManager

View File

@@ -100,7 +100,7 @@ ComboBox {
popup: Popup {
y: control.height - 1
width: control.width
implicitHeight: contentItem.implicitHeight
implicitHeight: Math.min(contentItem.implicitHeight, 300)
padding: 5
contentItem: ListView {

View File

@@ -25,7 +25,7 @@ Rectangle {
Text {
anchors.centerIn: parent
text: control.recording ? "Listening..." : (control.currentSequence || "None")
text: control.recording ? "Listening..." : (formatSequence(control.currentSequence) || "None")
color: control.recording ? SettingsStyle.accent : (control.currentSequence ? "#ffffff" : "#808080")
font.family: "JetBrains Mono"
font.pixelSize: 13
@@ -72,6 +72,23 @@ Rectangle {
if (!activeFocus) control.recording = false
}
function formatSequence(seq) {
if (!seq) return ""
var parts = seq.split("+")
for (var i = 0; i < parts.length; i++) {
var p = parts[i]
// Standardize modifiers
if (p === "ctrl") parts[i] = "Ctrl"
else if (p === "alt") parts[i] = "Alt"
else if (p === "shift") parts[i] = "Shift"
else if (p === "win") parts[i] = "Win"
else if (p === "esc") parts[i] = "Esc"
// Capitalize F-keys and others (e.g. f8 -> F8, space -> Space)
else parts[i] = p.charAt(0).toUpperCase() + p.slice(1)
}
return parts.join(" + ")
}
function getKeyName(key, text) {
// F-Keys
if (key >= Qt.Key_F1 && key <= Qt.Key_F35) return "f" + (key - Qt.Key_F1 + 1)

View File

@@ -314,15 +314,25 @@ Window {
spacing: 0
ModernSettingsItem {
label: "Global Hotkey"
description: "Press to record a new shortcut (e.g. Ctrl+Space)"
label: "Global Hotkey (Transcribe)"
description: "Press to record a new shortcut (e.g. F9)"
control: ModernKeySequenceRecorder {
Layout.preferredWidth: 200
implicitWidth: 240
currentSequence: ui.getSetting("hotkey")
onSequenceChanged: (seq) => ui.setSetting("hotkey", seq)
}
}
ModernSettingsItem {
label: "Global Hotkey (Translate)"
description: "Press to record a new shortcut (e.g. F10)"
control: ModernKeySequenceRecorder {
implicitWidth: 240
currentSequence: ui.getSetting("hotkey_translate")
onSequenceChanged: (seq) => ui.setSetting("hotkey_translate", seq)
}
}
ModernSettingsItem {
label: "Run on Startup"
description: "Automatically launch when you log in"
@@ -742,15 +752,17 @@ Window {
ModernSettingsItem {
label: "Language"
description: "Force language or Auto-detect"
description: "Spoken language to transcribe"
control: ModernComboBox {
width: 140
model: ["auto", "en", "fr", "de", "es", "it", "ja", "zh", "ru"]
currentIndex: model.indexOf(ui.getSetting("language"))
onActivated: ui.setSetting("language", currentText)
Layout.preferredWidth: 200
model: ui.get_supported_languages()
currentIndex: model.indexOf(ui.get_current_language_name())
onActivated: (index) => ui.set_language_by_name(currentText)
}
}
// Task selector removed as per user request (Hotkeys handle this now)
ModernSettingsItem {
label: "Compute Device"
description: "Hardware acceleration (CUDA requires NVidia GPU)"

32
src/utils/formatters.py Normal file
View File

@@ -0,0 +1,32 @@
"""
Formatter Utilities
===================
Helper functions for text formatting.
"""
def format_hotkey(sequence: str) -> str:
    """
    Format a hotkey sequence string for display.

    The sequence is split on '+', each key name is trimmed, lower-cased and
    given a capitalised first letter, and the names are joined with ' + '
    (e.g. 'ctrl+alt+f9' -> 'Ctrl + Alt + F9').

    Args:
        sequence: Hotkey description such as 'ctrl+alt+f9'. May be empty or None.

    Returns:
        The readable hotkey label, or the string 'None' when the sequence
        is empty or contains no key names at all.
    """
    if not sequence:
        return "None"

    key_names = (part.strip().lower() for part in sequence.split('+'))

    # Capitalising the first character covers modifiers ('ctrl' -> 'Ctrl') and
    # ordinary keys ('f9' -> 'F9') alike, so no per-key special cases are needed.
    # Blank segments (trailing or doubled '+', whitespace-only input) are
    # skipped so the label never ends up with dangling ' + ' separators.
    labels = [name[0].upper() + name[1:] for name in key_names if name]

    return " + ".join(labels) if labels else "None"