v1.0.1 Feature Update and Polish

Full Changelog:

[New Features]
- Added Native Translation Mode:
  - Whisper model now fully supports translating any language to English
  - Added 'task' and 'language' parameters to Transcriber core
- Dual Hotkey Support:
  - Added separate Global Hotkeys for Transcribe (default F8) and Translate (default F10)
  - Both hotkeys are fully customizable in Settings
  - Engine dynamically switches modes based on which key is pressed

[UI/UX Improvements]
- Settings Window:
  - Widened Hotkey Input fields (240px) to accommodate long combinations
  - Added Pretty-Printing for hotkey sequences (e.g. 'ctrl+f9' is displayed as 'Ctrl + F9')
  - Replaced Country Code dropdown with Full Language Names (99+ languages)
  - Made Language Dropdown scrollable (max height 300px) to prevent screen overflow
  - Removed redundant 'Task' selector (replaced by dedicated hotkeys)
- System Tray:
  - Tooltip now displays both Transcribe and Translate hotkeys
  - Tooltip hotkeys are formatted readably

[Core & Performance]
- Bootstrapper:
  - Implemented Smart Incremental Sync
  - Now checks file size and content hash before copying files
  - Drastically reduces startup time for subsequent runs
  - Preserves user settings.json during updates
- Backend:
  - Fixed HotkeyManager to support dynamic configuration keys
  - Fixed Language Lock: selecting a language now correctly forces the model to use it
  - Refactored bridge/main connection for language list handling
This commit is contained in:
Your Name
2026-01-24 18:29:10 +02:00
parent f184eb0037
commit 4b84a27a67
11 changed files with 342 additions and 72 deletions

View File

@@ -16,6 +16,7 @@ from src.core.paths import get_base_path
# Default Configuration
DEFAULT_SETTINGS = {
"hotkey": "f8",
"hotkey_translate": "f10",
"model_size": "small",
"input_device": None, # Device ID (int) or Name (str), None = Default
"save_recordings": False, # Save .wav files for debugging
@@ -38,6 +39,7 @@ DEFAULT_SETTINGS = {
# AI - Advanced
"language": "auto", # "auto" or ISO code
"task": "transcribe", # "transcribe" or "translate" (to English)
"compute_device": "auto", # "auto", "cuda", "cpu"
"compute_type": "int8", # "int8", "float16", "float32"
"beam_size": 5,

View File

@@ -30,15 +30,16 @@ class HotkeyManager(QObject):
triggered = Signal()
def __init__(self, hotkey: str = "f8"):
def __init__(self, config_key: str = "hotkey"):
"""
Initialize the HotkeyManager.
Args:
hotkey (str): The global hotkey string description. Default: "f8".
config_key (str): The configuration key to look up (e.g. "hotkey").
"""
super().__init__()
self.hotkey = hotkey
self.config_key = config_key
self.hotkey = "f8" # Placeholder
self.is_listening = False
self._enabled = True
@@ -58,9 +59,9 @@ class HotkeyManager(QObject):
from src.core.config import ConfigManager
config = ConfigManager()
self.hotkey = config.get("hotkey")
self.hotkey = config.get(self.config_key)
logging.info(f"Registering global hotkey: {self.hotkey}")
logging.info(f"Registering global hotkey ({self.config_key}): {self.hotkey}")
try:
# We don't suppress=True here because we want the app to see keys during recording
# (Wait, actually if we are recording we WANT keyboard to see it,

120
src/core/languages.py Normal file
View File

@@ -0,0 +1,120 @@
"""
Supported Languages Module
==========================
Full list of languages supported by OpenAI Whisper.
Maps ISO codes to display names.
"""
# Maps a language code to the name shown in the UI.
# "auto" is a pseudo-entry (not a real language code) and is kept first so it
# leads the list of display names; the rest is ordered by display name.
# "nn" (Nynorsk) was missing from the original table and has been added.
# NOTE(review): "yue" (Cantonese) is presumably only available in newer
# large-v3 vocabularies, so it is intentionally not listed — confirm before adding.
LANGUAGES = {
    "auto": "Auto Detect",
    "af": "Afrikaans",
    "sq": "Albanian",
    "am": "Amharic",
    "ar": "Arabic",
    "hy": "Armenian",
    "as": "Assamese",
    "az": "Azerbaijani",
    "ba": "Bashkir",
    "eu": "Basque",
    "be": "Belarusian",
    "bn": "Bengali",
    "bs": "Bosnian",
    "br": "Breton",
    "bg": "Bulgarian",
    "my": "Burmese",
    "ca": "Catalan",
    "zh": "Chinese",
    "hr": "Croatian",
    "cs": "Czech",
    "da": "Danish",
    "nl": "Dutch",
    "en": "English",
    "et": "Estonian",
    "fo": "Faroese",
    "fi": "Finnish",
    "fr": "French",
    "gl": "Galician",
    "ka": "Georgian",
    "de": "German",
    "el": "Greek",
    "gu": "Gujarati",
    "ht": "Haitian",
    "ha": "Hausa",
    "haw": "Hawaiian",
    "he": "Hebrew",
    "hi": "Hindi",
    "hu": "Hungarian",
    "is": "Icelandic",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "jw": "Javanese",
    "kn": "Kannada",
    "kk": "Kazakh",
    "km": "Khmer",
    "ko": "Korean",
    "lo": "Lao",
    "la": "Latin",
    "lv": "Latvian",
    "ln": "Lingala",
    "lt": "Lithuanian",
    "lb": "Luxembourgish",
    "mk": "Macedonian",
    "mg": "Malagasy",
    "ms": "Malay",
    "ml": "Malayalam",
    "mt": "Maltese",
    "mi": "Maori",
    "mr": "Marathi",
    "mn": "Mongolian",
    "ne": "Nepali",
    "no": "Norwegian",
    "nn": "Norwegian Nynorsk",
    "oc": "Occitan",
    "pa": "Punjabi",
    "ps": "Pashto",
    "fa": "Persian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sa": "Sanskrit",
    "sr": "Serbian",
    "sn": "Shona",
    "sd": "Sindhi",
    "si": "Sinhala",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "es": "Spanish",
    "su": "Sundanese",
    "sw": "Swahili",
    "sv": "Swedish",
    "tl": "Tagalog",
    "tg": "Tajik",
    "ta": "Tamil",
    "tt": "Tatar",
    "te": "Telugu",
    "th": "Thai",
    "bo": "Tibetan",
    "tr": "Turkish",
    "tk": "Turkmen",
    "uk": "Ukrainian",
    "ur": "Urdu",
    "uz": "Uzbek",
    "vi": "Vietnamese",
    "cy": "Welsh",
    "yi": "Yiddish",
    "yo": "Yoruba",
}
def get_language_names():
    """Return a new list of every display name in LANGUAGES, in table order."""
    return [*LANGUAGES.values()]
def get_code_by_name(name):
    """Return the code whose display name equals *name*.

    The comparison is exact (case-sensitive). The first matching entry in
    LANGUAGES wins; when nothing matches, "auto" is returned.
    """
    matching_codes = (code for code, label in LANGUAGES.items() if label == name)
    return next(matching_codes, "auto")
def get_name_by_code(code):
    """Return the display name for *code*, or "Auto Detect" for an unknown code."""
    try:
        return LANGUAGES[code]
    except KeyError:
        return "Auto Detect"

View File

@@ -74,11 +74,11 @@ class WhisperTranscriber:
logging.error(f"Failed to load model: {e}")
self.model = None
def transcribe(self, audio_data, is_file: bool = False) -> str:
def transcribe(self, audio_data, is_file: bool = False, task: Optional[str] = None) -> str:
"""
Transcribe audio data.
"""
logging.info(f"Starting transcription... (is_file={is_file})")
logging.info(f"Starting transcription... (is_file={is_file}, task={task})")
# Ensure model is loaded
if not self.model:
@@ -91,6 +91,10 @@ class WhisperTranscriber:
beam_size = int(self.config.get("beam_size"))
best_of = int(self.config.get("best_of"))
vad = False if is_file else self.config.get("vad_filter")
language = self.config.get("language")
# Use task override if provided, otherwise config
final_task = task if task else self.config.get("task")
# Transcribe
segments, info = self.model.transcribe(
@@ -98,6 +102,8 @@ class WhisperTranscriber:
beam_size=beam_size,
best_of=best_of,
vad_filter=vad,
task=final_task,
language=language if language != "auto" else None,
vad_parameters=dict(min_silence_duration_ms=500),
condition_on_previous_text=self.config.get("condition_on_previous_text"),
without_timestamps=True

View File

@@ -245,6 +245,26 @@ class UIBridge(QObject):
# --- Methods called from QML ---
@Slot(result=list)
def get_supported_languages(self):
    """Return the language display names offered to the QML language dropdown."""
    # Imported lazily, matching the other slots in this section.
    from src.core.languages import get_language_names

    display_names = get_language_names()
    return display_names
@Slot(str)
def set_language_by_name(self, name):
    """Store the language selected by display name and announce the change.

    The display name is converted back to its language code, written to the
    "language" config key, and broadcast through the settingChanged signal.
    """
    from src.core.config import ConfigManager
    from src.core.languages import get_code_by_name

    language_code = get_code_by_name(name)
    ConfigManager().set("language", language_code)
    self.settingChanged.emit("language", language_code)
@Slot(result=str)
def get_current_language_name(self):
    """Return the display name for the language code currently stored in config."""
    from src.core.config import ConfigManager
    from src.core.languages import get_name_by_code

    return get_name_by_code(ConfigManager().get("language"))
@Slot(str, result='QVariant')
def getSetting(self, key):
from src.core.config import ConfigManager

View File

@@ -100,7 +100,7 @@ ComboBox {
popup: Popup {
y: control.height - 1
width: control.width
implicitHeight: contentItem.implicitHeight
implicitHeight: Math.min(contentItem.implicitHeight, 300)
padding: 5
contentItem: ListView {

View File

@@ -25,7 +25,7 @@ Rectangle {
Text {
anchors.centerIn: parent
text: control.recording ? "Listening..." : (control.currentSequence || "None")
text: control.recording ? "Listening..." : (formatSequence(control.currentSequence) || "None")
color: control.recording ? SettingsStyle.accent : (control.currentSequence ? "#ffffff" : "#808080")
font.family: "JetBrains Mono"
font.pixelSize: 13
@@ -72,6 +72,23 @@ Rectangle {
if (!activeFocus) control.recording = false
}
function formatSequence(seq) {
    // Turn a stored hotkey string such as "ctrl+f9" into a readable label
    // such as "Ctrl + F9". Returns "" for an empty or undefined sequence.
    if (!seq) return ""

    // Keys with an explicit display name.
    var displayNames = {
        "ctrl": "Ctrl",
        "alt": "Alt",
        "shift": "Shift",
        "win": "Win",
        "esc": "Esc"
    }

    var parts = seq.split("+")
    for (var i = 0; i < parts.length; i++) {
        // Trim and lowercase each part first, the same normalization the
        // Python format_hotkey helper applies, so "Ctrl+F9" and "ctrl + f9"
        // produce the same label.
        var p = parts[i].trim().toLowerCase()
        if (displayNames.hasOwnProperty(p)) {
            parts[i] = displayNames[p]
        } else {
            // Capitalize F-keys and others (e.g. f8 -> F8, space -> Space)
            parts[i] = p.charAt(0).toUpperCase() + p.slice(1)
        }
    }
    return parts.join(" + ")
}
function getKeyName(key, text) {
// F-Keys
if (key >= Qt.Key_F1 && key <= Qt.Key_F35) return "f" + (key - Qt.Key_F1 + 1)

View File

@@ -314,14 +314,24 @@ Window {
spacing: 0
ModernSettingsItem {
label: "Global Hotkey"
description: "Press to record a new shortcut (e.g. Ctrl+Space)"
label: "Global Hotkey (Transcribe)"
description: "Press to record a new shortcut (e.g. F9)"
control: ModernKeySequenceRecorder {
Layout.preferredWidth: 200
implicitWidth: 240
currentSequence: ui.getSetting("hotkey")
onSequenceChanged: (seq) => ui.setSetting("hotkey", seq)
}
}
ModernSettingsItem {
label: "Global Hotkey (Translate)"
description: "Press to record a new shortcut (e.g. F10)"
control: ModernKeySequenceRecorder {
implicitWidth: 240
currentSequence: ui.getSetting("hotkey_translate")
onSequenceChanged: (seq) => ui.setSetting("hotkey_translate", seq)
}
}
ModernSettingsItem {
label: "Run on Startup"
@@ -742,15 +752,17 @@ Window {
ModernSettingsItem {
label: "Language"
description: "Force language or Auto-detect"
description: "Spoken language to transcribe"
control: ModernComboBox {
width: 140
model: ["auto", "en", "fr", "de", "es", "it", "ja", "zh", "ru"]
currentIndex: model.indexOf(ui.getSetting("language"))
onActivated: ui.setSetting("language", currentText)
Layout.preferredWidth: 200
model: ui.get_supported_languages()
currentIndex: model.indexOf(ui.get_current_language_name())
onActivated: (index) => ui.set_language_by_name(currentText)
}
}
// Task selector removed as per user request (Hotkeys handle this now)
ModernSettingsItem {
label: "Compute Device"
description: "Hardware acceleration (CUDA requires NVidia GPU)"

32
src/utils/formatters.py Normal file
View File

@@ -0,0 +1,32 @@
"""
Formatter Utilities
===================
Helper functions for text formatting.
"""
def format_hotkey(sequence: str) -> str:
    """
    Formats a hotkey sequence string (e.g. 'ctrl+alt+f9')
    into a pretty readable string (e.g. 'Ctrl + Alt + F9').

    Args:
        sequence: Key names joined by '+'. Each part is stripped and
            lowercased before formatting, so 'CTRL + f9' is accepted too.

    Returns:
        The parts joined by ' + ', each with its display name, or the
        string "None" when the sequence is empty, None or only whitespace.
    """
    # Unset, empty and whitespace-only sequences all mean "no hotkey";
    # previously a whitespace-only string produced an empty label.
    if not sequence or not sequence.strip():
        return "None"

    # Keys with an explicit display name; anything else is capitalized.
    display_names = {
        "ctrl": "Ctrl",
        "alt": "Alt",
        "shift": "Shift",
        "win": "Win",
        "esc": "Esc",
    }

    formatted_parts = []
    for raw_part in sequence.split("+"):
        key = raw_part.strip().lower()
        # key[:1] keeps an empty part empty instead of raising IndexError.
        formatted_parts.append(display_names.get(key, key[:1].upper() + key[1:]))

    return " + ".join(formatted_parts)