Feat: Integrated Local LLM (Llama 3.2 1B) for Intelligent Correction

- New Core: Added LLMEngine, using llama-cpp-python for local, private text post-processing.
- Forensic Protocol: Engineered strict system prompts to prevent LLM refusals, censorship, and assistant chatter.
- Three Modes: Grammar, Standard, Rewrite.
- Start/Stop Logic: Consolidated conflicting recording methods.
- Hotkeys: Added a dedicated F9 (Transcribe + Correct) alongside F8 (Transcribe).
- UI: Updated the Settings window with a Correction & Rewriting section.
- Build: Updated portable_build.py.
- Docs: Updated README.
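For reference, a minimal sketch of the resulting hotkey flow, assuming a hypothetical transcribe() Whisper wrapper (the handler below is illustrative, not the app's actual code):

    from src.core.config import ConfigManager
    from src.core.llm_engine import LLMEngine

    config = ConfigManager()
    llm = LLMEngine()

    def handle_hotkey(audio, correct: bool) -> str:
        text = transcribe(audio)  # hypothetical Whisper wrapper
        if not correct:
            return text  # F8: raw transcription
        # F9: post-process locally; correct_text() returns the raw text on any failure.
        mode = config.get("llm_mode", "Standard")  # "Grammar" | "Standard" | "Rewrite"
        return llm.correct_text(text, mode=mode)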
src/core/config.py

@@ -17,6 +17,7 @@ from src.core.paths import get_base_path
 DEFAULT_SETTINGS = {
     "hotkey": "f8",
     "hotkey_translate": "f10",
+    "hotkey_correct": "f9",  # New: Transcribe + Correct
     "model_size": "small",
     "input_device": None,  # Device ID (int) or Name (str), None = Default
     "save_recordings": False,  # Save .wav files for debugging
@@ -49,6 +50,11 @@ DEFAULT_SETTINGS = {
     "condition_on_previous_text": True,
     "initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.",  # Default: Forces punctuation
 
+    # LLM Correction
+    "llm_enabled": False,
+    "llm_mode": "Standard",  # "Grammar", "Standard", "Rewrite"
+    "llm_model_name": "llama-3.2-1b-instruct",
+
     # Low VRAM Mode
@@ -102,9 +108,9 @@ class ConfigManager:
         except Exception as e:
             logging.error(f"Failed to save settings: {e}")
 
-    def get(self, key: str) -> Any:
+    def get(self, key: str, default: Any = None) -> Any:
         """Get a setting value."""
-        return self.data.get(key, DEFAULT_SETTINGS.get(key))
+        return self.data.get(key, DEFAULT_SETTINGS.get(key, default))
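The widened get() signature adds a caller-supplied fallback behind the existing two-level lookup (user data, then DEFAULT_SETTINGS). A small illustration; the cfg instance is hypothetical:

    cfg = ConfigManager()

    cfg.get("llm_mode")                 # "Standard"  (found in DEFAULT_SETTINGS)
    cfg.get("no_such_key")              # None        (old behavior preserved)
    cfg.get("no_such_key", "fallback")  # "fallback"  (new caller default)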
src/core/llm_engine.py (new file, 185 lines)

@@ -0,0 +1,185 @@
"""
LLM Engine Module.
==================

Handles interaction with the local Llama 3.2 1B model for transcription correction.
Uses llama-cpp-python for efficient local inference.
"""

import os
import re
import logging
from typing import Optional

from src.core.paths import get_models_path
from src.core.config import ConfigManager

try:
    from llama_cpp import Llama
except ImportError:
    Llama = None


class LLMEngine:
    """
    Manages the Llama model and performs text correction/rewriting.
    """

    def __init__(self):
        self.config = ConfigManager()
        self.model = None
        self.current_model_path = None

        # --- Mode 1: Grammar Only (Strict) ---
        self.prompt_grammar = (
            "You are a text correction tool. "
            "Correct the grammar/spelling. Do not change punctuation or capitalization styles. "
            "Do not remove any words (including profanity). Output ONLY the result."
            "\n\nExample:\nInput: 'damn it works'\nOutput: 'damn it works'"
        )

        # --- Mode 2: Standard (Grammar + Punctuation + Caps) ---
        self.prompt_standard = (
            "You are a text correction tool. "
            "Standardize the grammar, punctuation, and capitalization. "
            "Do not remove any words (including profanity). Output ONLY the result."
            "\n\nExample:\nInput: 'damn it works'\nOutput: 'Damn it works.'"
        )

        # --- Mode 3: Rewrite (Tone-Aware Polish) ---
        self.prompt_rewrite = (
            "You are a text rewriting tool. Improve flow/clarity but keep the exact tone and vocabulary. "
            "Do not remove any words (including profanity). Output ONLY the result."
            "\n\nExample:\nInput: 'damn it works'\nOutput: 'Damn, it works.'"
        )
    def load_model(self) -> bool:
        """
        Loads the LLM model if it exists.
        Returns True if successful, False otherwise.
        """
        if Llama is None:
            logging.error("llama-cpp-python not installed.")
            return False

        model_name = self.config.get("llm_model_name", "llama-3.2-1b-instruct")
        model_dir = get_models_path() / "llm" / model_name
        model_file = model_dir / "llama-3.2-1b-instruct-q4_k_m.gguf"

        if not model_file.exists():
            logging.warning(f"LLM Model not found at: {model_file}")
            return False

        # Already loaded from this path; nothing to do.
        if self.model and self.current_model_path == str(model_file):
            return True

        try:
            logging.info(f"Loading LLM from {model_file}...")

            # Offload all layers to the GPU if CUDA is available; otherwise stay on CPU.
            n_gpu_layers = 0
            try:
                import torch
                if torch.cuda.is_available():
                    n_gpu_layers = -1
            except ImportError:
                pass  # torch is optional; CPU inference still works.

            self.model = Llama(
                model_path=str(model_file),
                n_gpu_layers=n_gpu_layers,
                n_ctx=2048,
                verbose=False
            )
            self.current_model_path = str(model_file)
            logging.info("LLM loaded successfully.")
            return True
        except Exception as e:
            logging.error(f"Failed to load LLM: {e}")
            self.model = None
            return False
    def correct_text(self, text: str, mode: str = "Standard") -> str:
        """Corrects or rewrites the provided text."""
        if not text or not text.strip():
            return text

        # Lazy-load the model on first use; fall back to the raw text if it
        # cannot be loaded.
        if not self.model:
            if not self.load_model():
                return text

        logging.info(f"LLM Processing ({mode}): '{text}'")

        system_prompt = self.prompt_standard
        if mode == "Grammar":
            system_prompt = self.prompt_grammar
        elif mode == "Rewrite":
            system_prompt = self.prompt_rewrite

        # PREFIX INJECTION TECHNIQUE
        # End the prompt with the start of the assistant's answer, phrased to
        # force compliance: "Here is the processed output:" pushes the model
        # into completion mode rather than refusal mode.
        prefix_injection = "Here is the processed output:\n"

        # Llama 3 Instruct chat template, assembled by hand.
        prompt = (
            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
            f"<|start_header_id|>user<|end_header_id|>\n\nProcess this input:\n{text}<|eot_id|>"
            f"<|start_header_id|>assistant<|end_header_id|>\n\n{prefix_injection}"
        )
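        # For the input "damn it works" in Standard mode, the assembled prompt
        # ends like this (illustration only):
        #
        #   <|start_header_id|>assistant<|end_header_id|>
        #
        #   Here is the processed output:
        #
        # so the first tokens the model generates are the corrected text
        # itself, not a fresh preamble or a refusal.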
        try:
            output = self.model(
                prompt,
                max_tokens=512,
                stop=["<|eot_id|>"],
                echo=False,
                temperature=0.1
            )

            # llama-cpp-python returns only the continuation, so the injected
            # prefix is not part of `result`; the model's logical response is
            # "Here is the processed output:\n<result>". We just want the result.
            result = output['choices'][0]['text'].strip()

            # Refusal Detection (Safety Net)
            refusal_triggers = [
                "I cannot", "I can't", "I am unable", "I apologize", "sorry",
                "As an AI", "explicit content", "harmful content", "safety guidelines"
            ]
            lower_res = result.lower()
            if any(trig in lower_res for trig in refusal_triggers) and len(result) < 150:
                logging.warning(f"LLM Refusal Detected: '{result}'. Falling back to original.")
                return text  # Return the original text on refusal!

            # --- Robust Post-Processing ---

            # 1. Strip a single pair of wrapping quotes.
            if result.startswith('"') and result.endswith('"') and len(result) > 2 and '"' not in result[1:-1]:
                result = result[1:-1]
            if result.startswith("'") and result.endswith("'") and len(result) > 2 and "'" not in result[1:-1]:
                result = result[1:-1]

            # 2. Keep only the first non-empty line.
            if "\n" in result:
                lines = result.split('\n')
                clean_lines = [l.strip() for l in lines if l.strip()]
                if clean_lines:
                    result = clean_lines[0]

            # 3. Aggressive preamble stripping (includes the injected prefix).
            prefixes = [
                r"^Here is the processed output:?\s*",  # The one we injected
                r"^Here is the corrected text:?\s*",
                r"^Here is the rewritten text:?\s*",
                r"^Here's the result:?\s*",
                r"^Sure,? here is .*?:\s*",
                r"^Output:?\s*",
                r"^Processing result:?\s*",
            ]
            for p in prefixes:
                result = re.sub(p, "", result, flags=re.IGNORECASE).strip()

            # Stripping a preamble may expose another pair of wrapping quotes.
            if result.startswith('"') and result.endswith('"') and len(result) > 2 and '"' not in result[1:-1]:
                result = result[1:-1]

            logging.info(f"LLM Result: '{result}'")
            return result
        except Exception as e:
            logging.error(f"LLM inference failed: {e}")
            return text  # Fail safe: return the original text.
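A minimal usage sketch of the engine above (assumes the GGUF file is already in place under models/llm/; outputs shown are what the prompts aim for, not guaranteed):

    from src.core.llm_engine import LLMEngine

    engine = LLMEngine()

    # correct_text() lazy-loads the model and always returns some text:
    # the corrected version on success, the untouched input on any failure.
    print(engine.correct_text("damn it works", mode="Grammar"))   # -> "damn it works"
    print(engine.correct_text("damn it works", mode="Standard"))  # -> "Damn it works."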
@@ -110,6 +110,7 @@ class UIBridge(QObject):
     logAppended = Signal(str)  # Emits new log line
     settingChanged = Signal(str, 'QVariant')
     modelStatesChanged = Signal()  # Notify UI to re-check isModelDownloaded
+    llmDownloadRequested = Signal()
 
     def __init__(self, parent=None):
         super().__init__(parent)
@@ -356,11 +357,7 @@ class UIBridge(QObject):
         except Exception as e:
             logging.error(f"Failed to preload audio devices: {e}")
 
-    @Slot()
-    def toggle_recording(self):
-        """Called by UI elements to trigger the app's recording logic."""
-        # This will be connected to the main app's toggle logic
-        pass
-
     @Property(bool, notify=isDownloadingChanged)
     def isDownloading(self): return self._is_downloading
 
@@ -400,6 +397,16 @@ class UIBridge(QObject):
             logging.error(f"Error checking model status: {e}")
             return False
 
+    @Slot(result=bool)
+    def isLLMModelDownloaded(self):
+        try:
+            from src.core.paths import get_models_path
+            # Hardcoded check for the 1B model we support.
+            model_file = get_models_path() / "llm" / "llama-3.2-1b-instruct" / "llama-3.2-1b-instruct-q4_k_m.gguf"
+            return model_file.exists()
+        except Exception:
+            return False
+
     @Slot(str)
     def downloadModel(self, size):
         self.downloadRequested.emit(size)
@@ -407,3 +414,7 @@ class UIBridge(QObject):
     @Slot()
     def notifyModelStatesChanged(self):
         self.modelStatesChanged.emit()
+
+    @Slot()
+    def downloadLLM(self):
+        self.llmDownloadRequested.emit()
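The new signal only relays the request; the main application is expected to connect it to an actual downloader. A hypothetical wiring sketch (the app object and start_llm_download handler are illustrative):

    bridge = UIBridge()
    bridge.llmDownloadRequested.connect(app.start_llm_download)  # hypothetical handler

    # QML calls ui.downloadLLM() -> llmDownloadRequested fires -> the app fetches
    # the GGUF, then calls bridge.notifyModelStatesChanged() so the status card
    # re-runs isLLMModelDownloaded().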
@@ -315,7 +315,7 @@ Window {
 
     ModernSettingsItem {
         label: "Global Hotkey (Transcribe)"
-        description: "Press to record a new shortcut (e.g. F9)"
+        description: "Standard: Raw transcription"
        control: ModernKeySequenceRecorder {
             implicitWidth: 240
             currentSequence: ui.getSetting("hotkey")
@@ -323,6 +323,16 @@ Window {
         }
     }
 
+    ModernSettingsItem {
+        label: "Global Hotkey (Correct)"
+        description: "Enhanced: Transcribe + AI Correction"
+        control: ModernKeySequenceRecorder {
+            implicitWidth: 240
+            currentSequence: ui.getSetting("hotkey_correct")
+            onSequenceChanged: (seq) => ui.setSetting("hotkey_correct", seq)
+        }
+    }
+
     ModernSettingsItem {
         label: "Global Hotkey (Translate)"
         description: "Press to record a new shortcut (e.g. F10)"
@@ -359,8 +369,8 @@ Window {
         showSeparator: false
         control: ModernSlider {
             Layout.preferredWidth: 200
-            from: 10; to: 6000
-            stepSize: 10
+            from: 10; to: 20000
+            stepSize: 100
             snapMode: Slider.SnapAlways
             value: ui.getSetting("typing_speed")
             onMoved: ui.setSetting("typing_speed", value)
@@ -845,6 +855,137 @@ Window {
         }
     }
 
+    ModernSettingsSection {
+        title: "Correction & Rewriting"
+        Layout.margins: 32
+        Layout.topMargin: 0
+
+        content: ColumnLayout {
+            width: parent.width
+            spacing: 0
+
+            ModernSettingsItem {
+                label: "Enable Correction"
+                description: "Post-process text with Llama 3.2 1B (Adds latency)"
+                control: ModernSwitch {
+                    checked: ui.getSetting("llm_enabled")
+                    onToggled: ui.setSetting("llm_enabled", checked)
+                }
+            }
+
+            ModernSettingsItem {
+                label: "Correction Mode"
+                description: "Grammar Fix vs. Complete Rewrite"
+                visible: ui.getSetting("llm_enabled")
+                control: ModernComboBox {
+                    width: 140
+                    model: ["Grammar", "Standard", "Rewrite"]
+                    currentIndex: model.indexOf(ui.getSetting("llm_mode"))
+                    onActivated: ui.setSetting("llm_mode", currentText)
+                }
+            }
+
+            // LLM Model Status Card
+            Rectangle {
+                Layout.fillWidth: true
+                Layout.margins: 12
+                Layout.topMargin: 0
+                Layout.bottomMargin: 16
+                height: 54
+                color: "#0a0a0f"
+                visible: ui.getSetting("llm_enabled")
+                radius: 6
+                border.color: SettingsStyle.borderSubtle
+                border.width: 1
+
+                property bool isDownloaded: false
+                property bool isDownloading: ui.isDownloading && ui.statusText.indexOf("LLM") !== -1
+
+                Timer {
+                    interval: 2000
+                    running: visible
+                    repeat: true
+                    onTriggered: parent.checkStatus()
+                }
+
+                function checkStatus() {
+                    isDownloaded = ui.isLLMModelDownloaded()
+                }
+
+                Component.onCompleted: checkStatus()
+
+                Connections {
+                    target: ui
+                    function onModelStatesChanged() { parent.checkStatus() }
+                    function onIsDownloadingChanged() { parent.checkStatus() }
+                }
+
+                RowLayout {
+                    anchors.fill: parent
+                    anchors.leftMargin: 12
+                    anchors.rightMargin: 12
+                    spacing: 12
+
+                    Image {
+                        source: "smart_toy.svg"
+                        sourceSize: Qt.size(16, 16)
+                        layer.enabled: true
+                        layer.effect: MultiEffect {
+                            colorization: 1.0
+                            colorizationColor: parent.parent.isDownloaded ? SettingsStyle.accent : "#808080"
+                        }
+                    }
+
+                    ColumnLayout {
+                        Layout.fillWidth: true
+                        spacing: 2
+                        Text {
+                            text: "Llama 3.2 1B (Instruct)"
+                            color: "#ffffff"
+                            font.family: "JetBrains Mono"; font.bold: true
+                            font.pixelSize: 11
+                        }
+                        Text {
+                            text: parent.parent.isDownloaded ? "Ready." : "Model missing (~1.2GB)"
+                            color: SettingsStyle.textSecondary
+                            font.family: "JetBrains Mono"; font.pixelSize: 10
+                        }
+                    }
+
+                    Button {
+                        id: dlBtn
+                        text: "Download"
+                        visible: !parent.parent.isDownloaded && !parent.parent.isDownloading
+                        Layout.preferredHeight: 24
+                        Layout.preferredWidth: 80
+
+                        contentItem: Text {
+                            text: "DOWNLOAD"
+                            font.pixelSize: 10; font.bold: true; color: "#000000"
+                            horizontalAlignment: Text.AlignHCenter
+                            verticalAlignment: Text.AlignVCenter
+                        }
+                        background: Rectangle {
+                            color: dlBtn.hovered ? "#ffffff" : SettingsStyle.accent; radius: 4
+                        }
+                        onClicked: ui.downloadLLM()
+                    }
+
+                    // Progress Bar
+                    Rectangle {
+                        visible: parent.parent.isDownloading
+                        Layout.fillWidth: true
+                        height: 4
+                        color: "#30ffffff"
+                        Rectangle {
+                            width: parent.width * (ui.downloadProgress / 100)
+                            height: parent.height
+                            color: SettingsStyle.accent
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     ModernSettingsSection {
         title: "Advanced Decoding"
         Layout.margins: 32