Feat: Integrated Local LLM (Llama 3.2 1B) for Intelligent Correction -- New Core: Added LLMEngine utilizing llama-cpp-python for local private text post-processing. -- Forensic Protocol: Engineered strict system prompts to prevent LLM refusals, censorship, or assistant chatter. -- Three Modes: Grammar, Standard, Rewrite. -- Start/Stop Logic: Consolidated conflicting recording methods. -- Hotkeys: Added dedicated F9 (Correct) vs F8 (Transcribe). -- UI: Updated Settings. -- Build: Updated portable_build.py. -- Docs: Updated README.

2026-01-31 01:02:24 +02:00
parent 6737ed4547
commit 798a35e6d9
10 changed files with 601 additions and 61 deletions
@@ -17,6 +17,7 @@ from src.core.paths import get_base_path
 DEFAULT_SETTINGS = {
    "hotkey": "f8",
    "hotkey_translate": "f10",
+    "hotkey_correct": "f9",     # New: Transcribe + Correct
    "model_size": "small",
    "input_device": None,       # Device ID (int) or Name (str), None = Default
    "save_recordings": False,   # Save .wav files for debugging
@@ -49,6 +50,11 @@ DEFAULT_SETTINGS = {
    "condition_on_previous_text": True,
    "initial_prompt": "Mm-hmm. Okay, let's go. I speak in full sentences.", # Default: Forces punctuation
    
+    # LLM Correction
+    "llm_enabled": False,
+    "llm_mode": "Standard", # "Grammar", "Standard", "Rewrite"
+    "llm_model_name": "llama-3.2-1b-instruct",
+    

    
    # Low VRAM Mode
@@ -102,9 +108,9 @@ class ConfigManager:
        except Exception as e:
            logging.error(f"Failed to save settings: {e}")

-    def get(self, key: str) -> Any:
+    def get(self, key: str, default: Any = None) -> Any:
        """Get a setting value."""
-        return self.data.get(key, DEFAULT_SETTINGS.get(key))
+        return self.data.get(key, DEFAULT_SETTINGS.get(key, default))



@@ -0,0 +1,185 @@
+"""
+LLM Engine Module.
+==================
+
+Handles interaction with the local Llama 3.2 1B model for transcription correction.
+Uses llama-cpp-python for efficient local inference.
+"""
+
+import os
+import logging
+from typing import Optional
+from src.core.paths import get_models_path
+from src.core.config import ConfigManager
+
+try:
+    from llama_cpp import Llama
+except ImportError:
+    Llama = None
+
+class LLMEngine:
+    """
+    Manages the Llama model and performs text correction/rewriting.
+    """
+    def __init__(self):
+        self.config = ConfigManager()
+        self.model = None
+        self.current_model_path = None
+        
+        # --- Mode 1: Grammar Only (Strict) ---
+        self.prompt_grammar = (
+            "You are a text correction tool. "
+            "Correct the grammar/spelling. Do not change punctuation or capitalization styles. "
+            "Do not remove any words (including profanity). Output ONLY the result."
+            "\n\nExample:\nInput: 'damn it works'\nOutput: 'damn it works'"
+        )
+
+        # --- Mode 2: Standard (Grammar + Punctuation + Caps) ---
+        self.prompt_standard = (
+            "You are a text correction tool. "
+            "Standardize the grammar, punctuation, and capitalization. "
+            "Do not remove any words (including profanity). Output ONLY the result."
+            "\n\nExample:\nInput: 'damn it works'\nOutput: 'Damn it works.'"
+        )
+
+        # --- Mode 3: Rewrite (Tone-Aware Polish) ---
+        self.prompt_rewrite = (
+            "You are a text rewriting tool. Improve flow/clarity but keep the exact tone and vocabulary. "
+            "Do not remove any words (including profanity). Output ONLY the result."
+            "\n\nExample:\nInput: 'damn it works'\nOutput: 'Damn, it works.'"
+        )
+
+    def load_model(self) -> bool:
+        """
+        Loads the LLM model if it exists.
+        Returns True if successful, False otherwise.
+        """
+        if Llama is None:
+            logging.error("llama-cpp-python not installed.")
+            return False
+
+        model_name = self.config.get("llm_model_name", "llama-3.2-1b-instruct")
+        model_dir = get_models_path() / "llm" / model_name
+        model_file = model_dir / "llama-3.2-1b-instruct-q4_k_m.gguf"
+        
+        if not model_file.exists():
+            logging.warning(f"LLM Model not found at: {model_file}")
+            return False
+
+        if self.model and self.current_model_path == str(model_file):
+            return True
+
+        try:
+            logging.info(f"Loading LLM from {model_file}...")
+            n_gpu_layers = 0
+            try:
+                import torch
+                if torch.cuda.is_available():
+                    n_gpu_layers = -1 
+            except:
+                pass
+
+            self.model = Llama(
+                model_path=str(model_file),
+                n_gpu_layers=n_gpu_layers,
+                n_ctx=2048, 
+                verbose=False
+            )
+            self.current_model_path = str(model_file)
+            logging.info("LLM loaded successfully.")
+            return True
+        except Exception as e:
+            logging.error(f"Failed to load LLM: {e}")
+            self.model = None
+            return False
+
+    def correct_text(self, text: str, mode: str = "Standard") -> str:
+        """Corrects or rewrites the provided text."""
+        if not text or not text.strip():
+            return text
+
+        if not self.model:
+            if not self.load_model():
+                return text 
+
+        logging.info(f"LLM Processing ({mode}): '{text}'")
+
+        system_prompt = self.prompt_standard 
+        if mode == "Grammar": system_prompt = self.prompt_grammar
+        elif mode == "Rewrite": system_prompt = self.prompt_rewrite
+        
+        # PREFIX INJECTION TECHNIQUE
+        # We end the prompt with the start of the assistant's answer specifically phrased to force compliance.
+        # "Here is the processed output:" forces it into a completion mode rather than a refusal mode.
+        prefix_injection = "Here is the processed output:\n"
+
+        prompt = (
+            f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_prompt}<|eot_id|>"
+            f"<|start_header_id|>user<|end_header_id|>\n\nProcess this input:\n{text}<|eot_id|>"
+            f"<|start_header_id|>assistant<|end_header_id|>\n\n{prefix_injection}"
+        )
+
+        try:
+            output = self.model(
+                prompt,
+                max_tokens=512, 
+                stop=["<|eot_id|>"],
+                echo=False,
+                temperature=0.1 
+            )
+            
+            result = output['choices'][0]['text'].strip()
+            
+            # 1. Fallback: If result is empty, it might have just outputted nothing because we prefilled?
+            # Actually llama-cpp-python usually returns the *continuation*.
+            # So if it outputted "My corrected text.", the full logical response is "Here is...: My corrected text."
+            # We just want the result.
+            
+            # Refusal Detection (Safety Net)
+            refusal_triggers = [
+                "I cannot", "I can't", "I am unable", "I apologize", "sorry", 
+                "As an AI", "explicit content", "harmful content", "safety guidelines"
+            ]
+            lower_res = result.lower()
+            if any(trig in lower_res for trig in refusal_triggers) and len(result) < 150:
+                logging.warning(f"LLM Refusal Detected: '{result}'. Falling back to original.")
+                return text # Return original text on refusal!
+            
+            # --- Robust Post-Processing ---
+            
+            # 1. Strip quotes 
+            if result.startswith('"') and result.endswith('"') and len(result) > 2 and '"' not in result[1:-1]:
+                 result = result[1:-1]
+            if result.startswith("'") and result.endswith("'") and len(result) > 2 and "'" not in result[1:-1]:
+                 result = result[1:-1]
+                 
+            # 2. Split by newline
+            if "\n" in result:
+                lines = result.split('\n')
+                clean_lines = [l.strip() for l in lines if l.strip()]
+                if clean_lines:
+                    result = clean_lines[0]
+                    
+            # 3. Aggressive Preamble Stripping (Updates for new prefix)
+            import re
+            prefixes = [
+                r"^Here is the processed output:?\s*", # The one we injected
+                r"^Here is the corrected text:?\s*",
+                r"^Here is the rewritten text:?\s*",
+                r"^Here's the result:?\s*",
+                r"^Sure,? here is regex.*:?\s*",
+                r"^Output:?\s*",
+                r"^Processing result:?\s*",
+            ]
+            
+            for p in prefixes:
+                result = re.sub(p, "", result, flags=re.IGNORECASE).strip()
+            
+            if result.startswith('"') and result.endswith('"') and len(result) > 2 and '"' not in result[1:-1]:
+                 result = result[1:-1]
+                 
+            logging.info(f"LLM Result: '{result}'")
+            return result
+        except Exception as e:
+            logging.error(f"LLM inference failed: {e}")
+            return text # Fail safe logic
@@ -110,6 +110,7 @@ class UIBridge(QObject):
    logAppended = Signal(str) # Emits new log line
    settingChanged = Signal(str, 'QVariant')
    modelStatesChanged = Signal() # Notify UI to re-check isModelDownloaded
+    llmDownloadRequested = Signal()

    def __init__(self, parent=None):
        super().__init__(parent)
@@ -356,11 +357,7 @@ class UIBridge(QObject):
        except Exception as e:
            logging.error(f"Failed to preload audio devices: {e}")

-    @Slot()
-    def toggle_recording(self):
-        """Called by UI elements to trigger the app's recording logic."""
-        # This will be connected to the main app's toggle logic
-        pass 
+
    @Property(bool, notify=isDownloadingChanged)
    def isDownloading(self): return self._is_downloading

@@ -400,6 +397,16 @@ class UIBridge(QObject):
            logging.error(f"Error checking model status: {e}")
            return False

+    @Slot(result=bool)
+    def isLLMModelDownloaded(self):
+        try:
+            from src.core.paths import get_models_path
+            # Hardcoded check for the 1B model we support
+            model_file = get_models_path() / "llm" / "llama-3.2-1b-instruct" / "llama-3.2-1b-instruct-q4_k_m.gguf"
+            return model_file.exists()
+        except:
+            return False
+
    @Slot(str)
    def downloadModel(self, size):
        self.downloadRequested.emit(size)
@@ -407,3 +414,7 @@ class UIBridge(QObject):
    @Slot()
    def notifyModelStatesChanged(self):
        self.modelStatesChanged.emit()
+
+    @Slot()
+    def downloadLLM(self):
+        """Emit ``llmDownloadRequested``; the download itself is handled by whatever is connected to that signal."""
+        self.llmDownloadRequested.emit()
@@ -315,7 +315,7 @@ Window {
                                    
                                    ModernSettingsItem {
                                        label: "Global Hotkey (Transcribe)"
-                                        description: "Press to record a new shortcut (e.g. F9)"
+                                        description: "Standard: Raw transcription"
                                        control: ModernKeySequenceRecorder {
                                            implicitWidth: 240
                                            currentSequence: ui.getSetting("hotkey")
@@ -323,6 +323,16 @@ Window {
                                        }
                                    }

+                                    // Shortcut recorder for the "hotkey_correct" setting (transcribe, then AI correction).
+                                    ModernSettingsItem {
+                                        label: "Global Hotkey (Correct)"
+                                        description: "Enhanced: Transcribe + AI Correction"
+                                        control: ModernKeySequenceRecorder {
+                                            implicitWidth: 240
+                                            // Initial value is read from settings; a newly recorded sequence is written straight back.
+                                            currentSequence: ui.getSetting("hotkey_correct")
+                                            onSequenceChanged: (seq) => ui.setSetting("hotkey_correct", seq)
+                                        }
+                                    }
+
+
                                    ModernSettingsItem {
                                        label: "Global Hotkey (Translate)"
                                        description: "Press to record a new shortcut (e.g. F10)"
@@ -359,8 +369,8 @@ Window {
                                        showSeparator: false
                                        control: ModernSlider {
                                            Layout.preferredWidth: 200
-                                            from: 10; to: 6000
-                                            stepSize: 10
+                                            from: 10; to: 20000
+                                            stepSize: 100
                                            snapMode: Slider.SnapAlways
                                            value: ui.getSetting("typing_speed")
                                            onMoved: ui.setSetting("typing_speed", value)
@@ -845,6 +855,137 @@ Window {
                                }
                            }

+                            // Settings for the optional LLM post-processing step, plus a status
+                            // card showing whether the model file is present or being downloaded.
+                            ModernSettingsSection {
+                                title: "Correction & Rewriting"
+                                Layout.margins: 32
+                                Layout.topMargin: 0
+
+                                content: ColumnLayout {
+                                    width: parent.width
+                                    spacing: 0
+
+                                    ModernSettingsItem {
+                                        label: "Enable Correction"
+                                        description: "Post-process text with Llama 3.2 1B (Adds latency)"
+                                        control: ModernSwitch {
+                                            checked: ui.getSetting("llm_enabled")
+                                            onToggled: ui.setSetting("llm_enabled", checked)
+                                        }
+                                    }
+
+                                    ModernSettingsItem {
+                                        label: "Correction Mode"
+                                        description: "Grammar Fix vs. Complete Rewrite"
+                                        visible: ui.getSetting("llm_enabled")
+                                        control: ModernComboBox {
+                                            width: 140
+                                            model: ["Grammar", "Standard", "Rewrite"]
+                                            currentIndex: model.indexOf(ui.getSetting("llm_mode"))
+                                            onActivated: ui.setSetting("llm_mode", currentText)
+                                        }
+                                    }
+
+                                    // LLM Model Status Card
+                                    Rectangle {
+                                        // Children reference the card by id. Walking parent.parent
+                                        // lands on a different object depending on nesting depth
+                                        // (the status Text reached the RowLayout, not this card),
+                                        // and the non-visual Timer/Connections have no Item parent.
+                                        id: llmStatusCard
+                                        Layout.fillWidth: true
+                                        Layout.margins: 12
+                                        Layout.topMargin: 0
+                                        Layout.bottomMargin: 16
+                                        height: 54
+                                        color: "#0a0a0f"
+                                        visible: ui.getSetting("llm_enabled")
+                                        radius: 6
+                                        border.color: SettingsStyle.borderSubtle
+                                        border.width: 1
+
+                                        property bool isDownloaded: false
+                                        property bool isDownloading: ui.isDownloading && ui.statusText.indexOf("LLM") !== -1
+
+                                        // Poll the model file every 2 s while the card is shown.
+                                        Timer {
+                                            interval: 2000
+                                            running: llmStatusCard.visible
+                                            repeat: true
+                                            onTriggered: llmStatusCard.checkStatus()
+                                        }
+
+                                        function checkStatus() {
+                                            isDownloaded = ui.isLLMModelDownloaded()
+                                        }
+
+                                        Component.onCompleted: checkStatus()
+
+                                        // Re-check immediately when the bridge reports a model or download change.
+                                        Connections {
+                                            target: ui
+                                            function onModelStatesChanged() { llmStatusCard.checkStatus() }
+                                            function onIsDownloadingChanged() { llmStatusCard.checkStatus() }
+                                        }
+
+                                        RowLayout {
+                                            anchors.fill: parent
+                                            anchors.leftMargin: 12
+                                            anchors.rightMargin: 12
+                                            spacing: 12
+
+                                            Image {
+                                                source: "smart_toy.svg"
+                                                sourceSize: Qt.size(16, 16)
+                                                layer.enabled: true
+                                                layer.effect: MultiEffect {
+                                                    colorization: 1.0
+                                                    colorizationColor: llmStatusCard.isDownloaded ? SettingsStyle.accent : "#808080"
+                                                }
+                                            }
+
+                                            ColumnLayout {
+                                                Layout.fillWidth: true
+                                                spacing: 2
+                                                Text {
+                                                    text: "Llama 3.2 1B (Instruct)"
+                                                    color: "#ffffff"
+                                                    font.family: "JetBrains Mono"; font.bold: true
+                                                    font.pixelSize: 11
+                                                }
+                                                Text {
+                                                    text: llmStatusCard.isDownloaded ? "Ready." : "Model missing (~1.2GB)"
+                                                    color: SettingsStyle.textSecondary
+                                                    font.family: "JetBrains Mono"; font.pixelSize: 10
+                                                }
+                                            }
+
+                                            Button {
+                                                id: dlBtn
+                                                text: "Download"
+                                                visible: !llmStatusCard.isDownloaded && !llmStatusCard.isDownloading
+                                                Layout.preferredHeight: 24
+                                                Layout.preferredWidth: 80
+
+                                                contentItem: Text {
+                                                    text: "DOWNLOAD"
+                                                    font.pixelSize: 10; font.bold: true; color: "#000000"; horizontalAlignment: Text.AlignHCenter; verticalAlignment: Text.AlignVCenter
+                                                }
+                                                background: Rectangle {
+                                                    color: dlBtn.hovered ? "#ffffff" : SettingsStyle.accent; radius: 4
+                                                }
+                                                onClicked: ui.downloadLLM()
+                                            }
+
+                                            // Progress Bar
+                                            Rectangle {
+                                                visible: llmStatusCard.isDownloading
+                                                Layout.fillWidth: true
+                                                height: 4
+                                                color: "#30ffffff"
+                                                Rectangle {
+                                                    width: parent.width * (ui.downloadProgress / 100)
+                                                    height: parent.height
+                                                    color: SettingsStyle.accent
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+
+
                            ModernSettingsSection {
                                title: "Advanced Decoding"
                                Layout.margins: 32