# whisper_voice/src/core/transcriber.py

"""
Whisper Transcriber Module.
===========================

Handles audio transcription using faster-whisper.
Runs IN-PROCESS (no subprocess) to ensure stability on all systems.
"""

import os
import logging
from typing import Optional
import numpy as np
from src.core.config import ConfigManager
from src.core.paths import get_models_path

# Import directly - valid since we are now running in the full environment
from faster_whisper import WhisperModel

class WhisperTranscriber:
    """
    Manages the faster-whisper model and transcription process.

    The model is loaded lazily: ``transcribe`` calls ``load_model`` when no
    model is present, and ``load_model`` skips the reload when the configured
    size, device and compute type match what is already loaded.
    """

    # Task names accepted by transcribe(); anything else falls back to the first.
    _VALID_TASKS = ("transcribe", "translate")

    def __init__(self):
        """Create the config manager and start with no model loaded."""
        self.config = ConfigManager()
        self.model = None
        # Settings the current model was loaded with; compared in load_model().
        self.current_model_size = None
        self.current_compute_device = None
        self.current_compute_type = None

    def load_model(self):
        """
        Load the model described by the current configuration.

        Reads ``model_size``, ``compute_device`` and ``compute_type`` from the
        config. Safe to call repeatedly: returns immediately when a model is
        already loaded with the same three settings.

        On failure the error is logged (with traceback) and ``self.model`` is
        set to ``None``; no exception propagates to the caller.
        """
        size = self.config.get("model_size")
        device = self.config.get("compute_device")
        compute = self.config.get("compute_type")

        already_loaded = (
            self.model
            and self.current_model_size == size
            and self.current_compute_device == device
            and self.current_compute_type == compute
        )
        if already_loaded:
            return

        logging.info(f"Loading Model: {size} on {device} ({compute})...")

        try:
            models_root = get_models_path()
            local_dir = models_root / f"faster-whisper-{size}"

            # A local directory means we load from disk and forbid network
            # access, avoiding Hugging Face errors when offline.
            # NOTE(review): model_exists() additionally requires config.json
            # inside this directory; confirm whether the two checks should agree.
            has_local_copy = local_dir.exists()

            self.model = WhisperModel(
                str(local_dir) if has_local_copy else size,
                device=device,
                compute_type=compute,
                download_root=str(models_root),
                local_files_only=has_local_copy,
            )

            self.current_model_size = size
            self.current_compute_device = device
            self.current_compute_type = compute
            logging.info("Model loaded successfully.")

        except Exception as e:
            # Best-effort by design: callers detect failure via self.model.
            logging.exception(f"Failed to load model: {e}")
            self.model = None

    def _build_transcribe_options(self, is_file: bool, task: Optional[str]) -> dict:
        """Assemble the keyword arguments passed to ``self.model.transcribe``."""
        beam_size = int(self.config.get("beam_size"))
        best_of = int(self.config.get("best_of"))
        # The VAD filter is always disabled for file input.
        vad = False if is_file else self.config.get("vad_filter")

        # Explicit task argument wins over config; normalise case/whitespace
        # so values such as "Transcribe" are accepted.
        raw_task = task if task else self.config.get("task")
        final_task = str(raw_task).strip().lower() if raw_task else "transcribe"
        if final_task not in self._VALID_TASKS:
            logging.warning(f"Invalid task '{final_task}' detected. Defaulting to 'transcribe'.")
            final_task = "transcribe"

        # Normalise the language the same way as the task, so "Auto" or
        # " auto " mean auto-detect instead of being forwarded as a code.
        raw_language = self.config.get("language")
        language = str(raw_language).strip().lower() if raw_language else ""
        final_language = None if language in ("", "auto") else language

        condition_prev = self.config.get("condition_on_previous_text")
        initial_prompt = self.config.get("initial_prompt")

        if final_task == "translate":
            # Anti-hallucination: never condition on previous text here.
            condition_prev = False
            # Translation requires more search breadth than greedy decoding
            # to find the English mapping.
            if beam_size < 5:
                logging.info("Forcing beam_size=5 for Translation task.")
                beam_size = 5
            # Inject a guidance prompt if the user has not configured one.
            if not initial_prompt:
                initial_prompt = "Translate this to English."

        logging.info(f"Model Dispatch: Task='{final_task}', Language='{final_language}', ConditionPrev={condition_prev}, Beam={beam_size}")

        options = {
            "beam_size": beam_size,
            "best_of": best_of,
            "vad_filter": vad,
            "task": final_task,
            "vad_parameters": dict(min_silence_duration_ms=500),
            "condition_on_previous_text": condition_prev,
            "without_timestamps": True,
        }

        # Optional keys are omitted rather than passed as None/empty.
        if initial_prompt:
            options["initial_prompt"] = initial_prompt
        if final_language:
            options["language"] = final_language

        return options

    def transcribe(self, audio_data, is_file: bool = False, task: Optional[str] = None) -> str:
        """
        Transcribe audio data.

        Args:
            audio_data: Audio input forwarded unchanged to the model.
            is_file: When True, the VAD filter is disabled.
            task: Optional override for the configured task ("transcribe" or
                "translate", case-insensitive). Invalid values fall back to
                "transcribe".

        Returns:
            The segment texts joined with spaces and stripped, or a string
            beginning with "Error: " if the model could not be loaded or
            transcription raised.
        """
        logging.info(f"Starting transcription... (is_file={is_file}, task={task})")

        # Ensure model is loaded
        if not self.model:
            self.load_model()
            if not self.model:
                return "Error: Model failed to load."

        try:
            transcribe_opts = self._build_transcribe_options(is_file, task)
            segments, _info = self.model.transcribe(audio_data, **transcribe_opts)

            # Consume the segments inside the try so that errors raised while
            # iterating are reported like any other transcription failure.
            return " ".join(segment.text for segment in segments).strip()

        except Exception as e:
            logging.exception(f"Transcription failed: {e}")
            return f"Error: {str(e)}"

    def model_exists(self, size: str) -> bool:
        """
        Check whether a model size is already downloaded.

        Returns True if ``faster-whisper-{size}/config.json`` exists under the
        models path, or if the legacy Hugging Face cache folder for that size
        has a non-empty ``snapshots`` directory.
        """
        models_root = get_models_path()

        if (models_root / f"faster-whisper-{size}" / "config.json").exists():
            return True

        # Legacy HF cache check
        snapshots = models_root / f"models--Systran--faster-whisper-{size}" / "snapshots"
        return snapshots.exists() and any(snapshots.iterdir())