"""
Whisper Transcriber Module.
===========================

Handles audio transcription using faster-whisper.
Runs IN-PROCESS (no subprocess) to ensure stability on all systems.
"""

import os
import logging
from typing import Optional

import numpy as np

from src.core.config import ConfigManager
from src.core.paths import get_models_path

# Import directly - valid since we are now running in the full environment
from faster_whisper import WhisperModel


class WhisperTranscriber:
    """
    Manages the faster-whisper model and transcription process.

    The model is loaded lazily from the settings held by ``ConfigManager``
    and is kept on the instance so repeated transcriptions reuse it.
    """

    # Tasks accepted by ``transcribe``; anything else falls back to "transcribe".
    _VALID_TASKS = ("transcribe", "translate")

    # Minimum beam size enforced for the "translate" task.
    _MIN_TRANSLATE_BEAM = 5

    def __init__(self):
        """Initialize settings; no model is loaded until it is needed."""
        self.config = ConfigManager()
        self.model = None
        # Settings the currently loaded model was created with. Used by
        # load_model() to decide whether a reload is required.
        self.current_model_size = None
        self.current_compute_device = None
        self.current_compute_type = None

    @staticmethod
    def _local_model_dir(size):
        """Return the directory where a local copy of model *size* is expected."""
        return get_models_path() / f"faster-whisper-{size}"

    @staticmethod
    def _normalize_language(language) -> Optional[str]:
        """
        Return a lowercase language code, or None for auto-detection.

        Empty values and any spelling of "auto" ("Auto", " AUTO ") map to
        None, mirroring the strip/lower normalization applied to the task.
        """
        if not language:
            return None
        code = str(language).strip().lower()
        if not code or code == "auto":
            return None
        return code

    def _resolve_task(self, task: Optional[str]) -> str:
        """
        Pick the task to run: the explicit override if given, else the config
        value. The result is stripped and lowercased; unknown values fall back
        to "transcribe" with a warning.
        """
        raw_task = task if task else self.config.get("task")
        final_task = str(raw_task).strip().lower() if raw_task else "transcribe"
        if final_task not in self._VALID_TASKS:
            logging.warning(
                "Invalid task '%s' detected. Defaulting to 'transcribe'.",
                final_task,
            )
            final_task = "transcribe"
        return final_task

    def load_model(self) -> None:
        """
        Loads the model specified in config.

        Safe to call multiple times: if a model is already loaded with the
        same size, device and compute type, this is a no-op.

        On failure the error is logged, ``self.model`` is set to None and the
        ``current_*`` attributes are cleared; no exception is raised.
        """
        size = self.config.get("model_size")
        device = self.config.get("compute_device")
        compute = self.config.get("compute_type")

        # Check if already loaded with identical settings
        loaded = (
            self.current_model_size,
            self.current_compute_device,
            self.current_compute_type,
        )
        if self.model and loaded == (size, device, compute):
            return

        logging.info("Loading Model: %s on %s (%s)...", size, device, compute)

        try:
            # Prefer a local copy for offline support. Use the same
            # "config.json present" test as model_exists(), so an empty or
            # partially downloaded directory does not force an offline load
            # from a path that holds no usable model.
            local_dir = self._local_model_dir(size)
            has_local = (local_dir / "config.json").exists()

            self.model = WhisperModel(
                str(local_dir) if has_local else size,
                device=device,
                compute_type=compute,
                download_root=str(get_models_path()),
                # Force offline when the local copy exists to avoid HF errors
                local_files_only=has_local,
            )
        except Exception as e:
            logging.error("Failed to load model: %s", e, exc_info=True)
            self.model = None
            # Do not keep advertising the settings of a model that is gone.
            self.current_model_size = None
            self.current_compute_device = None
            self.current_compute_type = None
            return

        self.current_model_size = size
        self.current_compute_device = device
        self.current_compute_type = compute
        logging.info("Model loaded successfully.")

    def transcribe(self, audio_data, is_file: bool = False, task: Optional[str] = None) -> str:
        """
        Transcribe audio data.

        Args:
            audio_data: Passed unchanged to ``WhisperModel.transcribe``
                (presumably a file path or an audio array - confirm against
                the callers).
            is_file: When True, the VAD filter is disabled regardless of the
                "vad_filter" config value.
            task: Optional override for the configured task ("transcribe" or
                "translate"); case and surrounding whitespace are ignored.

        Returns:
            The text of all segments joined by single spaces, or a string
            starting with "Error:" if the model could not be loaded or the
            transcription raised.
        """
        logging.info("Starting transcription... (is_file=%s, task=%s)", is_file, task)

        # Ensure model is loaded
        if not self.model:
            self.load_model()
        if not self.model:
            return "Error: Model failed to load."

        try:
            beam_size = int(self.config.get("beam_size"))
            best_of = int(self.config.get("best_of"))
            vad = False if is_file else self.config.get("vad_filter")

            final_task = self._resolve_task(task)
            final_language = self._normalize_language(self.config.get("language"))
            condition_prev = self.config.get("condition_on_previous_text")
            initial_prompt = self.config.get("initial_prompt")

            if final_task == "translate":
                # Anti-Hallucination: never condition on previous text
                condition_prev = False

                # Force beam search if the user has set a narrow beam;
                # translation needs more search breadth.
                if beam_size < self._MIN_TRANSLATE_BEAM:
                    logging.info("Forcing beam_size=5 for Translation task.")
                    beam_size = self._MIN_TRANSLATE_BEAM

                # Inject guidance prompt if none exists
                if not initial_prompt:
                    initial_prompt = "Translate this to English."

            logging.info(
                "Model Dispatch: Task='%s', Language='%s', ConditionPrev=%s, Beam=%s",
                final_task,
                final_language,
                condition_prev,
                beam_size,
            )

            # Optional arguments are added only when set, so the model never
            # receives an explicit None for them.
            transcribe_opts = {
                "beam_size": beam_size,
                "best_of": best_of,
                "vad_filter": vad,
                "task": final_task,
                "vad_parameters": dict(min_silence_duration_ms=500),
                "condition_on_previous_text": condition_prev,
                "without_timestamps": True,
            }
            if initial_prompt:
                transcribe_opts["initial_prompt"] = initial_prompt
            if final_language:
                transcribe_opts["language"] = final_language

            segments, _info = self.model.transcribe(audio_data, **transcribe_opts)

            # Consume the segments inside the try block so that errors raised
            # while iterating them are reported like any other failure.
            return " ".join(segment.text for segment in segments).strip()

        except Exception as e:
            logging.error("Transcription failed: %s", e, exc_info=True)
            return f"Error: {str(e)}"

    def model_exists(self, size: str) -> bool:
        """
        Checks if a model size is already downloaded.

        Returns True when either the local ``faster-whisper-{size}`` directory
        contains a ``config.json``, or the legacy Hugging Face cache folder
        for that size has at least one entry under ``snapshots``.
        """
        if (self._local_model_dir(size) / "config.json").exists():
            return True

        # Legacy HF cache check
        snapshots = get_models_path() / f"models--Systran--faster-whisper-{size}" / "snapshots"
        return snapshots.exists() and any(snapshots.iterdir())