"""Audio file processor using Whisper."""

import logging
from pathlib import Path
from typing import Any, Dict

from core import FileProcessingError
from config import settings
from services import vram_manager
from services.gpu_detector import gpu_detector

from .base_processor import FileProcessor

# Whisper (and torch) are optional at import time; availability is checked
# lazily in _load_model so the rest of the pipeline can run without them.
try:
    import whisper
    import torch
    WHISPER_AVAILABLE = True
except ImportError:
    WHISPER_AVAILABLE = False


class AudioProcessor(FileProcessor):
    """Processor that transcribes audio files to text with Whisper.

    The model is loaded lazily on first use and released via :meth:`cleanup`,
    so GPU memory is held only while transcription work is active.
    """

    def __init__(self, model_name: str = "medium") -> None:
        """Initialize the processor.

        Args:
            model_name: Whisper checkpoint to load. Defaults to "medium",
                chosen as an accuracy/VRAM trade-off for Spanish audio.
        """
        super().__init__("AudioProcessor")
        self.logger = logging.getLogger(__name__)
        self._model = None
        self._model_name = model_name
        # Device the model was loaded on; set by _load_model. Needed to
        # decide whether fp16 inference is valid (CUDA only).
        self._device = None

    def can_process(self, file_path: str) -> bool:
        """Return True if *file_path* has a configured audio extension."""
        return self.get_file_extension(file_path) in settings.AUDIO_EXTENSIONS

    def _load_model(self) -> None:
        """Load the Whisper model lazily onto the detected device.

        Raises:
            FileProcessingError: If the ``whisper`` package is not installed.
        """
        if not WHISPER_AVAILABLE:
            raise FileProcessingError("Whisper not installed")
        if self._model is None:
            self._device = gpu_detector.get_device()
            self.logger.info(
                "Loading Whisper model: %s on %s", self._model_name, self._device
            )
            self._model = whisper.load_model(self._model_name, device=self._device)
            vram_manager.update_usage()

    def process(self, file_path: str, language: str = "es") -> Dict[str, Any]:
        """Transcribe an audio file and save the text to the downloads folder.

        Args:
            file_path: Path to the audio file to transcribe.
            language: ISO language code passed to Whisper (default "es").

        Returns:
            Dict with keys ``success``, ``transcription_path``, ``text`` and
            ``model_used``.

        Raises:
            FileProcessingError: If Whisper is unavailable or transcription
                fails for any reason.
        """
        self.validate_file(file_path)

        audio_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / f"{audio_path.stem}.txt"
        self.logger.info("Processing audio file: %s", audio_path)

        try:
            # Load model if needed, then refresh VRAM accounting.
            self._load_model()
            vram_manager.update_usage()

            # fp16 is only supported on CUDA; on CPU Whisper would warn and
            # fall back to FP32, so request it only when it can be honored.
            use_fp16 = self._device is not None and str(self._device).startswith("cuda")

            # inference_mode disables autograd bookkeeping for lower memory use.
            with torch.inference_mode():
                result = self._model.transcribe(
                    str(audio_path),
                    language=language,
                    fp16=use_fp16,
                    verbose=False,
                )

            # Save transcription alongside other downloads.
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(result["text"], encoding="utf-8")

            self.logger.info("Transcription completed: %s", output_path)
            return {
                "success": True,
                "transcription_path": str(output_path),
                "text": result["text"],
                "model_used": self._model_name,
            }
        except FileProcessingError:
            # Already a domain error (e.g. "Whisper not installed") — re-raise
            # as-is instead of double-wrapping the message.
            raise
        except Exception as e:
            self.logger.error("Audio processing failed: %s", e)
            raise FileProcessingError(f"Audio processing failed: {e}") from e

    def cleanup(self) -> None:
        """Drop the model reference and release VRAM."""
        if self._model is not None:
            del self._model
            self._model = None
            vram_manager.cleanup()