Files
cbcren2026/processors/audio_processor.py

94 lines
2.9 KiB
Python

"""
Audio file processor using Whisper
"""
import logging
from pathlib import Path
from typing import Dict, Any
from ..core import FileProcessingError
from ..config import settings
from ..services import vram_manager
from ..services.gpu_detector import gpu_detector
from .base_processor import FileProcessor
# Whisper (and the torch stack it pulls in) is an optional dependency:
# record availability at import time so processors can raise a clear
# domain error later instead of crashing this module on import.
try:
    import whisper
    import torch
except ImportError:
    WHISPER_AVAILABLE = False
else:
    WHISPER_AVAILABLE = True
class AudioProcessor(FileProcessor):
    """Processor that transcribes audio files to text using OpenAI Whisper.

    The Whisper model is loaded lazily on first use (on the device reported
    by ``gpu_detector``) and the transcription is written as a ``.txt`` file
    under ``settings.LOCAL_DOWNLOADS_PATH``.
    """

    def __init__(self):
        super().__init__("AudioProcessor")
        self.logger = logging.getLogger(__name__)
        self._model = None  # lazily loaded by _load_model()
        self._model_name = "medium"  # "medium" chosen for Spanish accuracy/speed balance

    def can_process(self, file_path: str) -> bool:
        """Return True if the file's extension is a configured audio extension."""
        ext = self.get_file_extension(file_path)
        return ext in settings.AUDIO_EXTENSIONS

    def _load_model(self) -> None:
        """Load the Whisper model on first call; subsequent calls are no-ops.

        Raises:
            FileProcessingError: if the ``whisper`` package is not installed.
        """
        if not WHISPER_AVAILABLE:
            raise FileProcessingError("Whisper not installed")
        if self._model is None:
            device = gpu_detector.get_device()
            self.logger.info(f"Loading Whisper model: {self._model_name} on {device}")
            self._model = whisper.load_model(self._model_name, device=device)
            vram_manager.update_usage()

    def process(self, file_path: str) -> Dict[str, Any]:
        """Transcribe an audio file (Spanish) and save the text next to downloads.

        Args:
            file_path: Path to the audio file to transcribe.

        Returns:
            Dict with ``success``, ``transcription_path``, ``text`` and
            ``model_used`` keys.

        Raises:
            FileProcessingError: if validation, model loading, transcription
                or writing the output fails.
        """
        self.validate_file(file_path)
        audio_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / f"{audio_path.stem}.txt"
        self.logger.info(f"Processing audio file: {audio_path}")
        try:
            # Load model if needed, then refresh VRAM accounting.
            self._load_model()
            vram_manager.update_usage()
            # FP16 is only supported on accelerator devices; Whisper falls
            # back to FP32 on CPU with a warning, so select it explicitly.
            # (assumes get_device() returns "cpu" on CPU-only hosts — TODO confirm)
            device = gpu_detector.get_device()
            use_fp16 = str(device) != "cpu"
            # inference_mode disables autograd bookkeeping for lower memory use.
            with torch.inference_mode():
                result = self._model.transcribe(
                    str(audio_path),
                    language="es",
                    fp16=use_fp16,
                    verbose=False,
                )
            # Save transcription (create target directory if missing).
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(result["text"])
            self.logger.info(f"Transcription completed: {output_path}")
            return {
                "success": True,
                "transcription_path": str(output_path),
                "text": result["text"],
                "model_used": self._model_name,
            }
        except FileProcessingError:
            # Already a domain error (e.g. "Whisper not installed") — do not
            # double-wrap it in another FileProcessingError.
            raise
        except Exception as e:
            # logger.exception records the traceback; chain the cause so
            # callers can inspect the original failure via __cause__.
            self.logger.exception(f"Audio processing failed: {e}")
            raise FileProcessingError(f"Audio processing failed: {e}") from e

    def cleanup(self) -> None:
        """Drop the model reference and ask the VRAM manager to free memory."""
        if self._model is not None:
            del self._model
            self._model = None
            vram_manager.cleanup()