CBCFacil v8.0 - Refactored with AMD GPU support
This commit is contained in:
93
processors/audio_processor.py
Normal file
93
processors/audio_processor.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""
|
||||
Audio file processor using Whisper
|
||||
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from ..core import FileProcessingError
|
||||
from ..config import settings
|
||||
from ..services import vram_manager
|
||||
from ..services.gpu_detector import gpu_detector
|
||||
from .base_processor import FileProcessor
|
||||
|
||||
# Whisper and torch are optional dependencies: record whether they could be
# imported so the processor can fail with a clear error instead of a NameError.
try:
    import whisper
    import torch
    WHISPER_AVAILABLE = True
except ImportError:
    # Keep both names bound so later references fail predictably and static
    # analysis does not see undefined names.
    whisper = None
    torch = None
    WHISPER_AVAILABLE = False
|
||||
|
||||
|
||||
class AudioProcessor(FileProcessor):
    """Processor that transcribes audio files with a Whisper model.

    The model is loaded lazily on the first call to :meth:`process` and is
    kept on the instance until :meth:`cleanup` is called.
    """

    def __init__(self, model_name: str = "medium", language: str = "es"):
        """Set up the processor without loading the model.

        Args:
            model_name: Whisper model name passed to ``whisper.load_model``.
                Defaults to ``"medium"``, the value chosen for Spanish audio.
            language: Language code passed to ``transcribe``. Defaults to
                ``"es"``.
        """
        super().__init__("AudioProcessor")
        self.logger = logging.getLogger(__name__)
        self._model = None
        self._model_name = model_name
        self._language = language

    def can_process(self, file_path: str) -> bool:
        """Return True when the file extension is in ``settings.AUDIO_EXTENSIONS``."""
        ext = self.get_file_extension(file_path)
        return ext in settings.AUDIO_EXTENSIONS

    def _load_model(self):
        """Load the Whisper model on first use.

        Raises:
            FileProcessingError: If Whisper (or torch) could not be imported.
        """
        if not WHISPER_AVAILABLE:
            raise FileProcessingError("Whisper not installed")

        if self._model is None:
            device = gpu_detector.get_device()
            self.logger.info(f"Loading Whisper model: {self._model_name} on {device}")
            self._model = whisper.load_model(self._model_name, device=device)
            vram_manager.update_usage()

    def _use_fp16(self) -> bool:
        """Return False only when the loaded model reports a CPU device.

        Whisper cannot run FP16 on CPU; it warns and falls back to FP32, so
        FP16 is requested only for other devices. If the device cannot be
        determined, FP16 stays enabled, matching the previous behavior.
        """
        device = getattr(self._model, "device", None)
        return getattr(device, "type", None) != "cpu"

    def process(self, file_path: str) -> Dict[str, Any]:
        """Transcribe an audio file and save the text next to other downloads.

        The transcription is written to
        ``settings.LOCAL_DOWNLOADS_PATH / "<audio stem>.txt"``.

        Args:
            file_path: Path of the audio file to transcribe.

        Returns:
            A dict with the keys ``success``, ``transcription_path``,
            ``text`` and ``model_used``.

        Raises:
            FileProcessingError: If loading the model, transcribing or
                writing the output fails. The original exception is chained
                as ``__cause__``.
        """
        # NOTE(review): validate_file comes from the base class; it runs
        # outside the try block, so whatever it raises propagates unwrapped.
        self.validate_file(file_path)
        audio_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / f"{audio_path.stem}.txt"

        self.logger.info(f"Processing audio file: {audio_path}")

        try:
            # Load model if needed
            self._load_model()

            # Update VRAM usage
            vram_manager.update_usage()

            # Transcribe under torch.inference_mode() so no autograd state
            # is recorded during decoding.
            with torch.inference_mode():
                result = self._model.transcribe(
                    str(audio_path),
                    language=self._language,
                    fp16=self._use_fp16(),
                    verbose=False,
                )

            # Save transcription
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(result["text"])

            self.logger.info(f"Transcription completed: {output_path}")

            return {
                "success": True,
                "transcription_path": str(output_path),
                "text": result["text"],
                "model_used": self._model_name,
            }

        except Exception as e:
            self.logger.error(f"Audio processing failed: {e}")
            # Chain the cause explicitly so the original traceback is kept.
            raise FileProcessingError(f"Audio processing failed: {e}") from e

    def cleanup(self) -> None:
        """Drop the loaded model and run the VRAM manager's cleanup."""
        # Rebinding to None releases this instance's reference to the model.
        self._model = None
        # Called regardless of whether a model had been loaded.
        vram_manager.cleanup()
|
||||
Reference in New Issue
Block a user