""" validate_key_detection.py - Script de validación T019 Valida que librosa detecta key correctamente en ≥70% de samples armónicos. Uso: python validate_key_detection.py [--samples N] """ import sys import random import argparse from pathlib import Path from typing import List, Dict, Any import logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger("T019-Validation") # Importar AudioAnalyzer try: from audio_analyzer import AudioAnalyzer, SampleType ANALYZER_AVAILABLE = True except ImportError: ANALYZER_AVAILABLE = False logger.error("No se pudo importar AudioAnalyzer") sys.exit(1) def find_harmonic_samples(library_dir: str, max_samples: int = 50) -> List[Path]: """ Busca samples armónicos (bass, pad, synth, chord, lead, etc.) en la librería. """ library_path = Path(library_dir) extensions = {'.wav', '.aif', '.aiff', '.mp3'} all_files = [] for ext in extensions: all_files.extend(library_path.rglob(f'*{ext}')) all_files.extend(library_path.rglob(f'*{ext.upper()}')) # Filtrar por nombre para encontrar samples armónicos probables harmonic_keywords = [ 'bass', 'pad', 'synth', 'lead', 'chord', 'stab', 'pluck', 'arp', 'vocal', 'keys', 'piano', 'guitar', 'strings', 'pad' ] harmonic_files = [] for f in all_files: name_lower = f.stem.lower() if any(kw in name_lower for kw in harmonic_keywords): harmonic_files.append(f) # Seleccionar muestra aleatoria if len(harmonic_files) > max_samples: return random.sample(harmonic_files, max_samples) return harmonic_files def validate_key_detection(samples: List[Path]) -> Dict[str, Any]: """ Valida detección de key en samples. Retorna estadísticas de la validación. """ analyzer = AudioAnalyzer() results = { 'total': len(samples), 'with_key_detected': 0, 'with_key_in_name': 0, 'matching_keys': 0, 'high_confidence': 0, # confidence > 0.6 'low_confidence': 0, 'by_type': {}, 'failures': [] } for sample_path in samples: try: features = analyzer.analyze(str(sample_path)) # Extraer key del nombre si existe key_from_name = analyzer._extract_key_from_name(sample_path.stem) result_entry = { 'file': str(sample_path), 'detected_key': features.key, 'key_confidence': features.key_confidence, 'key_from_name': key_from_name, 'sample_type': features.sample_type.value, 'spectral_centroid': features.spectral_centroid, 'is_harmonic': features.is_harmonic } # Contar key detectada if features.key: results['with_key_detected'] += 1 # Alta confianza if features.key_confidence > 0.6: results['high_confidence'] += 1 else: results['low_confidence'] += 1 # Key en nombre if key_from_name: results['with_key_in_name'] += 1 # Comparar si coinciden if features.key and features.key.lower() == key_from_name.lower(): results['matching_keys'] += 1 result_entry['match'] = True else: result_entry['match'] = False # Por tipo sample_type = features.sample_type.value if sample_type not in results['by_type']: results['by_type'][sample_type] = {'total': 0, 'with_key': 0} results['by_type'][sample_type]['total'] += 1 if features.key: results['by_type'][sample_type]['with_key'] += 1 # Si no detectó key en sample armónico, es un "failure" if features.is_harmonic and not features.key: results['failures'].append(result_entry) logger.info(f"✓ {sample_path.stem}: key={features.key} " f"(conf={features.key_confidence:.2f}, " f"type={features.sample_type.value})") except Exception as e: logger.error(f"✗ Error analizando {sample_path}: {e}") results['failures'].append({'file': str(sample_path), 'error': str(e)}) return results def print_report(results: Dict[str, Any]): """Imprime reporte de validación T019.""" total = results['total'] print("\n" + "=" * 60) print("📊 REPORTE DE VALIDACIÓN T019: Key Detection con librosa") print("=" * 60) print(f"\n📁 Total samples analizados: {total}") print(f"🔑 Keys detectadas: {results['with_key_detected']} " f"({results['with_key_detected'] / total * 100:.1f}%)") print(f"📋 Keys en nombre de archivo: {results['with_key_in_name']}") print(f"✅ Keys coincidentes (detectada vs nombre): {results['matching_keys']}") print(f"\n📈 Distribución de confianza:") print(f" Alta (>0.6): {results['high_confidence']} " f"({results['high_confidence'] / total * 100:.1f}%)") print(f" Baja (≤0.6): {results['low_confidence']} " f"({results['low_confidence'] / total * 100:.1f}%)") print(f"\n📊 Por tipo de sample:") for sample_type, stats in sorted(results['by_type'].items()): rate = stats['with_key'] / stats['total'] * 100 if stats['total'] > 0 else 0 print(f" {sample_type}: {stats['with_key']}/{stats['total']} con key ({rate:.1f}%)") # Verificar KPI T019 detection_rate = results['with_key_detected'] / total * 100 if total > 0 else 0 print(f"\n🎯 KPI T019: Detección de key en ≥70% de samples") print(f" Resultado: {detection_rate:.1f}%") if detection_rate >= 70: print(f" ✅ CUMPLE el objetivo de 70%") else: print(f" ❌ NO CUMPLE el objetivo (necesita mejorar)") if results['failures']: print(f"\n⚠️ {len(results['failures'])} samples armónicos sin key detectada:") for f in results['failures'][:10]: # Mostrar primeros 10 print(f" - {Path(f['file']).name}") print("\n" + "=" * 60) def main(): parser = argparse.ArgumentParser( description='Validar detección de key con librosa (T019)' ) parser.add_argument( 'library_dir', help='Ruta a la librería de samples' ) parser.add_argument( '--samples', '-n', type=int, default=50, help='Número de samples a analizar (default: 50)' ) parser.add_argument( '--seed', type=int, default=42, help='Seed para reproducibilidad (default: 42)' ) args = parser.parse_args() random.seed(args.seed) print(f"🔍 Buscando samples armónicos en: {args.library_dir}") samples = find_harmonic_samples(args.library_dir, args.samples) if not samples: logger.error("No se encontraron samples armónicos") sys.exit(1) print(f"🎵 Analizando {len(samples)} samples...") results = validate_key_detection(samples) print_report(results) # Exit code según KPI detection_rate = results['with_key_detected'] / results['total'] * 100 sys.exit(0 if detection_rate >= 70 else 1) if __name__ == '__main__': main()