""" audio_analyzer.py - Análisis de audio para detección de Key y BPM Proporciona análisis básico de archivos de audio para extraer: - BPM (tempo) mediante detección de onset y autocorrelación - Key (tonalidad) mediante análisis de cromagrama - Características espectrales para clasificación """ import os import logging import numpy as np import subprocess from pathlib import Path from typing import Dict, Any, Optional, Tuple, List from dataclasses import dataclass from enum import Enum logger = logging.getLogger("AudioAnalyzer") # Constantes musicales NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'] KEY_PROFILES = { # Perfiles de Krumhansl-Schmuckler para detección de tonalidad 'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88], 'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17] } CIRCLE_OF_FIFTHS_MAJOR = ['C', 'G', 'D', 'A', 'E', 'B', 'F#', 'C#', 'G#', 'D#', 'A#', 'F'] CIRCLE_OF_FIFTHS_MINOR = ['Am', 'Em', 'Bm', 'F#m', 'C#m', 'G#m', 'D#m', 'A#m', 'Fm', 'Cm', 'Gm', 'Dm'] class SampleType(Enum): """Tipos de samples musicales""" KICK = "kick" SNARE = "snare" CLAP = "clap" HAT_CLOSED = "hat_closed" HAT_OPEN = "hat_open" HAT = "hat" PERC = "perc" SHAKER = "shaker" TOM = "tom" CRASH = "crash" RIDE = "ride" BASS = "bass" SYNTH = "synth" PAD = "pad" LEAD = "lead" PLUCK = "pluck" ARP = "arp" CHORD = "chord" STAB = "stab" VOCAL = "vocal" FX = "fx" LOOP = "loop" AMBIENCE = "ambience" UNKNOWN = "unknown" @dataclass class AudioFeatures: """Características extraídas de un archivo de audio""" bpm: Optional[float] key: Optional[str] key_confidence: float duration: float sample_rate: int sample_type: SampleType spectral_centroid: float spectral_rolloff: float zero_crossing_rate: float rms_energy: float is_harmonic: bool is_percussive: bool suggested_genres: List[str] class AudioAnalyzer: """ Analizador de audio para samples musicales. Soporta múltiples backends: - librosa (recomendado, más preciso) - basic (fallback sin dependencias externas, basado en nombre de archivo) """ def __init__(self, backend: str = "auto"): """ Inicializa el analizador de audio. Args: backend: 'librosa', 'basic', o 'auto' (detecta automáticamente) """ self.backend = backend self._librosa_available = False self._soundfile_available = False if backend in ("auto", "librosa"): self._check_librosa() if self._librosa_available: logger.info("Usando backend: librosa") else: logger.info("Usando backend: basic (análisis por nombre de archivo)") def _check_librosa(self): """Verifica si librosa está disponible""" try: import librosa import soundfile as sf self._librosa_available = True self._soundfile_available = True self.librosa = librosa self.sf = sf except ImportError: self._librosa_available = False self._soundfile_available = False def analyze(self, file_path: str) -> AudioFeatures: """ Analiza un archivo de audio y extrae características. Args: file_path: Ruta al archivo de audio Returns: AudioFeatures con los datos extraídos """ path = Path(file_path) if not path.exists(): raise FileNotFoundError(f"Archivo no encontrado: {file_path}") # Intentar análisis con librosa si está disponible if self._librosa_available: try: return self._analyze_with_librosa(file_path) except Exception as e: logger.warning(f"Error con librosa: {e}, usando análisis básico") # Fallback a análisis básico return self._analyze_basic(file_path) def _analyze_with_librosa(self, file_path: str) -> AudioFeatures: """Análisis completo usando librosa""" # Cargar audio y, sr = self.librosa.load(file_path, sr=None, mono=True) # Duración duration = self.librosa.get_duration(y=y, sr=sr) # Detectar BPM tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr) bpm = float(tempo) if isinstance(tempo, (int, float, np.number)) else None # Análisis espectral spectral_centroids = self.librosa.feature.spectral_centroid(y=y, sr=sr)[0] spectral_rolloffs = self.librosa.feature.spectral_rolloff(y=y, sr=sr)[0] zcr = self.librosa.feature.zero_crossing_rate(y)[0] rms = self.librosa.feature.rms(y=y)[0] # Detectar key key, key_confidence = self._detect_key_librosa(y, sr) # Clasificación percusivo vs armónico is_percussive = self._is_percussive(y, sr) is_harmonic = not is_percussive and duration > 1.0 # Determinar tipo de sample sample_type = self._classify_sample_type( file_path, is_percussive, is_harmonic, duration, float(np.mean(spectral_centroids)), float(np.mean(rms)) ) # Sugerir géneros suggested_genres = self._suggest_genres(sample_type, bpm, key) return AudioFeatures( bpm=bpm, key=key, key_confidence=key_confidence, duration=duration, sample_rate=sr, sample_type=sample_type, spectral_centroid=float(np.mean(spectral_centroids)), spectral_rolloff=float(np.mean(spectral_rolloffs)), zero_crossing_rate=float(np.mean(zcr)), rms_energy=float(np.mean(rms)), is_harmonic=is_harmonic, is_percussive=is_percussive, suggested_genres=suggested_genres ) def _detect_key_librosa(self, y: np.ndarray, sr: int) -> Tuple[Optional[str], float]: """ Detecta la tonalidad usando cromagrama y correlación con perfiles. """ try: # Calcular cromagrama chroma = self.librosa.feature.chroma_stft(y=y, sr=sr) chroma_avg = np.mean(chroma, axis=1) # Normalizar chroma_avg = chroma_avg / (np.sum(chroma_avg) + 1e-10) best_key = None best_score = -np.inf best_mode = None # Probar todas las tonalidades mayores y menores for mode, profile in KEY_PROFILES.items(): for i in range(12): # Rotar el perfil rotated_profile = np.roll(profile, i) # Correlación score = np.corrcoef(chroma_avg, rotated_profile)[0, 1] if score > best_score: best_score = score best_mode = mode best_key = NOTE_NAMES[i] # Formatear resultado if best_key: if best_mode == 'minor': best_key = best_key + 'm' confidence = max(0.0, min(1.0, (best_score + 1) / 2)) return best_key, confidence except Exception as e: logger.warning(f"Error detectando key: {e}") return None, 0.0 def _is_percussive(self, y: np.ndarray, sr: int) -> bool: """ Determina si un sonido es principalmente percusivo. """ try: # Separar componentes armónicos y percusivos y_harmonic, y_percussive = self.librosa.effects.hpss(y) # Calcular energía relativa energy_harmonic = np.sum(y_harmonic ** 2) energy_percussive = np.sum(y_percussive ** 2) total_energy = energy_harmonic + energy_percussive if total_energy > 0: percussive_ratio = energy_percussive / total_energy return percussive_ratio > 0.6 except Exception as e: logger.warning(f"Error en separación HPSS: {e}") # Fallback: usar duración como heurística duration = len(y) / sr return duration < 0.5 def _analyze_basic(self, file_path: str) -> AudioFeatures: """ Análisis básico sin dependencias externas. Usa metadatos del archivo y nombre para inferir características. """ path = Path(file_path) name = path.stem # Extraer del nombre bpm = self._extract_bpm_from_name(name) key = self._extract_key_from_name(name) # Estimar duración del archivo duration = self._estimate_duration(file_path) # Clasificar por nombre sample_type = self._classify_by_name(name) # Determinar características por tipo is_percussive = sample_type in [ SampleType.KICK, SampleType.SNARE, SampleType.CLAP, SampleType.HAT, SampleType.HAT_CLOSED, SampleType.HAT_OPEN, SampleType.PERC, SampleType.SHAKER, SampleType.TOM, SampleType.CRASH, SampleType.RIDE ] is_harmonic = sample_type in [ SampleType.BASS, SampleType.SYNTH, SampleType.PAD, SampleType.LEAD, SampleType.PLUCK, SampleType.CHORD, SampleType.VOCAL ] # Valores por defecto basados en tipo spectral_centroid = 5000.0 if is_percussive else 1000.0 rms_energy = 0.5 suggested_genres = self._suggest_genres(sample_type, bpm, key) return AudioFeatures( bpm=bpm, key=key, key_confidence=0.7 if key else 0.0, duration=duration, sample_rate=44100, sample_type=sample_type, spectral_centroid=spectral_centroid, spectral_rolloff=spectral_centroid * 2, zero_crossing_rate=0.1 if is_harmonic else 0.3, rms_energy=rms_energy, is_harmonic=is_harmonic, is_percussive=is_percussive, suggested_genres=suggested_genres ) def _estimate_duration(self, file_path: str) -> float: """Estima la duración del archivo de audio""" try: import wave ext = Path(file_path).suffix.lower() if ext == '.wav': with wave.open(file_path, 'rb') as wav: frames = wav.getnframes() rate = wav.getframerate() return frames / float(rate) elif ext in ('.mp3', '.ogg', '.flac', '.aif', '.aiff', '.m4a'): windows_duration = self._estimate_duration_with_windows_shell(file_path) if windows_duration > 0: return windows_duration # Estimación por tamaño de archivo size = os.path.getsize(file_path) # Aproximación: ~176KB por segundo para CD quality stereo return size / (176.4 * 1024) except Exception as e: logger.warning(f"Error estimando duración: {e}") return 0.0 def _estimate_duration_with_windows_shell(self, file_path: str) -> float: """Obtiene la duración usando metadatos del shell de Windows cuando están disponibles.""" if os.name != 'nt': return 0.0 safe_path = file_path.replace("'", "''") powershell_command = ( f"$path = '{safe_path}'; " "$shell = New-Object -ComObject Shell.Application; " "$folder = $shell.Namespace((Split-Path $path)); " "$file = $folder.ParseName((Split-Path $path -Leaf)); " "$duration = $folder.GetDetailsOf($file, 27); " "Write-Output $duration" ) try: result = subprocess.run( f'powershell -NoProfile -Command "{powershell_command}"', capture_output=True, text=True, timeout=5, check=False, shell=True, ) value = (result.stdout or "").strip() if not value: return 0.0 parts = value.split(':') if len(parts) == 3: return (int(parts[0]) * 3600) + (int(parts[1]) * 60) + float(parts[2]) return 0.0 except Exception: return 0.0 def _extract_bpm_from_name(self, name: str) -> Optional[float]: """Extrae BPM del nombre del archivo""" import re patterns = [ r'[_\s\-](\d{2,3})\s*BPM', r'[_\s\-](\d{2,3})[_\s\-]', r'(\d{2,3})bpm', r'[_\s\-](\d{2,3})\s*(?:BPM|bpm)?\s*(?:\.wav|\.mp3|\.aif)', ] for pattern in patterns: match = re.search(pattern, name, re.IGNORECASE) if match: bpm = int(match.group(1)) if 60 <= bpm <= 200: return float(bpm) return None def _extract_key_from_name(self, name: str) -> Optional[str]: """Extrae key del nombre del archivo""" import re patterns = [ r'[_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]', r'\bin\s+([A-G][#b]?(?:m|min|minor)?)\b', r'Key\s+([A-G][#b]?(?:m|min|minor)?)', r'[_\s\-]([A-G][#b]?)\s*(?:maj|major)?[_\s\-]', ] for pattern in patterns: match = re.search(pattern, name, re.IGNORECASE) if match: key = match.group(1) # Normalizar key = key.replace('b', '#').replace('Db', 'C#').replace('Eb', 'D#') key = key.replace('Gb', 'F#').replace('Ab', 'G#').replace('Bb', 'A#') # Detectar si es menor is_minor = 'm' in key.lower() or 'min' in key.lower() key = key.replace('min', '').replace('minor', '').replace('major', '') key = key.rstrip('mM') if is_minor: key = key + 'm' return key return None def _classify_sample_type(self, file_path: str, is_percussive: bool, is_harmonic: bool, duration: float, spectral_centroid: float, rms: float) -> SampleType: """Clasifica el tipo de sample basado en características""" # Primero intentar por nombre sample_type = self._classify_by_name(Path(file_path).stem) if sample_type != SampleType.UNKNOWN: return sample_type # Clasificación por características de audio if is_percussive: if duration < 0.1: if spectral_centroid < 2000: return SampleType.KICK elif spectral_centroid > 8000: return SampleType.HAT_CLOSED else: return SampleType.SNARE elif duration < 0.3: return SampleType.CLAP else: return SampleType.PERC elif is_harmonic: if spectral_centroid < 500: return SampleType.BASS elif duration > 4.0: return SampleType.PAD else: return SampleType.SYNTH return SampleType.UNKNOWN def _classify_by_name(self, name: str) -> SampleType: """Clasifica el tipo de sample basado en su nombre""" name_lower = name.lower() # Mapeo de palabras clave a tipos keywords = { SampleType.KICK: ['kick', 'bd', 'bass drum', 'kickdrum', 'kik'], SampleType.SNARE: ['snare', 'snr', 'sd', 'rim'], SampleType.CLAP: ['clap', 'clp', 'handclap'], SampleType.HAT_CLOSED: ['closed hat', 'closedhat', 'chh', 'closed'], SampleType.HAT_OPEN: ['open hat', 'openhat', 'ohh', 'open'], SampleType.HAT: ['hat', 'hihat', 'hi-hat', 'hh'], SampleType.PERC: ['perc', 'percussion', 'conga', 'bongo', 'timb'], SampleType.SHAKER: ['shaker', 'shake', 'tamb'], SampleType.TOM: ['tom', 'tomtom'], SampleType.CRASH: ['crash', 'cymbal'], SampleType.RIDE: ['ride'], SampleType.BASS: ['bass', 'bassline', 'sub', '808', 'reese'], SampleType.SYNTH: ['synth', 'lead', 'arp', 'sequence'], SampleType.PAD: ['pad', 'atmosphere', 'dron'], SampleType.PLUCK: ['pluck'], SampleType.CHORD: ['chord', 'stab'], SampleType.VOCAL: ['vocal', 'vox', 'voice', 'speech', 'talk'], SampleType.FX: ['fx', 'effect', 'sweep', 'riser', 'downlifter', 'impact', 'hit', 'noise'], SampleType.LOOP: ['loop', 'full', 'groove'], } for sample_type, words in keywords.items(): for word in words: if word in name_lower: return sample_type return SampleType.UNKNOWN def _suggest_genres(self, sample_type: SampleType, bpm: Optional[float], key: Optional[str]) -> List[str]: """Sugiere géneros musicales apropiados para el sample""" genres = [] if bpm: if 118 <= bpm <= 128: genres.extend(['house', 'tech-house', 'deep-house']) elif 124 <= bpm <= 132: genres.extend(['tech-house', 'techno']) elif 132 <= bpm <= 142: genres.extend(['techno', 'peak-time-techno']) elif 142 <= bpm <= 150: genres.extend(['trance', 'hard-techno']) elif 160 <= bpm <= 180: genres.extend(['drum-and-bass', 'neurofunk']) elif bpm < 118: genres.extend(['downtempo', 'ambient', 'lo-fi']) # Por tipo de sample if sample_type in [SampleType.KICK, SampleType.SNARE, SampleType.CLAP]: if not genres: genres = ['techno', 'house'] elif sample_type == SampleType.BASS: if not genres: genres = ['techno', 'house', 'bass-music'] elif sample_type in [SampleType.SYNTH, SampleType.PAD]: if not genres: genres = ['trance', 'progressive', 'ambient'] return genres if genres else ['electronic'] def get_compatible_key(self, key: str, shift: int = 0) -> str: """ Obtiene una key compatible usando el círculo de quintas. Args: key: Key original (ej: 'Am', 'F#m') shift: Desplazamiento en el círculo (+1 = quinta arriba, -1 = quinta abajo) Returns: Key resultante """ is_minor = key.endswith('m') root = key.rstrip('m') if root not in NOTE_NAMES: return key circle = CIRCLE_OF_FIFTHS_MINOR if is_minor else CIRCLE_OF_FIFTHS_MAJOR try: idx = circle.index(key) new_idx = (idx + shift) % 12 return circle[new_idx] except ValueError: return key def calculate_key_compatibility(self, key1: str, key2: str) -> float: """ Calcula la compatibilidad entre dos keys (0-1). Usa el círculo de quintas: keys cercanas son más compatibles. """ if key1 == key2: return 1.0 # Normalizar def normalize(k): is_minor = k.endswith('m') root = k.rstrip('m') # Convertir bemoles a sostenidos root = root.replace('Db', 'C#').replace('Eb', 'D#') root = root.replace('Gb', 'F#').replace('Ab', 'G#').replace('Bb', 'A#') return root + ('m' if is_minor else '') k1 = normalize(key1) k2 = normalize(key2) if k1 == k2: return 1.0 # Verificar si son modos diferentes de la misma nota if k1.rstrip('m') == k2.rstrip('m'): return 0.8 # Mismo root, diferente modo # Usar círculo de quintas is_minor1 = k1.endswith('m') is_minor2 = k2.endswith('m') if is_minor1 != is_minor2: return 0.3 # Diferente modo, baja compatibilidad circle = CIRCLE_OF_FIFTHS_MINOR if is_minor1 else CIRCLE_OF_FIFTHS_MAJOR try: idx1 = circle.index(k1) idx2 = circle.index(k2) distance = min(abs(idx1 - idx2), 12 - abs(idx1 - idx2)) # Compatibilidad decrece con la distancia compatibility = max(0.0, 1.0 - (distance * 0.2)) return compatibility except ValueError: return 0.0 # Instancia global _analyzer: Optional[AudioAnalyzer] = None def get_analyzer() -> AudioAnalyzer: """Obtiene la instancia global del analizador""" global _analyzer if _analyzer is None: _analyzer = AudioAnalyzer() return _analyzer def analyze_sample(file_path: str) -> Dict[str, Any]: """ Función de conveniencia para analizar un sample. Returns: Diccionario con las características del sample """ analyzer = get_analyzer() features = analyzer.analyze(file_path) return { 'bpm': features.bpm, 'key': features.key, 'key_confidence': features.key_confidence, 'duration': features.duration, 'sample_rate': features.sample_rate, 'sample_type': features.sample_type.value, 'spectral_centroid': features.spectral_centroid, 'rms_energy': features.rms_energy, 'is_harmonic': features.is_harmonic, 'is_percussive': features.is_percussive, 'suggested_genres': features.suggested_genres, } def quick_analyze(file_path: str) -> Dict[str, Any]: """ Análisis rápido basado solo en el nombre del archivo. No requiere dependencias externas. """ analyzer = AudioAnalyzer(backend="basic") features = analyzer.analyze(file_path) return { 'bpm': features.bpm, 'key': features.key, 'sample_type': features.sample_type.value, 'suggested_genres': features.suggested_genres, } # Testing if __name__ == "__main__": import sys logging.basicConfig(level=logging.INFO) if len(sys.argv) < 2: print("Uso: python audio_analyzer.py ") sys.exit(1) file_path = sys.argv[1] print(f"\nAnalizando: {file_path}") print("=" * 50) try: result = analyze_sample(file_path) print("\nResultados:") print(f" BPM: {result['bpm'] or 'No detectado'}") print(f" Key: {result['key'] or 'No detectado'} (confianza: {result['key_confidence']:.2f})") print(f" Duración: {result['duration']:.2f}s") print(f" Tipo: {result['sample_type']}") print(f" Géneros sugeridos: {', '.join(result['suggested_genres'])}") print(f" Es percusivo: {result['is_percussive']}") print(f" Es armónico: {result['is_harmonic']}") except Exception as e: print(f"Error: {e}") sys.exit(1)