Files
ableton-mcp-ai/AbletonMCP_AI/MCP_Server/audio_analyzer.py
renato97 6ec8663954 Initial commit: AbletonMCP-AI complete system
- MCP Server with audio fallback, sample management
- Song generator with bus routing
- Reference listener and audio resampler
- Vector-based sample search
- Master chain with limiter and calibration
- Fix: Audio fallback now works without M4L
- Fix: Full song detection in sample loader

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 22:53:10 -03:00

682 lines
22 KiB
Python

"""
audio_analyzer.py - Audio analysis for key and BPM detection

Provides basic analysis of audio files to extract:
- BPM (tempo) via onset detection and autocorrelation
- Key (tonality) via chromagram analysis
- Spectral features for classification
"""
import os
import logging
import numpy as np
import subprocess
from pathlib import Path
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import dataclass
from enum import Enum
logger = logging.getLogger("AudioAnalyzer")

# Musical constants: the 12 pitch classes in sharp notation, used to map
# chroma bin indices to note names.
NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
KEY_PROFILES = {
    # Krumhansl-Schmuckler key profiles for key detection: perceptual weight
    # of each of the 12 pitch classes relative to the tonic.
    'major': [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88],
    'minor': [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
}
# Keys ordered around the circle of fifths; adjacent entries are musically
# compatible. Used for key-compatibility scoring and key transposition.
CIRCLE_OF_FIFTHS_MAJOR = ['C', 'G', 'D', 'A', 'E', 'B', 'F#', 'C#', 'G#', 'D#', 'A#', 'F']
CIRCLE_OF_FIFTHS_MINOR = ['Am', 'Em', 'Bm', 'F#m', 'C#m', 'G#m', 'D#m', 'A#m', 'Fm', 'Cm', 'Gm', 'Dm']
class SampleType(Enum):
    """Categories of musical samples recognized by the analyzer."""
    # Percussive one-shots
    KICK = "kick"
    SNARE = "snare"
    CLAP = "clap"
    HAT_CLOSED = "hat_closed"
    HAT_OPEN = "hat_open"
    HAT = "hat"
    PERC = "perc"
    SHAKER = "shaker"
    TOM = "tom"
    CRASH = "crash"
    RIDE = "ride"
    # Tonal / harmonic material
    BASS = "bass"
    SYNTH = "synth"
    PAD = "pad"
    LEAD = "lead"
    PLUCK = "pluck"
    ARP = "arp"
    CHORD = "chord"
    STAB = "stab"
    # Other content
    VOCAL = "vocal"
    FX = "fx"
    LOOP = "loop"
    AMBIENCE = "ambience"
    UNKNOWN = "unknown"
@dataclass
class AudioFeatures:
    """Features extracted from an audio file."""
    bpm: Optional[float]           # estimated tempo in BPM, or None if not detected
    key: Optional[str]             # musical key, e.g. 'C#' or 'Am', or None if not detected
    key_confidence: float          # confidence of the key estimate in [0, 1]
    duration: float                # length in seconds
    sample_rate: int               # sampling rate in Hz
    sample_type: SampleType        # classified sample category
    spectral_centroid: float       # mean spectral centroid ("brightness"), Hz
    spectral_rolloff: float        # mean spectral rolloff frequency, Hz
    zero_crossing_rate: float      # mean zero-crossing rate
    rms_energy: float              # mean RMS energy
    is_harmonic: bool              # True when the content is mostly tonal
    is_percussive: bool            # True when the content is mostly percussive
    suggested_genres: List[str]    # genre tags suggested for this sample
class AudioAnalyzer:
"""
Analizador de audio para samples musicales.
Soporta múltiples backends:
- librosa (recomendado, más preciso)
- basic (fallback sin dependencias externas, basado en nombre de archivo)
"""
def __init__(self, backend: str = "auto"):
"""
Inicializa el analizador de audio.
Args:
backend: 'librosa', 'basic', o 'auto' (detecta automáticamente)
"""
self.backend = backend
self._librosa_available = False
self._soundfile_available = False
if backend in ("auto", "librosa"):
self._check_librosa()
if self._librosa_available:
logger.info("Usando backend: librosa")
else:
logger.info("Usando backend: basic (análisis por nombre de archivo)")
def _check_librosa(self):
"""Verifica si librosa está disponible"""
try:
import librosa
import soundfile as sf
self._librosa_available = True
self._soundfile_available = True
self.librosa = librosa
self.sf = sf
except ImportError:
self._librosa_available = False
self._soundfile_available = False
def analyze(self, file_path: str) -> AudioFeatures:
"""
Analiza un archivo de audio y extrae características.
Args:
file_path: Ruta al archivo de audio
Returns:
AudioFeatures con los datos extraídos
"""
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"Archivo no encontrado: {file_path}")
# Intentar análisis con librosa si está disponible
if self._librosa_available:
try:
return self._analyze_with_librosa(file_path)
except Exception as e:
logger.warning(f"Error con librosa: {e}, usando análisis básico")
# Fallback a análisis básico
return self._analyze_basic(file_path)
def _analyze_with_librosa(self, file_path: str) -> AudioFeatures:
"""Análisis completo usando librosa"""
# Cargar audio
y, sr = self.librosa.load(file_path, sr=None, mono=True)
# Duración
duration = self.librosa.get_duration(y=y, sr=sr)
# Detectar BPM
tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr)
bpm = float(tempo) if isinstance(tempo, (int, float, np.number)) else None
# Análisis espectral
spectral_centroids = self.librosa.feature.spectral_centroid(y=y, sr=sr)[0]
spectral_rolloffs = self.librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
zcr = self.librosa.feature.zero_crossing_rate(y)[0]
rms = self.librosa.feature.rms(y=y)[0]
# Detectar key
key, key_confidence = self._detect_key_librosa(y, sr)
# Clasificación percusivo vs armónico
is_percussive = self._is_percussive(y, sr)
is_harmonic = not is_percussive and duration > 1.0
# Determinar tipo de sample
sample_type = self._classify_sample_type(
file_path, is_percussive, is_harmonic, duration,
float(np.mean(spectral_centroids)), float(np.mean(rms))
)
# Sugerir géneros
suggested_genres = self._suggest_genres(sample_type, bpm, key)
return AudioFeatures(
bpm=bpm,
key=key,
key_confidence=key_confidence,
duration=duration,
sample_rate=sr,
sample_type=sample_type,
spectral_centroid=float(np.mean(spectral_centroids)),
spectral_rolloff=float(np.mean(spectral_rolloffs)),
zero_crossing_rate=float(np.mean(zcr)),
rms_energy=float(np.mean(rms)),
is_harmonic=is_harmonic,
is_percussive=is_percussive,
suggested_genres=suggested_genres
)
def _detect_key_librosa(self, y: np.ndarray, sr: int) -> Tuple[Optional[str], float]:
"""
Detecta la tonalidad usando cromagrama y correlación con perfiles.
"""
try:
# Calcular cromagrama
chroma = self.librosa.feature.chroma_stft(y=y, sr=sr)
chroma_avg = np.mean(chroma, axis=1)
# Normalizar
chroma_avg = chroma_avg / (np.sum(chroma_avg) + 1e-10)
best_key = None
best_score = -np.inf
best_mode = None
# Probar todas las tonalidades mayores y menores
for mode, profile in KEY_PROFILES.items():
for i in range(12):
# Rotar el perfil
rotated_profile = np.roll(profile, i)
# Correlación
score = np.corrcoef(chroma_avg, rotated_profile)[0, 1]
if score > best_score:
best_score = score
best_mode = mode
best_key = NOTE_NAMES[i]
# Formatear resultado
if best_key:
if best_mode == 'minor':
best_key = best_key + 'm'
confidence = max(0.0, min(1.0, (best_score + 1) / 2))
return best_key, confidence
except Exception as e:
logger.warning(f"Error detectando key: {e}")
return None, 0.0
def _is_percussive(self, y: np.ndarray, sr: int) -> bool:
"""
Determina si un sonido es principalmente percusivo.
"""
try:
# Separar componentes armónicos y percusivos
y_harmonic, y_percussive = self.librosa.effects.hpss(y)
# Calcular energía relativa
energy_harmonic = np.sum(y_harmonic ** 2)
energy_percussive = np.sum(y_percussive ** 2)
total_energy = energy_harmonic + energy_percussive
if total_energy > 0:
percussive_ratio = energy_percussive / total_energy
return percussive_ratio > 0.6
except Exception as e:
logger.warning(f"Error en separación HPSS: {e}")
# Fallback: usar duración como heurística
duration = len(y) / sr
return duration < 0.5
def _analyze_basic(self, file_path: str) -> AudioFeatures:
"""
Análisis básico sin dependencias externas.
Usa metadatos del archivo y nombre para inferir características.
"""
path = Path(file_path)
name = path.stem
# Extraer del nombre
bpm = self._extract_bpm_from_name(name)
key = self._extract_key_from_name(name)
# Estimar duración del archivo
duration = self._estimate_duration(file_path)
# Clasificar por nombre
sample_type = self._classify_by_name(name)
# Determinar características por tipo
is_percussive = sample_type in [
SampleType.KICK, SampleType.SNARE, SampleType.CLAP,
SampleType.HAT, SampleType.HAT_CLOSED, SampleType.HAT_OPEN,
SampleType.PERC, SampleType.SHAKER, SampleType.TOM,
SampleType.CRASH, SampleType.RIDE
]
is_harmonic = sample_type in [
SampleType.BASS, SampleType.SYNTH, SampleType.PAD,
SampleType.LEAD, SampleType.PLUCK, SampleType.CHORD,
SampleType.VOCAL
]
# Valores por defecto basados en tipo
spectral_centroid = 5000.0 if is_percussive else 1000.0
rms_energy = 0.5
suggested_genres = self._suggest_genres(sample_type, bpm, key)
return AudioFeatures(
bpm=bpm,
key=key,
key_confidence=0.7 if key else 0.0,
duration=duration,
sample_rate=44100,
sample_type=sample_type,
spectral_centroid=spectral_centroid,
spectral_rolloff=spectral_centroid * 2,
zero_crossing_rate=0.1 if is_harmonic else 0.3,
rms_energy=rms_energy,
is_harmonic=is_harmonic,
is_percussive=is_percussive,
suggested_genres=suggested_genres
)
def _estimate_duration(self, file_path: str) -> float:
"""Estima la duración del archivo de audio"""
try:
import wave
ext = Path(file_path).suffix.lower()
if ext == '.wav':
with wave.open(file_path, 'rb') as wav:
frames = wav.getnframes()
rate = wav.getframerate()
return frames / float(rate)
elif ext in ('.mp3', '.ogg', '.flac', '.aif', '.aiff', '.m4a'):
windows_duration = self._estimate_duration_with_windows_shell(file_path)
if windows_duration > 0:
return windows_duration
# Estimación por tamaño de archivo
size = os.path.getsize(file_path)
# Aproximación: ~176KB por segundo para CD quality stereo
return size / (176.4 * 1024)
except Exception as e:
logger.warning(f"Error estimando duración: {e}")
return 0.0
def _estimate_duration_with_windows_shell(self, file_path: str) -> float:
"""Obtiene la duración usando metadatos del shell de Windows cuando están disponibles."""
if os.name != 'nt':
return 0.0
safe_path = file_path.replace("'", "''")
powershell_command = (
f"$path = '{safe_path}'; "
"$shell = New-Object -ComObject Shell.Application; "
"$folder = $shell.Namespace((Split-Path $path)); "
"$file = $folder.ParseName((Split-Path $path -Leaf)); "
"$duration = $folder.GetDetailsOf($file, 27); "
"Write-Output $duration"
)
try:
result = subprocess.run(
f'powershell -NoProfile -Command "{powershell_command}"',
capture_output=True,
text=True,
timeout=5,
check=False,
shell=True,
)
value = (result.stdout or "").strip()
if not value:
return 0.0
parts = value.split(':')
if len(parts) == 3:
return (int(parts[0]) * 3600) + (int(parts[1]) * 60) + float(parts[2])
return 0.0
except Exception:
return 0.0
def _extract_bpm_from_name(self, name: str) -> Optional[float]:
"""Extrae BPM del nombre del archivo"""
import re
patterns = [
r'[_\s\-](\d{2,3})\s*BPM',
r'[_\s\-](\d{2,3})[_\s\-]',
r'(\d{2,3})bpm',
r'[_\s\-](\d{2,3})\s*(?:BPM|bpm)?\s*(?:\.wav|\.mp3|\.aif)',
]
for pattern in patterns:
match = re.search(pattern, name, re.IGNORECASE)
if match:
bpm = int(match.group(1))
if 60 <= bpm <= 200:
return float(bpm)
return None
def _extract_key_from_name(self, name: str) -> Optional[str]:
"""Extrae key del nombre del archivo"""
import re
patterns = [
r'[_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]',
r'\bin\s+([A-G][#b]?(?:m|min|minor)?)\b',
r'Key\s+([A-G][#b]?(?:m|min|minor)?)',
r'[_\s\-]([A-G][#b]?)\s*(?:maj|major)?[_\s\-]',
]
for pattern in patterns:
match = re.search(pattern, name, re.IGNORECASE)
if match:
key = match.group(1)
# Normalizar
key = key.replace('b', '#').replace('Db', 'C#').replace('Eb', 'D#')
key = key.replace('Gb', 'F#').replace('Ab', 'G#').replace('Bb', 'A#')
# Detectar si es menor
is_minor = 'm' in key.lower() or 'min' in key.lower()
key = key.replace('min', '').replace('minor', '').replace('major', '')
key = key.rstrip('mM')
if is_minor:
key = key + 'm'
return key
return None
def _classify_sample_type(self, file_path: str, is_percussive: bool,
is_harmonic: bool, duration: float,
spectral_centroid: float, rms: float) -> SampleType:
"""Clasifica el tipo de sample basado en características"""
# Primero intentar por nombre
sample_type = self._classify_by_name(Path(file_path).stem)
if sample_type != SampleType.UNKNOWN:
return sample_type
# Clasificación por características de audio
if is_percussive:
if duration < 0.1:
if spectral_centroid < 2000:
return SampleType.KICK
elif spectral_centroid > 8000:
return SampleType.HAT_CLOSED
else:
return SampleType.SNARE
elif duration < 0.3:
return SampleType.CLAP
else:
return SampleType.PERC
elif is_harmonic:
if spectral_centroid < 500:
return SampleType.BASS
elif duration > 4.0:
return SampleType.PAD
else:
return SampleType.SYNTH
return SampleType.UNKNOWN
def _classify_by_name(self, name: str) -> SampleType:
"""Clasifica el tipo de sample basado en su nombre"""
name_lower = name.lower()
# Mapeo de palabras clave a tipos
keywords = {
SampleType.KICK: ['kick', 'bd', 'bass drum', 'kickdrum', 'kik'],
SampleType.SNARE: ['snare', 'snr', 'sd', 'rim'],
SampleType.CLAP: ['clap', 'clp', 'handclap'],
SampleType.HAT_CLOSED: ['closed hat', 'closedhat', 'chh', 'closed'],
SampleType.HAT_OPEN: ['open hat', 'openhat', 'ohh', 'open'],
SampleType.HAT: ['hat', 'hihat', 'hi-hat', 'hh'],
SampleType.PERC: ['perc', 'percussion', 'conga', 'bongo', 'timb'],
SampleType.SHAKER: ['shaker', 'shake', 'tamb'],
SampleType.TOM: ['tom', 'tomtom'],
SampleType.CRASH: ['crash', 'cymbal'],
SampleType.RIDE: ['ride'],
SampleType.BASS: ['bass', 'bassline', 'sub', '808', 'reese'],
SampleType.SYNTH: ['synth', 'lead', 'arp', 'sequence'],
SampleType.PAD: ['pad', 'atmosphere', 'dron'],
SampleType.PLUCK: ['pluck'],
SampleType.CHORD: ['chord', 'stab'],
SampleType.VOCAL: ['vocal', 'vox', 'voice', 'speech', 'talk'],
SampleType.FX: ['fx', 'effect', 'sweep', 'riser', 'downlifter', 'impact', 'hit', 'noise'],
SampleType.LOOP: ['loop', 'full', 'groove'],
}
for sample_type, words in keywords.items():
for word in words:
if word in name_lower:
return sample_type
return SampleType.UNKNOWN
def _suggest_genres(self, sample_type: SampleType, bpm: Optional[float],
key: Optional[str]) -> List[str]:
"""Sugiere géneros musicales apropiados para el sample"""
genres = []
if bpm:
if 118 <= bpm <= 128:
genres.extend(['house', 'tech-house', 'deep-house'])
elif 124 <= bpm <= 132:
genres.extend(['tech-house', 'techno'])
elif 132 <= bpm <= 142:
genres.extend(['techno', 'peak-time-techno'])
elif 142 <= bpm <= 150:
genres.extend(['trance', 'hard-techno'])
elif 160 <= bpm <= 180:
genres.extend(['drum-and-bass', 'neurofunk'])
elif bpm < 118:
genres.extend(['downtempo', 'ambient', 'lo-fi'])
# Por tipo de sample
if sample_type in [SampleType.KICK, SampleType.SNARE, SampleType.CLAP]:
if not genres:
genres = ['techno', 'house']
elif sample_type == SampleType.BASS:
if not genres:
genres = ['techno', 'house', 'bass-music']
elif sample_type in [SampleType.SYNTH, SampleType.PAD]:
if not genres:
genres = ['trance', 'progressive', 'ambient']
return genres if genres else ['electronic']
def get_compatible_key(self, key: str, shift: int = 0) -> str:
"""
Obtiene una key compatible usando el círculo de quintas.
Args:
key: Key original (ej: 'Am', 'F#m')
shift: Desplazamiento en el círculo (+1 = quinta arriba, -1 = quinta abajo)
Returns:
Key resultante
"""
is_minor = key.endswith('m')
root = key.rstrip('m')
if root not in NOTE_NAMES:
return key
circle = CIRCLE_OF_FIFTHS_MINOR if is_minor else CIRCLE_OF_FIFTHS_MAJOR
try:
idx = circle.index(key)
new_idx = (idx + shift) % 12
return circle[new_idx]
except ValueError:
return key
def calculate_key_compatibility(self, key1: str, key2: str) -> float:
"""
Calcula la compatibilidad entre dos keys (0-1).
Usa el círculo de quintas: keys cercanas son más compatibles.
"""
if key1 == key2:
return 1.0
# Normalizar
def normalize(k):
is_minor = k.endswith('m')
root = k.rstrip('m')
# Convertir bemoles a sostenidos
root = root.replace('Db', 'C#').replace('Eb', 'D#')
root = root.replace('Gb', 'F#').replace('Ab', 'G#').replace('Bb', 'A#')
return root + ('m' if is_minor else '')
k1 = normalize(key1)
k2 = normalize(key2)
if k1 == k2:
return 1.0
# Verificar si son modos diferentes de la misma nota
if k1.rstrip('m') == k2.rstrip('m'):
return 0.8 # Mismo root, diferente modo
# Usar círculo de quintas
is_minor1 = k1.endswith('m')
is_minor2 = k2.endswith('m')
if is_minor1 != is_minor2:
return 0.3 # Diferente modo, baja compatibilidad
circle = CIRCLE_OF_FIFTHS_MINOR if is_minor1 else CIRCLE_OF_FIFTHS_MAJOR
try:
idx1 = circle.index(k1)
idx2 = circle.index(k2)
distance = min(abs(idx1 - idx2), 12 - abs(idx1 - idx2))
# Compatibilidad decrece con la distancia
compatibility = max(0.0, 1.0 - (distance * 0.2))
return compatibility
except ValueError:
return 0.0
# Lazily-created global instance, shared via get_analyzer()
_analyzer: Optional[AudioAnalyzer] = None
def get_analyzer() -> AudioAnalyzer:
    """Return the process-wide AudioAnalyzer, creating it on first use."""
    global _analyzer
    instance = _analyzer
    if instance is None:
        instance = AudioAnalyzer()
        _analyzer = instance
    return instance
def analyze_sample(file_path: str) -> Dict[str, Any]:
    """
    Convenience wrapper: analyze a sample with the shared analyzer.

    Returns:
        Dictionary with the sample's extracted characteristics.
    """
    features = get_analyzer().analyze(file_path)
    summary: Dict[str, Any] = {
        field: getattr(features, field)
        for field in ('bpm', 'key', 'key_confidence', 'duration', 'sample_rate')
    }
    summary['sample_type'] = features.sample_type.value
    for field in ('spectral_centroid', 'rms_energy', 'is_harmonic',
                  'is_percussive', 'suggested_genres'):
        summary[field] = getattr(features, field)
    return summary
def quick_analyze(file_path: str) -> Dict[str, Any]:
    """
    Fast analysis driven purely by the file name.

    Uses the 'basic' backend, so no external dependencies are required.
    """
    features = AudioAnalyzer(backend="basic").analyze(file_path)
    return {
        'bpm': features.bpm,
        'key': features.key,
        'sample_type': features.sample_type.value,
        'suggested_genres': features.suggested_genres,
    }
# Manual testing entry point: analyze a file given on the command line
if __name__ == "__main__":
    import sys
    logging.basicConfig(level=logging.INFO)
    if len(sys.argv) < 2:
        print("Uso: python audio_analyzer.py <archivo_de_audio>")
        sys.exit(1)
    file_path = sys.argv[1]
    print(f"\nAnalizando: {file_path}")
    print("=" * 50)
    try:
        result = analyze_sample(file_path)
        print("\nResultados:")
        print(f" BPM: {result['bpm'] or 'No detectado'}")
        print(f" Key: {result['key'] or 'No detectado'} (confianza: {result['key_confidence']:.2f})")
        print(f" Duración: {result['duration']:.2f}s")
        print(f" Tipo: {result['sample_type']}")
        print(f" Géneros sugeridos: {', '.join(result['suggested_genres'])}")
        print(f" Es percusivo: {result['is_percussive']}")
        print(f" Es armónico: {result['is_harmonic']}")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)