# ableton-mcp-ai/mcp_server/engines/audio_analyzer_dual.py

"""
AudioAnalyzerDual - Dual-backend audio analyzer for AbletonMCP_AI

Primary: librosa for full spectral analysis
Fallback: filename-based inference when librosa unavailable

This module provides intelligent audio sample analysis with graceful
degradation when heavy dependencies aren't available.
"""

import os
import re
import wave
import struct
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Tuple, Any
from pathlib import Path


@dataclass
class AudioFeatures:
    """Result record holding every feature produced for one analysed sample."""
    # Tempo / tonality (None when it could not be determined)
    bpm: Optional[float]
    key: Optional[str]
    key_confidence: float
    # Basic file properties
    duration: float
    sample_rate: int
    sample_type: str
    # Spectral / energy descriptors
    spectral_centroid: float
    spectral_rolloff: float
    zero_crossing_rate: float
    rms_energy: float
    # Harmonic vs. percussive character
    is_harmonic: bool
    is_percussive: bool
    # Optional extras; each instance gets its own fresh list for the genres
    suggested_genres: List[str] = field(default_factory=list)
    groove_template: Optional[Dict] = None
    transients: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a shallow ``{field name: value}`` mapping of this record.

        Values are the attribute objects themselves (no copies are made).
        """
        exported = (
            'bpm',
            'key',
            'key_confidence',
            'duration',
            'sample_rate',
            'sample_type',
            'spectral_centroid',
            'spectral_rolloff',
            'zero_crossing_rate',
            'rms_energy',
            'is_harmonic',
            'is_percussive',
            'suggested_genres',
            'groove_template',
            'transients',
        )
        return {name: getattr(self, name) for name in exported}


class AudioAnalyzerDual:
    """
    Dual-backend audio analyzer:
    - Primary: librosa for full spectral analysis
    - Fallback: filename-based inference when librosa unavailable
    """

    # Key profiles for Krumhansl-Schmuckler algorithm (major and minor)
    KRUMHANSL_MAJOR = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    KRUMHANSL_MINOR = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Pitch-class names in chromatic order (index 0 = C), in sharp and flat spellings
    KEY_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    KEY_NAMES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

    # Genre suggestions based on BPM ranges
    GENRE_BPM_RANGES = {
        'reggaeton': (85, 100),
        'trap': (130, 150),
        'hip_hop': (85, 110),
        'house': (120, 130),
        'techno': (125, 140),
        'dubstep': (140, 150),
        'drum_and_bass': (160, 180),
        'pop': (100, 130),
        'rock': (120, 140),
        'jazz': (120, 180),
        'ambient': (60, 85),
        'lofi': (70, 90)
    }

    # Sample type keywords for filename-based classification
    TYPE_KEYWORDS = {
        'kick': ['kick', 'bd', 'bass_drum', 'kck'],
        'snare': ['snare', 'sd', 'rim', 'snr'],
        'clap': ['clap', 'cp'],
        'hihat': ['hihat', 'hat', 'hh', 'hi_hat', 'openhat', 'closedhat'],
        'perc': ['perc', 'percussion', 'bongo', 'conga', 'timbal'],
        'tom': ['tom', 'toms'],
        'cymbal': ['cymbal', 'crash', 'ride', 'splash'],
        'bass': ['bass', 'sub', '808', 'bassline'],
        'synth': ['synth', 'pad', 'lead', 'pluck', 'arp'],
        'fx': ['fx', 'effect', 'riser', 'downer', 'sweep', 'impact'],
        'vocal': ['vocal', 'voice', 'vox', 'chant'],
        'loop': ['loop', 'full', 'groove']
    }

    def __init__(self, backend="auto"):
        """Create an analyzer and resolve which backend it will use.

        Args:
            backend: "auto", "librosa" or "basic"; passed to
                ``_detect_backend`` to pick the effective backend.
        """
        # Library handles start out empty; _init_libraries() fills them in
        # only when the resolved backend is "librosa".
        self.librosa = None
        self.numpy = None
        self.backend = self._detect_backend(backend)
        self._init_libraries()

    def _detect_backend(self, preferred):
        """Resolve the requested backend name to one that can actually run.

        Args:
            preferred: "basic" forces the filename-based backend. Any other
                value ("librosa", "auto", ...) probes for librosa and numpy.

        Returns:
            "librosa" when both libraries import, otherwise "basic".
        """
        if preferred == "basic":
            return "basic"

        # Every other request is satisfied by librosa if it is importable.
        try:
            import librosa  # noqa: F401
            import numpy  # noqa: F401
        except ImportError:
            return "basic"
        return "librosa"

    def _init_libraries(self):
        """Bind ``self.librosa`` / ``self.numpy`` when the librosa backend is active.

        If the import fails, the analyzer downgrades itself to the "basic"
        backend and clears both handles.
        """
        if self.backend != "librosa":
            return

        try:
            import librosa
            import numpy as np
        except ImportError:
            self.backend = "basic"
            self.librosa = None
            self.numpy = None
        else:
            self.librosa = librosa
            self.numpy = np

    def analyze_sample(self, file_path):
        """
        Analyze one audio file with the active backend.

        Args:
            file_path: Path to audio file

        Returns:
            AudioFeatures dataclass with analysis results

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        if self.backend != "librosa":
            return self._analyze_basic(file_path)

        try:
            return self._analyze_with_librosa(file_path)
        except Exception as exc:
            # Any librosa failure degrades to the filename-based analysis
            return self._analyze_basic(file_path, error_context=str(exc))

    def _analyze_with_librosa(self, file_path):
        """
        Run the full librosa-based analysis of one audio file.

        Steps:
        1. Load audio: librosa.load()
        2. Detect BPM: librosa.beat.beat_track()
        3. Extract spectral: centroid, rolloff, zcr, rms (mean over frames)
        4. Detect key: chromagram + Krumhansl-Schmuckler
        5. HPSS: harmonic/percussive separation
        6. Classify type based on filename and features
        7. Extract groove template (for percussive material)
        8. Suggest genres based on BPM

        Args:
            file_path: Path to the audio file to load.

        Returns:
            AudioFeatures populated from the analysis. ``is_harmonic`` and
            ``is_percussive`` are plain Python bools so the record (and its
            ``to_dict()`` output) can be serialized, e.g. with ``json``.
        """
        np = self.numpy
        librosa = self.librosa

        # sr=None keeps the file's native sample rate
        y, sr = librosa.load(file_path, sr=None)

        # Basic info
        duration = librosa.get_duration(y=y, sr=sr)

        # BPM detection
        bpm = self._detect_bpm_librosa(y, sr)

        # Spectral features, each averaged over all frames
        spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
        spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
        zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
        rms_energy = float(np.mean(librosa.feature.rms(y=y)))

        # Key detection
        key, key_confidence = self._detect_key_librosa(y, sr)

        # HPSS separation: compare the energy of the two components
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        harmonic_energy = np.sum(y_harmonic ** 2)
        percussive_energy = np.sum(y_percussive ** 2)
        total_energy = harmonic_energy + percussive_energy

        # A component "wins" when it carries more than 60% of the energy.
        # The numpy comparison yields numpy.bool_, so cast to a builtin bool.
        if total_energy > 0:
            is_harmonic = bool((harmonic_energy / total_energy) > 0.6)
            is_percussive = bool((percussive_energy / total_energy) > 0.6)
        else:
            is_harmonic = False
            is_percussive = False

        # Classify sample type
        sample_type = self._classify_sample_type(file_path, is_harmonic, is_percussive, spectral_centroid)

        # Extract groove template for drum-like material
        groove_template = None
        transients = None
        if is_percussive or sample_type in ['kick', 'snare', 'clap', 'hihat', 'perc', 'loop']:
            groove_template = self._extract_groove_template(y, sr)
            transients = groove_template.get('transient_positions', []) if groove_template else []

        # Genre suggestions
        suggested_genres = self._suggest_genres(bpm)

        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=key_confidence,
            duration=duration,
            sample_rate=sr,
            sample_type=sample_type,
            spectral_centroid=spectral_centroid,
            spectral_rolloff=spectral_rolloff,
            zero_crossing_rate=zero_crossing_rate,
            rms_energy=rms_energy,
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=suggested_genres,
            groove_template=groove_template,
            transients=transients
        )

    def _analyze_basic(self, file_path, error_context=None):
        """
        Build an AudioFeatures record without decoding the audio.

        - BPM and key are parsed from the filename
        - Sample type comes from filename keyword matching
        - Duration / sample rate come from ``_get_wave_info``
        - Spectral values are per-type defaults, not measurements

        Args:
            file_path: Path to the audio file.
            error_context: Optional description of why the librosa path
                failed; accepted for the caller's convenience, not used here.

        Returns:
            AudioFeatures with ``groove_template`` and ``transients`` unset.
        """
        name = os.path.basename(file_path)

        # Everything the filename can tell us
        tempo = self._extract_bpm_from_name(name)
        detected_key = self._extract_key_from_name(name)
        kind = self._classify_by_filename(name)

        # Header-derived duration and sample rate
        length_seconds, rate = self._get_wave_info(file_path)

        # Typical spectral values for this kind of sample
        typical = self._get_default_features_by_type(kind)

        genres = self._suggest_genres(tempo)

        # Harmonic / percussive nature is inferred purely from the type label
        harmonic_kinds = ('synth', 'bass', 'vocal', 'pad', 'lead', 'pluck')
        percussive_kinds = ('kick', 'snare', 'clap', 'hihat', 'perc', 'tom', 'cymbal')

        # Filename-derived keys only get a moderate confidence
        confidence = 0.5 if detected_key else 0.0

        return AudioFeatures(
            bpm=tempo,
            key=detected_key,
            key_confidence=confidence,
            duration=length_seconds,
            sample_rate=rate,
            sample_type=kind,
            spectral_centroid=typical['spectral_centroid'],
            spectral_rolloff=typical['spectral_rolloff'],
            zero_crossing_rate=typical['zero_crossing_rate'],
            rms_energy=typical['rms_energy'],
            is_harmonic=kind in harmonic_kinds,
            is_percussive=kind in percussive_kinds,
            suggested_genres=genres,
            groove_template=None,
            transients=None,
        )

    def _detect_key_librosa(self, y, sr):
        """
        Estimate the musical key from a chromagram using the
        Krumhansl-Schmuckler key profiles.

        The time-averaged chroma vector is correlated against the major and
        minor profiles for each of the 12 candidate tonics; the best-scoring
        (tonic, mode) pair wins.

        Args:
            y: Audio signal passed to ``librosa.feature.chroma_stft``.
            sr: Sample rate of ``y``.

        Returns:
            (key, confidence): ``key`` is a name from KEY_NAMES with an 'm'
            suffix for minor (e.g. 'G', 'Am'); ``confidence`` is the best
            correlation mapped from [-1, 1] to [0, 1]. Returns
            ``(None, 0.0)`` when no key can be determined (flat or
            non-finite chroma, where the correlation is undefined).
        """
        np = self.numpy

        # Compute chromagram and average it over time -> 12 pitch-class weights
        chromagram = self.librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chromagram, axis=1)

        # A constant chroma vector has zero variance: correlation is undefined
        if float(np.max(chroma_mean) - np.min(chroma_mean)) == 0.0:
            return None, 0.0

        # Normalize once; the sum is unaffected by the rotation below
        chroma_mean = chroma_mean / (np.sum(chroma_mean) + 1e-10)

        profiles = (
            ('major', np.asarray(self.KRUMHANSL_MAJOR)),
            ('minor', np.asarray(self.KRUMHANSL_MINOR)),
        )

        best_score = -1
        best_key = None
        best_mode = None

        for tonic in range(12):
            # Shift LEFT by `tonic` so that pitch class `tonic` lands on
            # index 0, where the profiles store the tonic weight.
            aligned = np.roll(chroma_mean, -tonic)

            for mode, profile in profiles:
                score = np.corrcoef(aligned, profile)[0, 1]
                if score > best_score:
                    best_score = score
                    best_key = tonic
                    best_mode = mode

        # Every comparison fails when the correlations are NaN
        if best_key is None:
            return None, 0.0

        # Convert to key name
        key_name = self.KEY_NAMES[best_key]
        if best_mode == 'minor':
            key_name += 'm'

        # Confidence is the correlation score mapped from [-1, 1] to [0, 1]
        confidence = float(max(0.0, min(1.0, (best_score + 1) / 2)))

        return key_name, confidence

    def _extract_key_from_name(self, filename):
        r"""
        Extract a musical key from a filename using regex patterns.

        A key token is ``[A-G][#b]?`` with an optional minor suffix
        (``m`` / ``min`` / ``minor``); a trailing ``maj`` / ``major`` is
        accepted and ignored so "Cmaj" reads as C. Matching is
        case-insensitive. Patterns are tried in this order:

        1. Token after a separator (``_``, whitespace, ``-``) and followed
           by a separator, a dot, or the end of the name, so a key right
           before the extension ("Bass_Am.wav") is found.
        2. "in <key>" as separate words.
        3. "Key" prefix, e.g. "KeyAm" / "Key_F#". The prefix and the token
           must not be glued to other letters, so "Keyboard" or "Monkey_d"
           do not produce a key.

        Args:
            filename: File name (not a full path) to inspect.

        Returns:
            The key as returned by ``_normalize_key``, or None if no
            pattern matches.
        """
        key_token = r'([A-G][#b]?(?:m|min|minor)?)(?:maj(?:or)?)?'
        patterns = (
            # Pattern 1: key delimited by separators / extension dot / end
            r'[_\s\-]' + key_token + r'(?=[_\s\-.]|$)',
            # Pattern 2: "in Key" format
            r'\bin\s+' + key_token + r'\b',
            # Pattern 3: "Key" prefix, guarded against matching inside words
            r'(?<![A-Za-z])Key[_\s\-]?' + key_token + r'(?![A-Za-z])',
        )

        for pattern in patterns:
            match = re.search(pattern, filename, re.IGNORECASE)
            if match:
                return self._normalize_key(match.group(1))

        return None

    def _normalize_key(self, key_str):
        """Normalize a key string to the '<root>[m]' format used by KEY_NAMES.

        The root is spelled with sharps (flats and other enharmonic
        spellings are converted, e.g. 'Db' -> 'C#', 'Cb' -> 'B') and minor
        keys carry a lowercase 'm' suffix, matching the output of
        ``_detect_key_librosa`` (e.g. 'Am', 'C#m').

        Args:
            key_str: Key text such as 'am', 'C#m', 'Bbmin', 'Eb minor', 'G'.
                Matching is case-insensitive; a 'b'/'B' directly after the
                root letter is read as a flat sign.

        Returns:
            The normalized key. Text that is not recognisable as a key is
            returned stripped and upper-cased.
        """
        token = key_str.strip().upper()

        parsed = re.fullmatch(r'([A-G])([#B]?)[\s_\-]*(M|MIN|MINOR|MAJ|MAJOR)?', token)
        if parsed is None:
            return token

        letter, accidental, mode = parsed.groups()

        # Semitone index of each natural note within KEY_NAMES
        natural_index = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}
        # After upper-casing, the flat sign 'b' appears as 'B'
        semitone_shift = {'': 0, '#': 1, 'B': -1}

        pitch_class = (natural_index[letter] + semitone_shift[accidental]) % 12
        root = self.KEY_NAMES[pitch_class]

        is_minor = mode in ('M', 'MIN', 'MINOR')
        return root + ('m' if is_minor else '')

    def _detect_bpm_librosa(self, y, sr):
        """Estimate tempo via ``librosa.beat.beat_track()``.

        Returns:
            The tempo as a float, or None when it is not positive or when
            beat tracking raises.
        """
        try:
            tempo, _beats = self.librosa.beat.beat_track(y=y, sr=sr)
            # beat_track may hand back the tempo wrapped in an array
            if isinstance(tempo, self.numpy.ndarray):
                tempo = float(tempo.item())
            if tempo > 0:
                return float(tempo)
            return None
        except Exception:
            return None

    def _extract_bpm_from_name(self, filename):
        r"""
        Extract BPM from a filename using regex patterns.

        Patterns, in priority order:
        1. Explicit tag: (?<!\d)(\d{2,3})\s*bpm   (case-insensitive)
        2. Bare number between separators:
           (?<=[_\s\-])(\d{2,3})(?=[_\s\-])

        An explicit "bpm" tag always wins over a bare number. Within each
        pattern every occurrence is tried, and separators are not consumed,
        so adjacent tokens such as "_01_120_" are both examined. The
        ``(?<!\d)`` guard stops the tail of a longer number ("1120bpm")
        from being read as a tempo.

        Range validation: 60-200 BPM

        Args:
            filename: File name to inspect.

        Returns:
            The first in-range BPM as a float, or None.
        """
        patterns = (
            (r'(?<!\d)(\d{2,3})\s*bpm', re.IGNORECASE),
            (r'(?<=[_\s\-])(\d{2,3})(?=[_\s\-])', 0),
        )

        for pattern, flags in patterns:
            for match in re.finditer(pattern, filename, flags):
                bpm = int(match.group(1))
                if 60 <= bpm <= 200:
                    return float(bpm)

        return None

    def _extract_groove_template(self, y, sr):
        """
        Extract a groove template from percussive material.

        Steps:
        1. Detect onsets: librosa.onset.onset_detect()
        2. Keep onsets whose frame RMS exceeds half the mean RMS
        3. Categorize each by relative level: 'kick', 'snare' or 'hat'
        4. Map onset times onto a beat grid
        5. Return the template dict

        Onset detection and the RMS curve use the same hop length, so the
        onset frame index is used directly to look up the RMS value.

        Args:
            y: Audio signal.
            sr: Sample rate of ``y``.

        Returns:
            Dict with 'transient_positions' (seconds), 'grid_positions',
            'num_beats', 'kick_positions', 'snare_positions' and
            'hat_positions', or None when no onset passes the RMS filter.
        """
        np = self.numpy
        hop_length = 512

        # Detect onsets (frame indices) and their times in seconds
        onset_frames = self.librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length)
        onset_times = self.librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop_length)

        # RMS per frame, used as a velocity proxy
        rms = self.librosa.feature.rms(y=y, hop_length=hop_length)[0]
        mean_rms = np.mean(rms)

        # Filter by RMS threshold
        rms_threshold = mean_rms * 0.5

        transients = []
        for frame, onset_time in zip(onset_frames, onset_times):
            frame_idx = int(frame)
            if frame_idx < len(rms) and rms[frame_idx] > rms_threshold:
                transients.append({
                    'time': float(onset_time),
                    'velocity': float(rms[frame_idx]),
                    'category': self._categorize_transient(rms[frame_idx], mean_rms)
                })

        if not transients:
            return None

        # Map to beat grid; the latest transient is scaled to `num_beats`
        max_time = max(t['time'] for t in transients)
        num_beats = max(4, int(max_time / (60.0 / 95.0)))  # Assume 95 BPM if unknown

        grid_positions = []
        for t in transients:
            # max_time is 0 when the only transient sits at t=0 (typical for
            # one-shots); place it at the grid origin instead of dividing by 0.
            beat_pos = (t['time'] / max_time) * num_beats if max_time > 0 else 0.0
            sixteenth = int((beat_pos % 1) * 16)
            grid_positions.append({
                'beat': int(beat_pos),
                'sixteenth': sixteenth,
                'velocity': t['velocity'],
                'category': t['category']
            })

        return {
            'transient_positions': [t['time'] for t in transients],
            'grid_positions': grid_positions,
            'num_beats': num_beats,
            'kick_positions': [p for p in grid_positions if p['category'] == 'kick'],
            'snare_positions': [p for p in grid_positions if p['category'] == 'snare'],
            'hat_positions': [p for p in grid_positions if p['category'] == 'hat']
        }

    def _categorize_transient(self, velocity, mean_rms):
        """Label a transient 'kick', 'snare' or 'hat' from its level relative to the mean RMS."""
        # The tiny epsilon keeps the division defined when mean_rms is 0
        relative_level = velocity / (mean_rms + 1e-10)

        # Thresholds are checked from loudest to quietest
        for floor, label in ((1.5, 'kick'), (0.8, 'snare')):
            if relative_level > floor:
                return label
        return 'hat'

    def _classify_sample_type(self, file_path, is_harmonic, is_percussive, spectral_centroid):
        """Pick a sample type, preferring filename keywords over spectral cues.

        Args:
            file_path: Path whose base name is checked for type keywords.
            is_harmonic: Whether the sample is predominantly harmonic.
            is_percussive: Whether the sample is predominantly percussive.
            spectral_centroid: Mean spectral centroid used for the fallback.

        Returns:
            A type label, or 'unknown' when neither the filename nor the
            harmonic/percussive flags decide it.
        """
        base_name = os.path.basename(file_path).lower()

        # The filename is trusted first
        named_type = self._classify_by_filename(base_name)
        if named_type != 'unknown':
            return named_type

        # Otherwise fall back to the spectral centroid
        if is_percussive:
            # Brighter percussive hits map to higher-pitched drum types
            for ceiling, label in ((500, 'kick'), (2000, 'snare'), (8000, 'hihat')):
                if spectral_centroid < ceiling:
                    return label
            return 'cymbal'

        if is_harmonic:
            return 'bass' if spectral_centroid < 500 else 'synth'

        return 'unknown'

    def _classify_by_filename(self, filename):
        """Classify sample type by keywords in filename.

        Matching is case-insensitive substring search over TYPE_KEYWORDS.
        Types are prioritised in TYPE_KEYWORDS order, with one refinement:
        a keyword occurrence that lies entirely inside a longer keyword
        occurrence is ignored. Without this, 'hat' inside "chant" would
        classify chant samples as 'hihat' and the 'chant' keyword could
        never take effect.

        Args:
            filename: File name to inspect.

        Returns:
            The first matching type label, or 'unknown'.
        """
        filename_lower = filename.lower()

        # Collect every keyword occurrence as (type, start, end); the list
        # keeps TYPE_KEYWORDS priority order.
        hits = []
        for sample_type, keywords in self.TYPE_KEYWORDS.items():
            for keyword in keywords:
                start = filename_lower.find(keyword)
                while start != -1:
                    hits.append((sample_type, start, start + len(keyword)))
                    start = filename_lower.find(keyword, start + 1)

        for sample_type, start, end in hits:
            # Shadowed = contained in a strictly longer keyword occurrence
            shadowed = any(
                other_start <= start and end <= other_end
                and (other_end - other_start) > (end - start)
                for _, other_start, other_end in hits
            )
            if not shadowed:
                return sample_type

        return 'unknown'

    def _get_default_features_by_type(self, sample_type):
        """Look up placeholder spectral features for a sample type.

        Unrecognised types receive the 'unknown' defaults. A new dict is
        built on every call.
        """
        columns = ('spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate', 'rms_energy')

        # One row per type, values in the same order as `columns`
        table = {
            'kick': (300, 800, 0.05, 0.3),
            'snare': (1500, 4000, 0.1, 0.25),
            'clap': (2000, 5000, 0.15, 0.2),
            'hihat': (8000, 15000, 0.3, 0.1),
            'perc': (2500, 6000, 0.2, 0.2),
            'tom': (800, 2000, 0.08, 0.25),
            'cymbal': (10000, 18000, 0.35, 0.15),
            'bass': (400, 1200, 0.03, 0.2),
            'synth': (3000, 8000, 0.1, 0.15),
            'fx': (5000, 12000, 0.25, 0.2),
            'vocal': (2000, 6000, 0.08, 0.18),
            'loop': (2500, 7000, 0.12, 0.2),
            'unknown': (3000, 8000, 0.15, 0.2),
        }

        row = table.get(sample_type, table['unknown'])
        return dict(zip(columns, row))

    def _suggest_genres(self, bpm):
        """Return every genre whose BPM range (inclusive) contains ``bpm``.

        Genres are listed in GENRE_BPM_RANGES order; an unknown tempo
        (None) yields an empty list.
        """
        if bpm is None:
            return []

        return [
            genre
            for genre, (low, high) in self.GENRE_BPM_RANGES.items()
            if low <= bpm <= high
        ]

    def _get_wave_info(self, file_path):
        """Best-effort duration and sample rate for an audio file.

        For ``.wav`` files the values are read from the WAV header. If the
        header cannot be read, the duration is roughly estimated from the
        file size. Files with any other extension are not inspected.

        Args:
            file_path: Path to the file, as ``str``, ``bytes`` or any
                ``os.PathLike`` (e.g. ``pathlib.Path``).

        Returns:
            (duration_seconds, sample_rate). Defaults are ``0.0`` and
            ``44100`` for whatever could not be determined.
        """
        duration = 0.0
        sample_rate = 44100

        try:
            # Normalise to str so Path/bytes inputs reach the WAV reader
            # instead of failing on `.lower()` and being mis-estimated.
            path_text = os.fsdecode(file_path)
            if path_text.lower().endswith('.wav'):
                with wave.open(path_text, 'rb') as wf:
                    frame_rate = wf.getframerate()
                    n_frames = wf.getnframes()
                if frame_rate <= 0:
                    raise ValueError("WAV header reports a non-positive frame rate")
                sample_rate = frame_rate
                duration = n_frames / frame_rate
        except Exception:
            # Deliberately broad: this is a best-effort probe, so any
            # failure falls back to a rough size-based estimate.
            try:
                file_size = os.path.getsize(file_path)
                # Rough estimate: assume 16-bit stereo at 44.1kHz = ~176KB per second
                duration = file_size / (44100 * 2 * 2)
            except Exception:
                duration = 0.0

        return duration, sample_rate

    def get_backend_info(self):
        """Describe the active backend and which library handles are bound."""
        info = {'backend': self.backend}
        info['librosa_available'] = self.librosa is not None
        info['numpy_available'] = self.numpy is not None
        info['version'] = '1.0.0'
        return info


# Convenience function for direct usage
def analyze_audio(file_path, backend="auto"):
    """
    One-shot helper: build an analyzer and analyze a single file.

    Args:
        file_path: Path to audio file
        backend: "auto", "librosa", or "basic"

    Returns:
        AudioFeatures dataclass
    """
    return AudioAnalyzerDual(backend=backend).analyze_sample(file_path)