"""
AudioAnalyzerDual - Dual-backend audio analyzer for AbletonMCP_AI

Primary: librosa for full spectral analysis
Fallback: filename-based inference when librosa unavailable

This module provides intelligent audio sample analysis with graceful
degradation when heavy dependencies aren't available.
"""

import logging
import os
import re
import struct
import wave
from dataclasses import dataclass, field, fields
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

logger = logging.getLogger(__name__)

# Anything accepted by os.fspath() that names a file on disk.
_PathLike = Union[str, Path]


@dataclass
class AudioFeatures:
    """Complete audio feature set for sample analysis."""

    bpm: Optional[float]
    key: Optional[str]
    key_confidence: float
    duration: float
    sample_rate: int
    sample_type: str
    spectral_centroid: float
    spectral_rolloff: float
    zero_crossing_rate: float
    rms_energy: float
    is_harmonic: bool
    is_percussive: bool
    suggested_genres: List[str] = field(default_factory=list)
    groove_template: Optional[Dict] = None
    transients: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert features to a dictionary for serialization.

        Returns:
            A shallow mapping of field name to field value, in field
            declaration order. Container values are shared with the
            instance, not copied.
        """
        return {f.name: getattr(self, f.name) for f in fields(self)}


class AudioAnalyzerDual:
    """
    Dual-backend audio analyzer:
    - Primary: librosa for full spectral analysis
    - Fallback: filename-based inference when librosa unavailable
    """

    # Key profiles for Krumhansl-Schmuckler algorithm (major and minor).
    # Index 0 is the tonic, index i is the pitch class i semitones above it.
    KRUMHANSL_MAJOR = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09,
                       2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    KRUMHANSL_MINOR = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53,
                       2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Pitch-class names in chromatic order (index 0 = C), sharp and flat
    # spellings. Used to turn a detected pitch-class index into a key name.
    KEY_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F',
                 'F#', 'G', 'G#', 'A', 'A#', 'B']
    KEY_NAMES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F',
                      'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

    # Genre suggestions based on BPM ranges (inclusive bounds)
    GENRE_BPM_RANGES = {
        'reggaeton': (85, 100),
        'trap': (130, 150),
        'hip_hop': (85, 110),
        'house': (120, 130),
        'techno': (125, 140),
        'dubstep': (140, 150),
        'drum_and_bass': (160, 180),
        'pop': (100, 130),
        'rock': (120, 140),
        'jazz': (120, 180),
        'ambient': (60, 85),
        'lofi': (70, 90),
    }

    # Sample type keywords for filename-based classification.
    # Dict order matters: the first type with a matching keyword wins.
    TYPE_KEYWORDS = {
        'kick': ['kick', 'bd', 'bass_drum', 'kck'],
        'snare': ['snare', 'sd', 'rim', 'snr'],
        'clap': ['clap', 'cp'],
        'hihat': ['hihat', 'hat', 'hh', 'hi_hat', 'openhat', 'closedhat'],
        'perc': ['perc', 'percussion', 'bongo', 'conga', 'timbal'],
        'tom': ['tom', 'toms'],
        'cymbal': ['cymbal', 'crash', 'ride', 'splash'],
        'bass': ['bass', 'sub', '808', 'bassline'],
        'synth': ['synth', 'pad', 'lead', 'pluck', 'arp'],
        'fx': ['fx', 'effect', 'riser', 'downer', 'sweep', 'impact'],
        'vocal': ['vocal', 'voice', 'vox', 'chant'],
        'loop': ['loop', 'full', 'groove'],
    }

    # Placeholder spectral features used by the filename-only backend.
    _DEFAULT_FEATURES = {
        'kick': {'spectral_centroid': 300, 'spectral_rolloff': 800,
                 'zero_crossing_rate': 0.05, 'rms_energy': 0.3},
        'snare': {'spectral_centroid': 1500, 'spectral_rolloff': 4000,
                  'zero_crossing_rate': 0.1, 'rms_energy': 0.25},
        'clap': {'spectral_centroid': 2000, 'spectral_rolloff': 5000,
                 'zero_crossing_rate': 0.15, 'rms_energy': 0.2},
        'hihat': {'spectral_centroid': 8000, 'spectral_rolloff': 15000,
                  'zero_crossing_rate': 0.3, 'rms_energy': 0.1},
        'perc': {'spectral_centroid': 2500, 'spectral_rolloff': 6000,
                 'zero_crossing_rate': 0.2, 'rms_energy': 0.2},
        'tom': {'spectral_centroid': 800, 'spectral_rolloff': 2000,
                'zero_crossing_rate': 0.08, 'rms_energy': 0.25},
        'cymbal': {'spectral_centroid': 10000, 'spectral_rolloff': 18000,
                   'zero_crossing_rate': 0.35, 'rms_energy': 0.15},
        'bass': {'spectral_centroid': 400, 'spectral_rolloff': 1200,
                 'zero_crossing_rate': 0.03, 'rms_energy': 0.2},
        'synth': {'spectral_centroid': 3000, 'spectral_rolloff': 8000,
                  'zero_crossing_rate': 0.1, 'rms_energy': 0.15},
        'fx': {'spectral_centroid': 5000, 'spectral_rolloff': 12000,
               'zero_crossing_rate': 0.25, 'rms_energy': 0.2},
        'vocal': {'spectral_centroid': 2000, 'spectral_rolloff': 6000,
                  'zero_crossing_rate': 0.08, 'rms_energy': 0.18},
        'loop': {'spectral_centroid': 2500, 'spectral_rolloff': 7000,
                 'zero_crossing_rate': 0.12, 'rms_energy': 0.2},
        'unknown': {'spectral_centroid': 3000, 'spectral_rolloff': 8000,
                    'zero_crossing_rate': 0.15, 'rms_energy': 0.2},
    }

    # A filename token ends at a separator, at the final ".ext", or at the
    # end of the string. It is a lookahead so the separator is NOT consumed
    # and can also start the next token ("_01_120_" yields both 01 and 120).
    _TOKEN_END = r'(?=[_\s\-]|\.[A-Za-z0-9]+$|$)'

    # "120bpm", "120 BPM", ... The lookbehind stops "1200bpm" matching as 200.
    _BPM_EXPLICIT_RE = re.compile(r'(?<!\d)(\d{2,3})\s*bpm', re.IGNORECASE)
    # A bare 2-3 digit token after a separator, e.g. "loop_120.wav".
    _BPM_BARE_RE = re.compile(r'[_\s\-](\d{2,3})' + _TOKEN_END)

    _KEY_PATTERNS = (
        # Key token after a separator: "_Am_", "-C#-", "_F#min.wav"
        re.compile(r'[_\s\-]([A-G][#b]?(?:minor|min|m)?)' + _TOKEN_END,
                   re.IGNORECASE),
        # "in Key" format: "loop in Am"
        re.compile(r'\bin\s+([A-G][#b]?(?:m|min|minor)?)\b', re.IGNORECASE),
        # Key prefix: "KeyAm", "Key_C#"
        re.compile(r'Key[_\s]?([A-G][#b]?m?)', re.IGNORECASE),
    )

    # Parses an upper-cased key token: root, optional accidental, optional mode.
    _KEY_TOKEN_RE = re.compile(r'^([A-G])([#B]?)(MINOR|MIN|M)?$')
    # Flat roots re-spelled as the enharmonic name used in KEY_NAMES.
    _FLAT_TO_SHARP = {'C': 'B', 'D': 'C#', 'E': 'D#', 'F': 'E',
                      'G': 'F#', 'A': 'G#', 'B': 'A#'}

    _BPM_MIN = 60
    _BPM_MAX = 200
    # Tempo assumed for the groove grid when none is known.
    _DEFAULT_GRID_BPM = 95.0

    def __init__(self, backend="auto"):
        """Initialize the analyzer with the specified backend.

        Args:
            backend: "auto" or "librosa" to use librosa when it can be
                imported (falling back to "basic" otherwise), or "basic"
                to force filename-based analysis.
        """
        self.backend = self._detect_backend(backend)
        self.librosa = None
        self.numpy = None
        self._init_libraries()

    def _detect_backend(self, preferred):
        """Return "librosa" if requested and importable, else "basic".

        Any value other than "basic" (including "auto") probes for librosa.
        """
        if preferred == "basic":
            return "basic"
        try:
            import librosa  # noqa: F401
            import numpy  # noqa: F401
        except ImportError:
            return "basic"
        return "librosa"

    def _init_libraries(self):
        """Bind librosa/numpy onto the instance when the backend needs them.

        Downgrades ``self.backend`` to "basic" if the import fails here.
        """
        if self.backend != "librosa":
            return
        try:
            import librosa
            import numpy as np
        except ImportError:
            self.backend = "basic"
            self.librosa = None
            self.numpy = None
        else:
            self.librosa = librosa
            self.numpy = np

    def analyze_sample(self, file_path: _PathLike) -> AudioFeatures:
        """
        Main entry point for audio analysis.

        Args:
            file_path: Path to audio file

        Returns:
            AudioFeatures dataclass with analysis results

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        if self.backend != "librosa":
            return self._analyze_basic(file_path)

        try:
            return self._analyze_with_librosa(file_path)
        except Exception as exc:
            # Deliberate best-effort: any decoding/analysis failure falls
            # back to filename-based analysis, but is no longer silent.
            logger.warning(
                "librosa analysis failed for %s (%s); "
                "falling back to filename-based analysis",
                file_path, exc,
            )
            return self._analyze_basic(file_path, error_context=str(exc))

    def _analyze_with_librosa(self, file_path: _PathLike) -> AudioFeatures:
        """
        Full analysis using librosa:

        1. Load audio: librosa.load()
        2. Detect BPM: librosa.beat.beat_track()
        3. Extract spectral: centroid, rolloff, zcr, rms
        4. Detect key: chromagram + Krumhansl-Schmuckler
        5. HPSS: harmonic/percussive separation
        6. Classify type based on features
        7. Extract groove template (for drums)
        8. Suggest genres based on BPM

        All values stored on the result are converted to built-in Python
        types so that ``AudioFeatures.to_dict()`` stays JSON-serializable.
        """
        librosa = self.librosa
        np = self.numpy

        y, sr = librosa.load(file_path, sr=None)

        # Basic info
        duration = float(librosa.get_duration(y=y, sr=sr))

        # BPM detection
        bpm = self._detect_bpm_librosa(y, sr)

        # Spectral features (mean over all frames)
        spectral_centroid = float(
            np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
        spectral_rolloff = float(
            np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
        zero_crossing_rate = float(
            np.mean(librosa.feature.zero_crossing_rate(y)))
        rms_energy = float(np.mean(librosa.feature.rms(y=y)))

        # Key detection
        key, key_confidence = self._detect_key_librosa(y, sr)

        # HPSS separation: a component "wins" when it holds > 60% of energy
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        harmonic_energy = float(np.sum(y_harmonic ** 2))
        percussive_energy = float(np.sum(y_percussive ** 2))
        total_energy = harmonic_energy + percussive_energy
        if total_energy > 0:
            # bool(): numpy.bool_ is not JSON-serializable
            is_harmonic = bool(harmonic_energy / total_energy > 0.6)
            is_percussive = bool(percussive_energy / total_energy > 0.6)
        else:
            is_harmonic = False
            is_percussive = False

        # Classify sample type
        sample_type = self._classify_sample_type(
            file_path, is_harmonic, is_percussive, spectral_centroid)

        # Extract groove template for drum-like material
        groove_template = None
        transients = None
        drum_types = ('kick', 'snare', 'clap', 'hihat', 'perc', 'loop')
        if is_percussive or sample_type in drum_types:
            groove_template = self._extract_groove_template(y, sr, bpm=bpm)
            if groove_template:
                transients = groove_template.get('transient_positions', [])
            else:
                transients = []

        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=key_confidence,
            duration=duration,
            sample_rate=int(sr),
            sample_type=sample_type,
            spectral_centroid=spectral_centroid,
            spectral_rolloff=spectral_rolloff,
            zero_crossing_rate=zero_crossing_rate,
            rms_energy=rms_energy,
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=self._suggest_genres(bpm),
            groove_template=groove_template,
            transients=transients,
        )

    def _analyze_basic(self, file_path: _PathLike,
                       error_context: Optional[str] = None) -> AudioFeatures:
        """
        Filename-based analysis:

        - Extract BPM from filename patterns
        - Extract key from filename patterns
        - Read duration/sample rate from the WAV header when possible
        - Classify type by keyword matching
        - Set default spectral features based on type

        Args:
            file_path: Path to the audio file.
            error_context: Description of the failure that triggered the
                fallback. Accepted for API compatibility; not stored on
                the result.
        """
        filename = os.path.basename(file_path)

        # Extract info from filename
        bpm = self._extract_bpm_from_name(filename)
        key = self._extract_key_from_name(filename)
        sample_type = self._classify_by_filename(filename)

        # Try to get duration from wave header
        duration, sample_rate = self._get_wave_info(file_path)

        # Placeholder spectral features for this sample type
        defaults = self._get_default_features_by_type(sample_type)

        # Determine harmonic/percussive nature by type
        is_harmonic = sample_type in (
            'synth', 'bass', 'vocal', 'pad', 'lead', 'pluck')
        is_percussive = sample_type in (
            'kick', 'snare', 'clap', 'hihat', 'perc', 'tom', 'cymbal')

        return AudioFeatures(
            bpm=bpm,
            key=key,
            # Moderate confidence for filename-based keys
            key_confidence=0.5 if key else 0.0,
            duration=duration,
            sample_rate=sample_rate,
            sample_type=sample_type,
            spectral_centroid=defaults['spectral_centroid'],
            spectral_rolloff=defaults['spectral_rolloff'],
            zero_crossing_rate=defaults['zero_crossing_rate'],
            rms_energy=defaults['rms_energy'],
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=self._suggest_genres(bpm),
            groove_template=None,
            transients=None,
        )

    def _detect_key_librosa(self, y, sr) -> Tuple[Optional[str], float]:
        """
        Detect the musical key from a chromagram using the
        Krumhansl-Schmuckler key profiles.

        Returns:
            (key, confidence); key is None when no key can be estimated.
        """
        chromagram = self.librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = self.numpy.mean(chromagram, axis=1)
        return self._key_from_chroma(chroma_mean)

    def _key_from_chroma(self, chroma_mean) -> Tuple[Optional[str], float]:
        """Pick the best-correlating key for a 12-bin pitch-class profile.

        Args:
            chroma_mean: 12 values, index 0 = C, in chromatic order.

        Returns:
            (key, confidence). ``key`` is a KEY_NAMES entry with an "m"
            suffix for minor; ``confidence`` is the Pearson correlation
            mapped from [-1, 1] to [0, 1]. Returns ``(None, 0.0)`` when the
            profile is not 12 finite values or is completely flat (e.g.
            silence), because the correlation is undefined there.
        """
        np = self.numpy
        chroma = np.asarray(chroma_mean, dtype=float).ravel()
        if (chroma.size != 12
                or not np.isfinite(chroma).all()
                or chroma.max() == chroma.min()):
            return None, 0.0

        profiles = (
            ('major', np.asarray(self.KRUMHANSL_MAJOR, dtype=float)),
            ('minor', np.asarray(self.KRUMHANSL_MINOR, dtype=float)),
        )

        best_score = float('-inf')
        best_tonic = None
        best_mode = None
        for tonic in range(12):
            # Rotate LEFT by `tonic` so the candidate tonic's energy lands
            # on index 0, where the profiles keep their tonic weight.
            # (Pearson correlation is scale-invariant, so no normalization
            # of the chroma vector is needed.)
            aligned = np.roll(chroma, -tonic)
            for mode, profile in profiles:
                score = float(np.corrcoef(aligned, profile)[0, 1])
                if score > best_score:
                    best_score, best_tonic, best_mode = score, tonic, mode

        if best_tonic is None:
            return None, 0.0

        key_name = self.KEY_NAMES[best_tonic]
        if best_mode == 'minor':
            key_name += 'm'

        # Convert correlation from [-1, 1] to [0, 1]
        confidence = max(0.0, min(1.0, (best_score + 1) / 2))
        return key_name, confidence

    def _extract_key_from_name(self, filename: str) -> Optional[str]:
        r"""
        Extract key from filename using regex patterns (case-insensitive).

        Patterns, tried in order:
        - separator + key token + (separator | final ".ext" | end of name)
        - \bin\s+([A-G][#b]?(?:m|min|minor)?)\b
        - Key[_\s]?([A-G][#b]?m?)

        Returns:
            The normalized key (see ``_normalize_key``) or None.
        """
        for pattern in self._KEY_PATTERNS:
            match = pattern.search(filename)
            if match:
                return self._normalize_key(match.group(1))
        return None

    def _normalize_key(self, key_str: str) -> str:
        """Normalize a key token to the ``KEY_NAMES`` spelling.

        Output is the upper-case root, an optional "#", and a lower-case
        "m" for minor, e.g. "Am", "C#", "A#m". Flat roots are re-spelled
        as sharps ("Bb" -> "A#", "Abmin" -> "G#m"). A token that cannot be
        parsed is returned stripped and upper-cased.
        """
        token = key_str.strip().upper()
        parsed = self._KEY_TOKEN_RE.match(token)
        if not parsed:
            return token

        root, accidental, mode = parsed.groups()
        if accidental == 'B':
            # After upper-casing, "B" in accidental position means flat.
            root = self._FLAT_TO_SHARP[root]
        elif accidental == '#':
            root += '#'
        return root + ('m' if mode else '')

    def _detect_bpm_librosa(self, y, sr) -> Optional[float]:
        """Detect BPM using librosa.beat.beat_track().

        Returns:
            The tempo as a float, or None when detection fails or yields
            a non-positive value.
        """
        try:
            tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr)
            # tempo may be a scalar or an array depending on the librosa
            # version; take the first estimate either way.
            tempo = float(self.numpy.atleast_1d(tempo).ravel()[0])
        except Exception:
            # Deliberate best-effort: BPM is optional, the rest of the
            # analysis should still proceed.
            logger.debug("BPM detection failed", exc_info=True)
            return None
        return tempo if tempo > 0 else None

    def _extract_bpm_from_name(self, filename: str) -> Optional[float]:
        r"""
        Extract BPM from filename using regex patterns.

        Patterns, tried in order:
        - (\d{2,3})\s*bpm           explicit marker, case-insensitive
        - [_\s\-](\d{2,3})          bare number token ending at a
                                    separator, the final ".ext", or the
                                    end of the name

        An explicit marker wins over bare numbers. Within each pattern the
        first candidate inside the valid range is returned.

        Range validation: 60-200 BPM
        """
        for pattern in (self._BPM_EXPLICIT_RE, self._BPM_BARE_RE):
            for match in pattern.finditer(filename):
                bpm = int(match.group(1))
                if self._BPM_MIN <= bpm <= self._BPM_MAX:
                    return float(bpm)
        return None

    def _extract_groove_template(self, y, sr, bpm=None):
        """
        Extract groove template for drum loops.

        1. Detect transients: librosa.onset.onset_detect()
        2. Keep onsets whose frame RMS exceeds half the mean RMS
        3. Categorize by level relative to the mean: kick/snare/hat
        4. Map to a beat grid
        5. Return template dict

        Args:
            y: Audio samples as loaded by librosa.
            sr: Sample rate of ``y``.
            bpm: Tempo used to size the grid; 95 BPM is assumed when it
                is None or not positive.

        Returns:
            The template dict (see ``_build_groove_grid``) or None when no
            transient passes the RMS threshold.
        """
        librosa = self.librosa
        np = self.numpy
        hop_length = 512

        # Same hop length for onsets and RMS so frame indices line up
        # directly, with no frame -> time -> frame round trip.
        onset_frames = librosa.onset.onset_detect(
            y=y, sr=sr, hop_length=hop_length)
        onset_times = librosa.frames_to_time(
            onset_frames, sr=sr, hop_length=hop_length)
        rms = librosa.feature.rms(y=y, hop_length=hop_length)[0]
        if len(onset_frames) == 0 or len(rms) == 0:
            return None

        mean_rms = float(np.mean(rms))
        rms_threshold = mean_rms * 0.5

        transients = []
        for frame, onset_time in zip(onset_frames, onset_times):
            if frame >= len(rms):
                continue
            level = float(rms[frame])
            if level > rms_threshold:
                transients.append({
                    'time': float(onset_time),
                    'velocity': level,
                    'category': self._categorize_transient(level, mean_rms),
                })

        return self._build_groove_grid(transients, bpm=bpm)

    def _build_groove_grid(self, transients, bpm=None):
        """Map transients onto a beat grid and build the template dict.

        Args:
            transients: Dicts with 'time' (seconds), 'velocity' and
                'category' keys.
            bpm: Tempo used to derive the number of beats; 95 BPM is
                assumed when it is None or not positive.

        Returns:
            None for an empty input, otherwise a dict with
            'transient_positions', 'grid_positions', 'num_beats',
            'kick_positions', 'snare_positions' and 'hat_positions'.
            Each grid position holds the integer 'beat' and 'sixteenth',
            the fractional part of the beat quantized to 16 steps (0-15).
            Times are scaled so the last transient lands on ``num_beats``.
        """
        if not transients:
            return None

        tempo = bpm if bpm is not None and bpm > 0 else self._DEFAULT_GRID_BPM
        beat_duration = 60.0 / tempo
        max_time = max(t['time'] for t in transients)
        num_beats = max(4, int(max_time / beat_duration))

        grid_positions = []
        for t in transients:
            # max_time is 0 when the only transient sits at the very start
            # of the file; place it on beat 0 instead of dividing by zero.
            if max_time > 0:
                beat_pos = (t['time'] / max_time) * num_beats
            else:
                beat_pos = 0.0
            grid_positions.append({
                'beat': int(beat_pos),
                'sixteenth': int((beat_pos % 1) * 16),
                'velocity': t['velocity'],
                'category': t['category'],
            })

        def by_category(name):
            return [p for p in grid_positions if p['category'] == name]

        return {
            'transient_positions': [t['time'] for t in transients],
            'grid_positions': grid_positions,
            'num_beats': num_beats,
            'kick_positions': by_category('kick'),
            'snare_positions': by_category('snare'),
            'hat_positions': by_category('hat'),
        }

    def _categorize_transient(self, velocity, mean_rms):
        """Categorize a transient by its level relative to the mean RMS.

        Returns:
            'kick' above 1.5x the mean, 'snare' above 0.8x, else 'hat'.
        """
        ratio = velocity / (mean_rms + 1e-10)  # epsilon avoids 0-division
        if ratio > 1.5:
            return 'kick'
        if ratio > 0.8:
            return 'snare'
        return 'hat'

    def _classify_sample_type(self, file_path, is_harmonic, is_percussive,
                              spectral_centroid):
        """Classify sample type from the filename, then from the spectrum.

        Filename keywords take precedence; the spectral centroid is only
        used when the filename gives no match.
        """
        type_by_name = self._classify_by_filename(os.path.basename(file_path))
        if type_by_name != 'unknown':
            return type_by_name

        # Fall back to spectral classification
        if is_percussive:
            if spectral_centroid < 500:
                return 'kick'
            if spectral_centroid < 2000:
                return 'snare'
            if spectral_centroid < 8000:
                return 'hihat'
            return 'cymbal'

        if is_harmonic:
            return 'bass' if spectral_centroid < 500 else 'synth'

        return 'unknown'

    def _classify_by_filename(self, filename: str) -> str:
        """Classify sample type by keywords in the filename.

        Matching is a case-insensitive substring test; the first type in
        ``TYPE_KEYWORDS`` with a matching keyword wins.

        Returns:
            The matched type, or 'unknown'.
        """
        name = filename.lower()
        for sample_type, keywords in self.TYPE_KEYWORDS.items():
            if any(keyword in name for keyword in keywords):
                return sample_type
        return 'unknown'

    def _get_default_features_by_type(self, sample_type: str) -> Dict[str, Any]:
        """Return default spectral features based on sample type.

        Unrecognized types get the 'unknown' defaults. A fresh dict is
        returned so callers cannot mutate the shared table.
        """
        table = self._DEFAULT_FEATURES
        return dict(table.get(sample_type, table['unknown']))

    def _suggest_genres(self, bpm: Optional[float]) -> List[str]:
        """Return the genres whose BPM range (inclusive) contains ``bpm``.

        Returns an empty list when ``bpm`` is None.
        """
        if bpm is None:
            return []
        return [
            genre
            for genre, (min_bpm, max_bpm) in self.GENRE_BPM_RANGES.items()
            if min_bpm <= bpm <= max_bpm
        ]

    def _get_wave_info(self, file_path: _PathLike) -> Tuple[float, int]:
        """Try to get duration and sample rate from a WAV file header.

        Returns:
            (duration_seconds, sample_rate). Files without a ".wav"
            extension yield ``(0.0, 44100)``. When a ".wav" header cannot
            be parsed the duration is estimated from the file size,
            assuming 16-bit stereo at 44.1 kHz.
        """
        default_rate = 44100
        path_str = os.fspath(file_path)  # accept pathlib.Path as well as str
        if not path_str.lower().endswith('.wav'):
            return 0.0, default_rate

        try:
            with wave.open(path_str, 'rb') as wf:
                rate = wf.getframerate()
                n_frames = wf.getnframes()
            if rate > 0:
                return n_frames / rate, rate
        except (wave.Error, EOFError, OSError, struct.error):
            # Non-PCM, truncated or unreadable header: estimate below.
            logger.debug("Could not parse WAV header of %s", path_str,
                         exc_info=True)

        # Rough estimate: 16-bit stereo at 44.1kHz = ~176KB per second
        try:
            return os.path.getsize(path_str) / (44100 * 2 * 2), default_rate
        except OSError:
            return 0.0, default_rate

    def get_backend_info(self) -> Dict[str, Any]:
        """Return information about the current backend."""
        return {
            'backend': self.backend,
            'librosa_available': self.librosa is not None,
            'numpy_available': self.numpy is not None,
            'version': '1.0.0',
        }


# Convenience function for direct usage
def analyze_audio(file_path: _PathLike, backend: str = "auto") -> AudioFeatures:
    """
    Analyze an audio file and return features.

    Args:
        file_path: Path to audio file
        backend: "auto", "librosa", or "basic"

    Returns:
        AudioFeatures dataclass

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    analyzer = AudioAnalyzerDual(backend=backend)
    return analyzer.analyze_sample(file_path)