feat: Implement senior audio injection with 5 fallback methods

- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain
- Method 1: track.insert_arrangement_clip() [Live 12+]
- Method 2: track.create_audio_clip() [Live 11+]
- Method 3: arrangement_clips.add_new_clip() [Live 12+]
- Method 4: Session->duplicate_clip_to_arrangement [Legacy]
- Method 5: Session->Recording [Universal]

- Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow
- Update skills documentation
- Verified: 3 clips created at positions [0, 4, 8] in Arrangement View

Closes: Audio injection in Arrangement View
This commit is contained in:
OpenCode Agent
2026-04-12 14:02:32 -03:00
commit 5ce8187c65
118 changed files with 55075 additions and 0 deletions

View File

@@ -0,0 +1,613 @@
"""
AudioAnalyzerDual - Dual-backend audio analyzer for AbletonMCP_AI
Primary: librosa for full spectral analysis
Fallback: filename-based inference when librosa unavailable
This module provides intelligent audio sample analysis with graceful
degradation when heavy dependencies aren't available.
"""
import os
import re
import wave
import struct
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Tuple, Any
from pathlib import Path
@dataclass
class AudioFeatures:
    """Complete audio feature set for sample analysis.

    Produced by AudioAnalyzerDual; fields that cannot be computed by the
    active backend are left as None / defaults (e.g. the filename-based
    backend fills spectral fields with per-type defaults).
    """
    bpm: Optional[float]                 # Detected or filename-derived tempo; None if unknown
    key: Optional[str]                   # e.g. 'C', 'F#', 'Am'; None if undetected
    key_confidence: float                # 0.0-1.0 confidence for `key`
    duration: float                      # Length in seconds (0.0 if unknown)
    sample_rate: int                     # Hz
    sample_type: str                     # e.g. 'kick', 'snare', 'bass', 'loop', 'unknown'
    spectral_centroid: float             # Mean spectral centroid (Hz)
    spectral_rolloff: float              # Mean spectral rolloff (Hz)
    zero_crossing_rate: float            # Mean zero-crossing rate
    rms_energy: float                    # Mean RMS energy
    is_harmonic: bool                    # True when harmonic energy dominates
    is_percussive: bool                  # True when percussive energy dominates
    suggested_genres: List[str] = field(default_factory=list)  # Genres matching the BPM
    groove_template: Optional[Dict] = None   # Transient/grid map for drum material
    transients: Optional[List[float]] = None  # Onset times in seconds
    def to_dict(self) -> Dict[str, Any]:
        """Convert features to dictionary for serialization.

        Note: nested structures (groove_template, lists) are returned by
        reference, not copied.
        """
        return {
            'bpm': self.bpm,
            'key': self.key,
            'key_confidence': self.key_confidence,
            'duration': self.duration,
            'sample_rate': self.sample_rate,
            'sample_type': self.sample_type,
            'spectral_centroid': self.spectral_centroid,
            'spectral_rolloff': self.spectral_rolloff,
            'zero_crossing_rate': self.zero_crossing_rate,
            'rms_energy': self.rms_energy,
            'is_harmonic': self.is_harmonic,
            'is_percussive': self.is_percussive,
            'suggested_genres': self.suggested_genres,
            'groove_template': self.groove_template,
            'transients': self.transients
        }
class AudioAnalyzerDual:
    """
    Dual-backend audio analyzer:
    - Primary: librosa for full spectral analysis
    - Fallback: filename-based inference when librosa unavailable

    The public entry point is analyze_sample(); everything else is an
    implementation detail. Both backends return the same AudioFeatures
    shape so callers never need to know which one ran.
    """

    # Key profiles for Krumhansl-Schmuckler algorithm (major and minor).
    # Index 0 is the tonic; values are perceptual pitch-class weights.
    KRUMHANSL_MAJOR = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    KRUMHANSL_MINOR = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Chromatic pitch-class names (sharp and flat spellings), index 0 == C.
    KEY_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    KEY_NAMES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

    # Genre suggestions based on (inclusive) BPM ranges.
    GENRE_BPM_RANGES = {
        'reggaeton': (85, 100),
        'trap': (130, 150),
        'hip_hop': (85, 110),
        'house': (120, 130),
        'techno': (125, 140),
        'dubstep': (140, 150),
        'drum_and_bass': (160, 180),
        'pop': (100, 130),
        'rock': (120, 140),
        'jazz': (120, 180),
        'ambient': (60, 85),
        'lofi': (70, 90)
    }

    # Sample type keywords for filename-based classification.
    # Checked in declaration order; first keyword hit wins.
    TYPE_KEYWORDS = {
        'kick': ['kick', 'bd', 'bass_drum', 'kck'],
        'snare': ['snare', 'sd', 'rim', 'snr'],
        'clap': ['clap', 'cp'],
        'hihat': ['hihat', 'hat', 'hh', 'hi_hat', 'openhat', 'closedhat'],
        'perc': ['perc', 'percussion', 'bongo', 'conga', 'timbal'],
        'tom': ['tom', 'toms'],
        'cymbal': ['cymbal', 'crash', 'ride', 'splash'],
        'bass': ['bass', 'sub', '808', 'bassline'],
        'synth': ['synth', 'pad', 'lead', 'pluck', 'arp'],
        'fx': ['fx', 'effect', 'riser', 'downer', 'sweep', 'impact'],
        'vocal': ['vocal', 'voice', 'vox', 'chant'],
        'loop': ['loop', 'full', 'groove']
    }

    def __init__(self, backend="auto"):
        """Initialize the analyzer.

        Args:
            backend: "auto" (prefer librosa, fall back to basic),
                     "librosa" (use librosa if importable, else basic),
                     or "basic" (filename-based only).
        """
        self.backend = self._detect_backend(backend)
        self.librosa = None
        self.numpy = None
        self._init_libraries()

    def _detect_backend(self, preferred):
        """Resolve the requested backend to one that is actually usable.

        Returns:
            "librosa" when both librosa and numpy import cleanly,
            otherwise "basic".
        """
        if preferred == "basic":
            return "basic"
        # "librosa" and "auto" both mean: use librosa if importable.
        try:
            import librosa  # noqa: F401 -- availability probe only
            import numpy as np  # noqa: F401
            return "librosa"
        except ImportError:
            return "basic"

    def _init_libraries(self):
        """Bind librosa/numpy module references if the backend needs them.

        Downgrades to "basic" if the imports fail here despite detection
        (e.g. a partially broken installation).
        """
        if self.backend == "librosa":
            try:
                import librosa
                import numpy as np
                self.librosa = librosa
                self.numpy = np
            except ImportError:
                self.backend = "basic"
                self.librosa = None
                self.numpy = None

    def analyze_sample(self, file_path):
        """
        Main entry point for audio analysis.

        Args:
            file_path: Path to audio file.

        Returns:
            AudioFeatures dataclass with analysis results.

        Raises:
            FileNotFoundError: if file_path does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found: {file_path}")
        if self.backend == "librosa":
            try:
                return self._analyze_with_librosa(file_path)
            except Exception as e:
                # Graceful degradation: any librosa failure (corrupt file,
                # codec issue, ...) falls back to filename-based analysis.
                return self._analyze_basic(file_path, error_context=str(e))
        else:
            return self._analyze_basic(file_path)

    def _analyze_with_librosa(self, file_path):
        """
        Full analysis using librosa:
        1. Load audio: librosa.load()
        2. Detect BPM: librosa.beat.beat_track()
        3. Extract spectral: centroid, rolloff, zcr, rms
        4. Detect key: chromagram + Krumhansl-Schmuckler
        5. HPSS: harmonic/percussive separation
        6. Classify type based on features
        7. Extract groove template (for drums)
        8. Suggest genres based on BPM
        """
        # sr=None preserves the file's native sample rate.
        y, sr = self.librosa.load(file_path, sr=None)
        duration = self.librosa.get_duration(y=y, sr=sr)
        bpm = self._detect_bpm_librosa(y, sr)
        # Spectral features collapsed to scalar means over all frames.
        spectral_centroid = float(self.numpy.mean(self.librosa.feature.spectral_centroid(y=y, sr=sr)))
        spectral_rolloff = float(self.numpy.mean(self.librosa.feature.spectral_rolloff(y=y, sr=sr)))
        zero_crossing_rate = float(self.numpy.mean(self.librosa.feature.zero_crossing_rate(y)))
        rms_energy = float(self.numpy.mean(self.librosa.feature.rms(y=y)))
        key, key_confidence = self._detect_key_librosa(y, sr)
        # Harmonic/percussive source separation; a source "dominates" when
        # it carries more than 60% of total energy (both flags can be False).
        y_harmonic, y_percussive = self.librosa.effects.hpss(y)
        harmonic_energy = self.numpy.sum(y_harmonic ** 2)
        percussive_energy = self.numpy.sum(y_percussive ** 2)
        total_energy = harmonic_energy + percussive_energy
        is_harmonic = (harmonic_energy / total_energy) > 0.6 if total_energy > 0 else False
        is_percussive = (percussive_energy / total_energy) > 0.6 if total_energy > 0 else False
        sample_type = self._classify_sample_type(file_path, is_harmonic, is_percussive, spectral_centroid)
        # Groove templates only make sense for percussive material.
        groove_template = None
        transients = None
        if is_percussive or sample_type in ['kick', 'snare', 'clap', 'hihat', 'perc', 'loop']:
            groove_template = self._extract_groove_template(y, sr)
            transients = groove_template.get('transient_positions', []) if groove_template else []
        suggested_genres = self._suggest_genres(bpm)
        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=key_confidence,
            duration=duration,
            sample_rate=sr,
            sample_type=sample_type,
            spectral_centroid=spectral_centroid,
            spectral_rolloff=spectral_rolloff,
            zero_crossing_rate=zero_crossing_rate,
            rms_energy=rms_energy,
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=suggested_genres,
            groove_template=groove_template,
            transients=transients
        )

    def _analyze_basic(self, file_path, error_context=None):
        """
        Filename-based analysis:
        - Extract BPM from filename patterns
        - Extract key from filename patterns
        - Estimate duration (wave header, falling back to file size)
        - Classify type by keyword matching
        - Set default spectral features based on type

        Args:
            file_path: Path to audio file (need not exist; missing files
                yield duration 0.0).
            error_context: Optional librosa failure message from the caller.
                Currently recorded nowhere -- kept for API stability.
        """
        filename = os.path.basename(file_path)
        bpm = self._extract_bpm_from_name(filename)
        key = self._extract_key_from_name(filename)
        sample_type = self._classify_by_filename(filename)
        duration, sample_rate = self._get_wave_info(file_path)
        defaults = self._get_default_features_by_type(sample_type)
        suggested_genres = self._suggest_genres(bpm)
        # Without audio data, harmonic/percussive nature is inferred from type.
        is_harmonic = sample_type in ['synth', 'bass', 'vocal', 'pad', 'lead', 'pluck']
        is_percussive = sample_type in ['kick', 'snare', 'clap', 'hihat', 'perc', 'tom', 'cymbal']
        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=0.5 if key else 0.0,  # Moderate confidence for filename-based
            duration=duration,
            sample_rate=sample_rate,
            sample_type=sample_type,
            spectral_centroid=defaults['spectral_centroid'],
            spectral_rolloff=defaults['spectral_rolloff'],
            zero_crossing_rate=defaults['zero_crossing_rate'],
            rms_energy=defaults['rms_energy'],
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=suggested_genres,
            groove_template=None,
            transients=None
        )

    def _detect_key_librosa(self, y, sr):
        """
        Detect musical key via chromagram + Krumhansl-Schmuckler profiles.

        Returns:
            (key_name, confidence) where key_name is e.g. 'G' or 'Am' and
            confidence is the best profile correlation mapped to [0, 1].
        """
        chromagram = self.librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = self.numpy.mean(chromagram, axis=1)
        best_score = -1
        best_key = None
        best_mode = None
        for shift in range(12):
            # Rotate so the candidate tonic (pitch class `shift`) lands at
            # index 0, aligning with the profiles (which start at the tonic):
            # roll(x, -shift)[0] == x[shift]. BUGFIX: rolling by +shift put
            # x[-shift] at index 0, so the reported key was the mirror
            # (12 - tonic) % 12 -- e.g. a track in G was reported as F.
            rotated_chroma = self.numpy.roll(chroma_mean, -shift)
            # Normalize (epsilon guards against an all-zero chromagram).
            rotated_chroma = rotated_chroma / (self.numpy.sum(rotated_chroma) + 1e-10)
            major_corr = self.numpy.corrcoef(rotated_chroma, self.KRUMHANSL_MAJOR)[0, 1]
            if major_corr > best_score:
                best_score = major_corr
                best_key = shift
                best_mode = 'major'
            minor_corr = self.numpy.corrcoef(rotated_chroma, self.KRUMHANSL_MINOR)[0, 1]
            if minor_corr > best_score:
                best_score = minor_corr
                best_key = shift
                best_mode = 'minor'
        key_name = self.KEY_NAMES[best_key]
        if best_mode == 'minor':
            key_name += 'm'
        # Map correlation from [-1, 1] to [0, 1] and clamp.
        confidence = (best_score + 1) / 2
        confidence = max(0.0, min(1.0, confidence))
        return key_name, confidence

    def _extract_key_from_name(self, filename):
        r"""
        Extract key from filename using regex patterns.
        Patterns:
        - [_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]
        - \bin\s+([A-G][#b]?(?:m|min|minor)?)\b
        - Key[_\s]?([A-G][#b]?m?)

        Returns the normalized key string, or None if nothing matches.
        NOTE: patterns are case-insensitive, so single letters like a lone
        'a' between separators can false-positive -- accepted tradeoff.
        """
        # Pattern 1: key token surrounded by separators.
        pattern1 = r'[_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]'
        match = re.search(pattern1, filename, re.IGNORECASE)
        if match:
            return self._normalize_key(match.group(1))
        # Pattern 2: "in <Key>" format.
        pattern2 = r'\bin\s+([A-G][#b]?(?:m|min|minor)?)\b'
        match = re.search(pattern2, filename, re.IGNORECASE)
        if match:
            return self._normalize_key(match.group(1))
        # Pattern 3: "Key" prefix, e.g. "Key_Am".
        pattern3 = r'Key[_\s]?([A-G][#b]?m?)'
        match = re.search(pattern3, filename, re.IGNORECASE)
        if match:
            return self._normalize_key(match.group(1))
        return None

    def _normalize_key(self, key_str):
        """Normalize a key token to standard form, e.g. 'ebm' -> 'D#m'.

        The root is uppercased, flats become their sharp equivalents, and a
        minor suffix ('m', 'min', 'minor' in any case) is preserved as a
        single lowercase 'm'.

        BUGFIX: the previous version uppercased the whole token first, so
        'Em'/'Cm'/'F#m' were returned as 'EM'/'CM'/'F#M' while 'Emin'
        correctly yielded 'Em'. The minor suffix is now detected BEFORE any
        case folding.
        """
        token = key_str.strip()
        lowered = token.lower()
        # Detect and strip the minor-mode suffix (longest first).
        if lowered.endswith('minor') and len(token) > 5:
            root, is_minor = token[:-5], True
        elif lowered.endswith('min') and len(token) > 3:
            root, is_minor = token[:-3], True
        elif lowered.endswith('m') and len(token) > 1:
            root, is_minor = token[:-1], True
        else:
            root, is_minor = token, False
        root = root.strip().upper()
        # Convert flats to the sharp spellings used by KEY_NAMES.
        flat_to_sharp = {'DB': 'C#', 'EB': 'D#', 'GB': 'F#', 'AB': 'G#', 'BB': 'A#'}
        root = flat_to_sharp.get(root, root)
        return root + ('m' if is_minor else '')

    def _detect_bpm_librosa(self, y, sr):
        """Detect BPM using librosa.beat.beat_track().

        Returns a float tempo, or None on failure / non-positive tempo.
        """
        try:
            tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr)
            # Newer librosa returns a 1-element ndarray; unwrap to a scalar.
            if isinstance(tempo, self.numpy.ndarray):
                tempo = float(tempo.item())
            return float(tempo) if tempo > 0 else None
        except Exception:
            return None

    def _extract_bpm_from_name(self, filename):
        r"""
        Extract BPM from filename using regex patterns.
        Patterns (tried in order):
        - [_\s\-](\d{2,3})\s*BPM
        - [_\s\-](\d{2,3})[_\s\-]
        - (\d{2,3})bpm
        Range validation: 60-200 BPM. Returns float BPM or None.
        """
        # Pattern 1: explicit "BPM" suffix -- most reliable.
        pattern1 = r'[_\s\-](\d{2,3})\s*BPM'
        match = re.search(pattern1, filename, re.IGNORECASE)
        if match:
            bpm = int(match.group(1))
            if 60 <= bpm <= 200:
                return float(bpm)
        # Pattern 2: bare number between separators; first in-range hit wins.
        # This can mis-read e.g. a sample index, hence the range filter.
        pattern2 = r'[_\s\-](\d{2,3})[_\s\-]'
        matches = re.findall(pattern2, filename)
        for m in matches:
            bpm = int(m)
            if 60 <= bpm <= 200:
                return float(bpm)
        # Pattern 3: "bpm" suffix without separator, e.g. "128bpm".
        pattern3 = r'(\d{2,3})bpm'
        match = re.search(pattern3, filename, re.IGNORECASE)
        if match:
            bpm = int(match.group(1))
            if 60 <= bpm <= 200:
                return float(bpm)
        return None

    def _extract_groove_template(self, y, sr):
        """
        Extract groove template for drum loops.
        For drum loops:
        1. Detect transients: librosa.onset.onset_detect()
        2. Filter by RMS threshold
        3. Categorize by velocity: kick-like, snare-like, hat-like
        4. Map to beat grid
        5. Return template dict (or None when no transients survive).
        """
        onset_frames = self.librosa.onset.onset_detect(y=y, sr=sr)
        onset_times = self.librosa.frames_to_time(onset_frames, sr=sr)
        # RMS per frame gives a velocity proxy around each onset.
        hop_length = 512
        rms = self.librosa.feature.rms(y=y, hop_length=hop_length)[0]
        # Keep only onsets that rise above half the mean energy.
        rms_threshold = self.numpy.mean(rms) * 0.5
        transients = []
        for onset_time in onset_times:
            frame_idx = self.librosa.time_to_frames(onset_time, sr=sr, hop_length=hop_length)
            if frame_idx < len(rms) and rms[frame_idx] > rms_threshold:
                transients.append({
                    'time': float(onset_time),
                    'velocity': float(rms[frame_idx]),
                    'category': self._categorize_transient(rms[frame_idx], self.numpy.mean(rms))
                })
        # Map to a beat grid (assume 4/4). NOTE(review): 'sixteenth' here is
        # int((beat_pos % 1) * 16), i.e. 0-15 subdivisions of ONE beat --
        # finer than actual 16th notes (4 per beat); confirm intended grid.
        if transients:
            max_time = max(t['time'] for t in transients)
            num_beats = max(4, int(max_time / (60.0 / 95.0)))  # Assume 95 BPM if unknown
            grid_positions = []
            for t in transients:
                beat_pos = (t['time'] / max_time) * num_beats
                sixteenth = int((beat_pos % 1) * 16)
                grid_positions.append({
                    'beat': int(beat_pos),
                    'sixteenth': sixteenth,
                    'velocity': t['velocity'],
                    'category': t['category']
                })
            return {
                'transient_positions': [t['time'] for t in transients],
                'grid_positions': grid_positions,
                'num_beats': num_beats,
                'kick_positions': [p for p in grid_positions if p['category'] == 'kick'],
                'snare_positions': [p for p in grid_positions if p['category'] == 'snare'],
                'hat_positions': [p for p in grid_positions if p['category'] == 'hat']
            }
        return None

    def _categorize_transient(self, velocity, mean_rms):
        """Categorize a transient by its velocity relative to the mean RMS.

        ratio > 1.5 -> 'kick', > 0.8 -> 'snare', else 'hat'.
        """
        ratio = velocity / (mean_rms + 1e-10)  # epsilon avoids div-by-zero
        if ratio > 1.5:
            return 'kick'
        elif ratio > 0.8:
            return 'snare'
        else:
            return 'hat'

    def _classify_sample_type(self, file_path, is_harmonic, is_percussive, spectral_centroid):
        """Classify sample type, preferring filename keywords over spectra.

        Falls back to coarse spectral-centroid banding when the filename
        gives no hint.
        """
        filename = os.path.basename(file_path).lower()
        type_by_name = self._classify_by_filename(filename)
        if type_by_name != 'unknown':
            return type_by_name
        # Spectral fallback: brighter percussive material -> higher bands.
        if is_percussive:
            if spectral_centroid < 500:
                return 'kick'
            elif spectral_centroid < 2000:
                return 'snare'
            elif spectral_centroid < 8000:
                return 'hihat'
            else:
                return 'cymbal'
        elif is_harmonic:
            if spectral_centroid < 500:
                return 'bass'
            else:
                # No finer harmonic distinction available from centroid alone.
                return 'synth'
        return 'unknown'

    def _classify_by_filename(self, filename):
        """Classify sample type by keyword substring match (case-insensitive).

        First matching keyword in TYPE_KEYWORDS declaration order wins;
        returns 'unknown' when nothing matches.
        """
        filename_lower = filename.lower()
        for sample_type, keywords in self.TYPE_KEYWORDS.items():
            for keyword in keywords:
                if keyword in filename_lower:
                    return sample_type
        return 'unknown'

    def _get_default_features_by_type(self, sample_type):
        """Return typical spectral features for a sample type.

        Used by the basic backend, which cannot measure them. Unrecognized
        types get the 'unknown' defaults.
        """
        defaults = {
            'kick': {'spectral_centroid': 300, 'spectral_rolloff': 800, 'zero_crossing_rate': 0.05, 'rms_energy': 0.3},
            'snare': {'spectral_centroid': 1500, 'spectral_rolloff': 4000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.25},
            'clap': {'spectral_centroid': 2000, 'spectral_rolloff': 5000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2},
            'hihat': {'spectral_centroid': 8000, 'spectral_rolloff': 15000, 'zero_crossing_rate': 0.3, 'rms_energy': 0.1},
            'perc': {'spectral_centroid': 2500, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.2, 'rms_energy': 0.2},
            'tom': {'spectral_centroid': 800, 'spectral_rolloff': 2000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.25},
            'cymbal': {'spectral_centroid': 10000, 'spectral_rolloff': 18000, 'zero_crossing_rate': 0.35, 'rms_energy': 0.15},
            'bass': {'spectral_centroid': 400, 'spectral_rolloff': 1200, 'zero_crossing_rate': 0.03, 'rms_energy': 0.2},
            'synth': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.15},
            'fx': {'spectral_centroid': 5000, 'spectral_rolloff': 12000, 'zero_crossing_rate': 0.25, 'rms_energy': 0.2},
            'vocal': {'spectral_centroid': 2000, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.18},
            'loop': {'spectral_centroid': 2500, 'spectral_rolloff': 7000, 'zero_crossing_rate': 0.12, 'rms_energy': 0.2},
            'unknown': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2}
        }
        return defaults.get(sample_type, defaults['unknown'])

    def _suggest_genres(self, bpm):
        """Suggest genres whose BPM range (inclusive) contains `bpm`.

        Returns an empty list when bpm is None; order follows
        GENRE_BPM_RANGES declaration order.
        """
        if bpm is None:
            return []
        suggestions = []
        for genre, (min_bpm, max_bpm) in self.GENRE_BPM_RANGES.items():
            if min_bpm <= bpm <= max_bpm:
                suggestions.append(genre)
        return suggestions

    def _get_wave_info(self, file_path):
        """Try to read (duration_seconds, sample_rate) from a WAV header.

        On any failure, duration is roughly estimated from file size
        assuming 16-bit stereo at 44.1 kHz; sample_rate defaults to 44100.
        Never raises.
        """
        duration = 0.0
        sample_rate = 44100
        try:
            if file_path.lower().endswith('.wav'):
                with wave.open(file_path, 'rb') as wf:
                    sample_rate = wf.getframerate()
                    n_frames = wf.getnframes()
                    duration = n_frames / sample_rate
        except Exception:
            # Header unreadable: estimate from size (~176 KB per second).
            try:
                file_size = os.path.getsize(file_path)
                duration = file_size / (44100 * 2 * 2)
            except Exception:
                duration = 0.0
        return duration, sample_rate

    def get_backend_info(self):
        """Return information about the active backend for diagnostics."""
        return {
            'backend': self.backend,
            'librosa_available': self.librosa is not None,
            'numpy_available': self.numpy is not None,
            'version': '1.0.0'
        }
# Convenience function for direct usage
def analyze_audio(file_path, backend="auto"):
    """Analyze a single audio file and return its features.

    Thin wrapper that builds a throwaway AudioAnalyzerDual and delegates.

    Args:
        file_path: Path to the audio file to analyze.
        backend: Backend selector -- "auto", "librosa", or "basic".

    Returns:
        AudioFeatures dataclass.
    """
    return AudioAnalyzerDual(backend=backend).analyze_sample(file_path)