feat: Implement senior audio injection with 5 fallback methods

- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain
- Method 1: track.insert_arrangement_clip() [Live 12+]
- Method 2: track.create_audio_clip() [Live 11+]
- Method 3: arrangement_clips.add_new_clip() [Live 12+]
- Method 4: Session->duplicate_clip_to_arrangement [Legacy]
- Method 5: Session->Recording [Universal]

- Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow
- Update skills documentation
- Verified: 3 clips created at positions [0, 4, 8] in Arrangement View

Closes: Audio injection in Arrangement View
This commit is contained in:
OpenCode Agent
2026-04-12 14:02:32 -03:00
commit 5ce8187c65
118 changed files with 55075 additions and 0 deletions

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,730 @@
"""
ArrangementRecorder - Robust state machine for recording Session to Arrangement.
This module provides a reliable way to record Session View clips into Arrangement View
with proper state management, musical timing, and error handling.
"""
from enum import Enum, auto
from dataclasses import dataclass, field
from typing import Optional, Callable, List, Dict, Any, Tuple
import time
import logging
# Configure logging
logger = logging.getLogger(__name__)
class RecordingState(Enum):
    """States of the arrangement-recording state machine.

    Valid transitions:
        IDLE -> ARMED          via arm()
        ARMED -> PRE_ROLL      via start()
        PRE_ROLL -> RECORDING  when the quantized start time is reached
        RECORDING -> COOLDOWN  when the duration elapses or stop() is called
        COOLDOWN -> COMPLETED  verification succeeded
        COOLDOWN -> FAILED     verification failed
        any -> IDLE            via reset() or error recovery
    """

    IDLE = auto()
    ARMED = auto()
    PRE_ROLL = auto()
    RECORDING = auto()
    COOLDOWN = auto()
    COMPLETED = auto()
    FAILED = auto()
@dataclass
class RecordingConfig:
    """Parameters for one arrangement-recording session.

    Attributes:
        start_bar: Starting bar position in the arrangement.
        duration_bars: Total duration to record, in bars.
        pre_roll_bars: Bars to wait before recording starts (default 1.0).
        tempo: Tempo in BPM used for timing calculations.
        scene_index: Scene to fire at start (default 0).
        on_state_change: Called with (old_state, new_state) on transitions.
        on_progress: Called with a 0.0-1.0 progress value.
        on_error: Called with the exception on failure.
        on_completed: Called with the list of new clip ids on success.
    """

    start_bar: float
    duration_bars: float
    pre_roll_bars: float = 1.0
    tempo: float = 95.0
    scene_index: int = 0
    on_state_change: Optional[Callable[[RecordingState, RecordingState], None]] = None
    on_progress: Optional[Callable[[float], None]] = None
    on_error: Optional[Callable[[Exception], None]] = None
    on_completed: Optional[Callable[[List[str]], None]] = None

    def __post_init__(self):
        """Reject out-of-range values at construction time."""
        # Checks run in declaration order so the first offending field wins,
        # matching the original sequential validation.
        checks = (
            (self.start_bar < 0, f"start_bar must be >= 0, got {self.start_bar}"),
            (self.duration_bars <= 0, f"duration_bars must be > 0, got {self.duration_bars}"),
            (self.pre_roll_bars < 0, f"pre_roll_bars must be >= 0, got {self.pre_roll_bars}"),
            (self.tempo <= 0, f"tempo must be > 0, got {self.tempo}"),
            (self.scene_index < 0, f"scene_index must be >= 0, got {self.scene_index}"),
        )
        for failed, message in checks:
            if failed:
                raise ValueError(message)
@dataclass
class ArrangementBaseline:
    """Snapshot of the arrangement taken before recording starts.

    A fresh snapshot is compared against this one after recording to
    decide whether new clips were actually produced.
    """

    clip_count: int  # number of arrangement clips seen
    clip_ids: set  # "track_idx:start_time" identifiers
    clip_positions: Dict[str, Tuple[float, float]]  # id -> (start, end)
    total_length: float  # arrangement length at capture time
    timestamp: float  # wall-clock capture time
class ArrangementRecorder:
    """Robust Session-to-Arrangement recorder driven by a state machine.

    Manages the whole recording lifecycle:
      - pre-recording verification and baseline capture
      - musical timing in bars/beats instead of wall-clock time
      - quantized start on a bar boundary, automatic stop after duration
      - post-recording verification against the baseline

    Usage:
        recorder = ArrangementRecorder(song, ableton_connection)
        config = RecordingConfig(start_bar=0, duration_bars=8, tempo=95)
        if recorder.arm(config):
            recorder.start()
        # In update_display():
        recorder.update()  # drives the state machine
    """

    def __init__(self, song, ableton_connection):
        """Store Live references and reset all bookkeeping.

        Args:
            song: Live.Song.Song object.
            ableton_connection: Connection used to send commands to Live.
        """
        self.song = song
        self.ableton = ableton_connection
        # State machine
        self._state = RecordingState.IDLE
        self._config: Optional[RecordingConfig] = None
        # Results of the current/last session
        self._baseline: Optional[ArrangementBaseline] = None
        self._new_clips: List[str] = []
        self._new_clip_ids: set = set()
        # Musical timing targets (in bars; fractional values allowed)
        self._target_start_bar: float = 0.0
        self._target_end_bar: float = 0.0
        self._pre_roll_target_bar: float = 0.0
        self._current_progress: float = 0.0
        # update()/callback throttling bookkeeping
        self._last_update_time: float = 0.0
        self._last_progress_emit: float = -1.0
        self._state_entry_time: float = 0.0
        logger.info("ArrangementRecorder initialized")
# ========================================================================
# PUBLIC API
# ========================================================================
def arm(self, config: RecordingConfig) -> bool:
"""
Arm the recorder with configuration.
Verifies preconditions and captures baseline state.
Must be called before start().
Args:
config: Recording configuration
Returns:
True if successfully armed, False otherwise
"""
# Arming is only legal from IDLE; anything else indicates a caller error.
if self._state != RecordingState.IDLE:
logger.warning(f"Cannot arm from state {self._state.name}")
return False
try:
# Validate config
self._config = config
# Verify preconditions
self._verify_preconditions()
# Capture baseline
# Baseline must be captured BEFORE any recording so verification can
# diff the clip sets afterwards.
self._baseline = self._capture_baseline()
# Transition to ARMED
self._transition_to(RecordingState.ARMED)
logger.info(f"Recorder armed: bar {config.start_bar}, "
f"duration {config.duration_bars} bars, "
f"pre-roll {config.pre_roll_bars} bars")
return True
except Exception as e:
logger.error(f"Failed to arm recorder: {e}")
self._handle_error(e)
return False
def start(self) -> bool:
"""
Start the recording process.
Begins pre-roll phase if armed. Recording will start
automatically on the next bar boundary after pre-roll.
Returns:
True if recording sequence started, False otherwise
"""
if self._state != RecordingState.ARMED:
logger.warning(f"Cannot start from state {self._state.name}")
return False
if not self._config:
logger.error("No configuration set")
return False
try:
# Calculate timing
# All targets are expressed in bars relative to the current playhead,
# so the actual recording window is (pre_roll_target, pre_roll_target + duration).
current_bar = self._get_current_bar()
self._pre_roll_target_bar = current_bar + self._config.pre_roll_bars
self._target_start_bar = self._pre_roll_target_bar
self._target_end_bar = self._target_start_bar + self._config.duration_bars
# Enable arrangement overdub
# Overdub must be on before playback starts for Live to print Session
# output into the Arrangement.
self.song.arrangement_overdub = True
# Transition to PRE_ROLL
self._transition_to(RecordingState.PRE_ROLL)
logger.info(f"Recording sequence started: pre-roll until bar {self._pre_roll_target_bar}, "
f"recording until bar {self._target_end_bar}")
return True
except Exception as e:
logger.error(f"Failed to start recording: {e}")
self._handle_error(e)
return False
def stop(self) -> bool:
"""
Manually stop the recording.
Can be called during PRE_ROLL or RECORDING states.
Returns:
True if stopped successfully, False otherwise
"""
if self._state not in (RecordingState.PRE_ROLL, RecordingState.RECORDING):
logger.warning(f"Cannot stop from state {self._state.name}")
return False
try:
# Stop playback
# Order matters: stop transport first, then drop overdub, so no extra
# material is printed while the flag flips.
self.song.stop_playing()
# Disable overdub
self.song.arrangement_overdub = False
# Calculate actual end position
actual_end = self._get_current_bar()
logger.info(f"Recording manually stopped at bar {actual_end}")
# Transition to cooldown for verification
self._transition_to(RecordingState.COOLDOWN)
# Trigger verification
self._verify_and_complete()
return True
except Exception as e:
logger.error(f"Failed to stop recording: {e}")
self._handle_error(e)
return False
def update(self) -> None:
"""
Update the state machine.
This method should be called regularly from Ableton's
update_display() loop. It handles:
- Pre-roll timing
- Recording start trigger
- Recording duration tracking
- Automatic stop
- Progress callbacks
"""
# Dispatch on the current state; only PRE_ROLL and RECORDING do work here.
if self._state == RecordingState.IDLE:
return
if self._state == RecordingState.ARMED:
# Waiting for start() call
return
if self._state == RecordingState.PRE_ROLL:
self._handle_pre_roll()
return
if self._state == RecordingState.RECORDING:
self._handle_recording()
return
if self._state == RecordingState.COOLDOWN:
# Verification in progress, nothing to do
return
def reset(self) -> None:
    """Return to IDLE from any state, clearing all session data.

    If a recording is in flight, playback and arrangement overdub are
    stopped on a best-effort basis before the bookkeeping is cleared.
    """
    if self._state == RecordingState.RECORDING:
        try:
            self.song.stop_playing()
            self.song.arrangement_overdub = False
        except Exception as e:
            logger.warning(f"Error during reset cleanup: {e}")
    previous = self._state
    self._state = RecordingState.IDLE
    # Drop every piece of per-session state.
    self._config = None
    self._baseline = None
    self._new_clips = []
    self._new_clip_ids = set()
    self._target_start_bar = 0.0
    self._target_end_bar = 0.0
    self._pre_roll_target_bar = 0.0
    self._current_progress = 0.0
    if previous != RecordingState.IDLE:
        self._notify_state_change(previous, RecordingState.IDLE)
    logger.info("Recorder reset to IDLE")

def get_state(self) -> RecordingState:
    """Return the current state-machine state."""
    return self._state

def get_progress(self) -> float:
    """Return progress in [0.0, 1.0], or -1.0 when not in an active phase."""
    active_phases = (
        RecordingState.PRE_ROLL,
        RecordingState.RECORDING,
        RecordingState.COOLDOWN,
    )
    return self._current_progress if self._state in active_phases else -1.0

def get_new_clips(self) -> List[str]:
    """Return a copy of the clip ids ("track:start" format) from this session."""
    return list(self._new_clips)

def is_active(self) -> bool:
    """True while armed, pre-rolling, recording, or verifying."""
    return self._state in (
        RecordingState.ARMED,
        RecordingState.PRE_ROLL,
        RecordingState.RECORDING,
        RecordingState.COOLDOWN,
    )
# ========================================================================
# PRIVATE METHODS - State Machine
# ========================================================================
def _transition_to(self, new_state: RecordingState) -> None:
    """Move the machine to *new_state*, timestamping entry and notifying."""
    previous = self._state
    self._state = new_state
    self._state_entry_time = time.time()
    logger.debug(f"State transition: {previous.name} -> {new_state.name}")
    self._notify_state_change(previous, new_state)

def _notify_state_change(self, old: RecordingState, new: RecordingState) -> None:
    """Fire the on_state_change callback; callback errors are logged, not raised."""
    callback = self._config.on_state_change if self._config else None
    if callback is None:
        return
    try:
        callback(old, new)
    except Exception as e:
        logger.warning(f"State change callback error: {e}")
def _notify_progress(self, progress: float) -> None:
"""Notify progress callback (throttled)."""
# Throttle to avoid flooding callbacks
if abs(progress - self._last_progress_emit) < 0.01:
return
self._last_progress_emit = progress
if self._config and self._config.on_progress:
try:
self._config.on_progress(progress)
except Exception as e:
logger.warning(f"Progress callback error: {e}")
def _handle_error(self, error: Exception) -> None:
    """Report *error*, move to FAILED, and best-effort disable overdub.

    Both the error callback and the state-change callback are shielded so
    a faulty callback cannot mask the original failure.

    Args:
        error: The exception that aborted the recording.
    """
    logger.error(f"Recording error: {error}")
    # Notify error callback
    if self._config and self._config.on_error:
        try:
            self._config.on_error(error)
        except Exception as e:
            logger.warning(f"Error callback failed: {e}")
    # Transition to failed state (bypasses _transition_to deliberately:
    # same effect, but no entry-time stamp is needed for a terminal state)
    old_state = self._state
    self._state = RecordingState.FAILED
    self._notify_state_change(old_state, RecordingState.FAILED)
    # Cleanup. Fix: the bare "except:" here previously swallowed
    # SystemExit/KeyboardInterrupt as well; only Exception is intended.
    try:
        self.song.arrangement_overdub = False
    except Exception:
        pass
def _handle_pre_roll(self) -> None:
"""Handle pre-roll phase - wait until quantized start time."""
current_bar = self._get_current_bar()
# Calculate progress through pre-roll (0.0 = start, 1.0 = recording starts)
if self._config and self._config.pre_roll_bars > 0:
pre_roll_start = self._pre_roll_target_bar - self._config.pre_roll_bars
self._current_progress = (current_bar - pre_roll_start) / self._config.pre_roll_bars
self._current_progress = max(0.0, min(0.99, self._current_progress))
else:
self._current_progress = 0.99
self._notify_progress(self._current_progress)
# Check if we've reached the target bar
if current_bar >= self._pre_roll_target_bar:
self._on_quantized_start()
def _handle_recording(self) -> None:
"""Handle recording phase - track progress and auto-stop."""
current_bar = self._get_current_bar()
# Calculate progress through recording
recording_bars = self._target_end_bar - self._target_start_bar
bars_elapsed = current_bar - self._target_start_bar
self._current_progress = min(1.0, bars_elapsed / recording_bars)
self._notify_progress(self._current_progress)
# Check if recording should end
if current_bar >= self._target_end_bar:
self._on_recording_end()
# ========================================================================
# PRIVATE METHODS - Recording Lifecycle
# ========================================================================
def _verify_preconditions(self) -> None:
"""
Verify that recording can proceed.
Raises:
RuntimeError: If preconditions are not met
"""
if not self.song:
raise RuntimeError("No song object available")
# Check that we have scenes to fire
if not hasattr(self.song, 'scenes') or len(self.song.scenes) == 0:
raise RuntimeError("No scenes available in project")
if self._config and self._config.scene_index >= len(self.song.scenes):
raise RuntimeError(f"Scene index {self._config.scene_index} out of range")
# Check that we have tracks
if not hasattr(self.song, 'tracks') or len(self.song.tracks) == 0:
raise RuntimeError("No tracks available in project")
# Check arrangement_overdub can be set
try:
# Test setting and resetting
original = self.song.arrangement_overdub
self.song.arrangement_overdub = True
self.song.arrangement_overdub = original
except Exception as e:
raise RuntimeError(f"Cannot control arrangement_overdub: {e}")
logger.debug("Preconditions verified successfully")
def _capture_baseline(self) -> ArrangementBaseline:
    """Snapshot arrangement clips for post-recording comparison.

    Falls back to an empty baseline if the Live API refuses access, so
    verification degrades gracefully instead of aborting the session.

    Returns:
        ArrangementBaseline describing the current arrangement.
    """
    try:
        ids = set()
        positions = {}
        count = 0
        for track_idx, track in enumerate(self.song.tracks):
            for clip in getattr(track, 'arrangement_clips', []):
                if not clip:
                    continue
                # Identify clips by track plus start time: stable enough
                # to detect additions between two snapshots.
                cid = f"{track_idx}:{clip.start_time}"
                ids.add(cid)
                positions[cid] = (clip.start_time, clip.end_time)
                count += 1
        length = float(getattr(self.song, 'last_event_time', 0.0))
        snapshot = ArrangementBaseline(
            clip_count=count,
            clip_ids=ids,
            clip_positions=positions,
            total_length=length,
            timestamp=time.time(),
        )
        logger.debug(f"Captured baseline: {count} clips, length {length:.2f} beats")
        return snapshot
    except Exception as e:
        logger.warning(f"Could not capture complete baseline: {e}")
        return ArrangementBaseline(
            clip_count=0,
            clip_ids=set(),
            clip_positions={},
            total_length=0.0,
            timestamp=time.time(),
        )
def _calculate_pre_roll(self) -> float:
"""
Calculate pre-roll time in beats until next bar boundary.
Returns:
Number of beats until next bar
"""
current_time = self._get_current_song_time()
beats_per_bar = 4.0 # Default 4/4
try:
if hasattr(self.song, 'signature_numerator'):
beats_per_bar = float(self.song.signature_numerator)
except:
pass
# Find next bar boundary
current_bar = current_time / beats_per_bar
next_bar_num = int(current_bar) + 1
next_bar_time = next_bar_num * beats_per_bar
pre_roll = next_bar_time - current_time
return max(0.0, pre_roll)
def _on_quantized_start(self) -> None:
"""
Fire at exact bar boundary to start recording.
Fires the scene and begins recording.
"""
try:
# Fire the scene
if self._config:
scene = self.song.scenes[self._config.scene_index]
scene.fire()
# Ensure we're playing and overdubbing
# Overdub is re-asserted here in case something cleared it during pre-roll.
if not self.song.is_playing:
self.song.start_playing()
self.song.arrangement_overdub = True
# Transition to recording
self._transition_to(RecordingState.RECORDING)
logger.info(f"Recording started at bar {self._target_start_bar}")
except Exception as e:
logger.error(f"Failed to start recording at quantized time: {e}")
self._handle_error(e)
def _on_recording_end(self) -> None:
"""
Stop recording and transition to verification.
"""
try:
# Stop playback
# Order matters: stop the transport before dropping overdub so no
# extra material is printed while the flag flips.
self.song.stop_playing()
# Disable overdub
self.song.arrangement_overdub = False
logger.info(f"Recording ended at bar {self._target_end_bar}")
# Transition to cooldown
self._transition_to(RecordingState.COOLDOWN)
# Trigger verification
self._verify_and_complete()
except Exception as e:
logger.error(f"Error ending recording: {e}")
self._handle_error(e)
def _verify_and_complete(self) -> None:
"""
Verify recording success and transition to COMPLETED or FAILED.
"""
try:
success, new_clips = self._verify_recording_success()
if success:
self._new_clips = new_clips
self._transition_to(RecordingState.COMPLETED)
# Notify completion
# Callback errors are logged but never fail a completed recording.
if self._config and self._config.on_completed:
try:
self._config.on_completed(new_clips)
except Exception as e:
logger.warning(f"Completion callback error: {e}")
logger.info(f"Recording completed successfully with {len(new_clips)} new clips")
else:
error = RuntimeError("Recording verification failed - no new clips detected")
self._handle_error(error)
except Exception as e:
logger.error(f"Verification failed: {e}")
self._handle_error(e)
def _verify_recording_success(self) -> Tuple[bool, List[str]]:
"""
Compare before/after state to verify recording succeeded.
Returns:
Tuple of (success: bool, new_clip_ids: list)
"""
if not self._baseline:
logger.warning("No baseline captured, cannot verify")
return (True, []) # Assume success if we can't verify
try:
# Capture current state
# Same "track_idx:start_time" id scheme as _capture_baseline, so the
# set difference below identifies genuinely new clips.
current_count = 0
current_ids = set()
for track_idx, track in enumerate(self.song.tracks):
if hasattr(track, 'arrangement_clips'):
for clip in track.arrangement_clips:
if clip:
clip_id = f"{track_idx}:{clip.start_time}"
current_ids.add(clip_id)
current_count += 1
# Find new clips
new_clip_ids = current_ids - self._baseline.clip_ids
# Heuristic: at least one new clip should exist
# But sometimes clips are merged or extended, so we also check count
success = len(new_clip_ids) > 0 or current_count > self._baseline.clip_count
if not success:
logger.warning(f"Verification failed: {self._baseline.clip_count} -> {current_count} clips, "
f"{len(new_clip_ids)} new")
else:
logger.debug(f"Verification passed: {len(new_clip_ids)} new clips")
return (success, list(new_clip_ids))
except Exception as e:
logger.error(f"Error during verification: {e}")
return (False, [])
# ========================================================================
# PRIVATE METHODS - Utilities
# ========================================================================
def _get_current_bar(self) -> float:
"""
Get current song position in bars (musical time).
Returns:
Current bar number (can be fractional)
"""
try:
beats = float(self.song.current_song_time)
beats_per_bar = 4.0
if hasattr(self.song, 'signature_numerator'):
beats_per_bar = float(self.song.signature_numerator)
return beats / beats_per_bar
except Exception as e:
logger.warning(f"Error getting current bar: {e}")
return 0.0
def _get_current_song_time(self) -> float:
"""
Get current song position in beats.
Returns:
Current position in beats
"""
try:
return float(self.song.current_song_time)
except Exception as e:
logger.warning(f"Error getting song time: {e}")
return 0.0
def __repr__(self) -> str:
"""String representation for debugging."""
state = self._state.name
progress = f"{self._current_progress:.1%}" if self._current_progress >= 0 else "N/A"
return f"ArrangementRecorder(state={state}, progress={progress})"

View File

@@ -0,0 +1,613 @@
"""
AudioAnalyzerDual - Dual-backend audio analyzer for AbletonMCP_AI
Primary: librosa for full spectral analysis
Fallback: filename-based inference when librosa unavailable
This module provides intelligent audio sample analysis with graceful
degradation when heavy dependencies aren't available.
"""
import os
import re
import wave
import struct
from dataclasses import dataclass, field
from typing import Optional, List, Dict, Tuple, Any
from pathlib import Path
@dataclass
class AudioFeatures:
    """Feature bundle produced by AudioAnalyzerDual for one sample."""

    bpm: Optional[float]  # detected or inferred tempo; None if unknown
    key: Optional[str]  # e.g. 'C', 'F#m'; None if undetected
    key_confidence: float  # 0.0-1.0
    duration: float
    sample_rate: int
    sample_type: str  # 'kick', 'snare', 'loop', ...
    spectral_centroid: float
    spectral_rolloff: float
    zero_crossing_rate: float
    rms_energy: float
    is_harmonic: bool
    is_percussive: bool
    suggested_genres: List[str] = field(default_factory=list)
    groove_template: Optional[Dict] = None
    transients: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all features into a plain dict.

        Values are shared with the instance, not copied.
        """
        names = (
            'bpm', 'key', 'key_confidence', 'duration', 'sample_rate',
            'sample_type', 'spectral_centroid', 'spectral_rolloff',
            'zero_crossing_rate', 'rms_energy', 'is_harmonic',
            'is_percussive', 'suggested_genres', 'groove_template',
            'transients',
        )
        return {name: getattr(self, name) for name in names}
class AudioAnalyzerDual:
"""
Dual-backend audio analyzer:
- Primary: librosa for full spectral analysis
- Fallback: filename-based inference when librosa unavailable
"""
# Key profiles for Krumhansl-Schmuckler algorithm (major and minor)
# NOTE(review): these look like the standard Krumhansl-Kessler probe-tone
# profiles — confirm against the literature if key detection misbehaves.
KRUMHANSL_MAJOR = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
KRUMHANSL_MINOR = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
# Circle of fifths positions for key detection
# Index 0 = C, index i = i semitones above C.
KEY_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
KEY_NAMES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']
# Genre suggestions based on BPM ranges
# Ranges overlap deliberately; _suggest_genres may return several genres.
GENRE_BPM_RANGES = {
'reggaeton': (85, 100),
'trap': (130, 150),
'hip_hop': (85, 110),
'house': (120, 130),
'techno': (125, 140),
'dubstep': (140, 150),
'drum_and_bass': (160, 180),
'pop': (100, 130),
'rock': (120, 140),
'jazz': (120, 180),
'ambient': (60, 85),
'lofi': (70, 90)
}
# Sample type keywords for filename-based classification
# Keys are the canonical sample types; values are lowercase substrings
# matched against filenames by the basic (non-librosa) backend.
TYPE_KEYWORDS = {
'kick': ['kick', 'bd', 'bass_drum', 'kck'],
'snare': ['snare', 'sd', 'rim', 'snr'],
'clap': ['clap', 'cp'],
'hihat': ['hihat', 'hat', 'hh', 'hi_hat', 'openhat', 'closedhat'],
'perc': ['perc', 'percussion', 'bongo', 'conga', 'timbal'],
'tom': ['tom', 'toms'],
'cymbal': ['cymbal', 'crash', 'ride', 'splash'],
'bass': ['bass', 'sub', '808', 'bassline'],
'synth': ['synth', 'pad', 'lead', 'pluck', 'arp'],
'fx': ['fx', 'effect', 'riser', 'downer', 'sweep', 'impact'],
'vocal': ['vocal', 'voice', 'vox', 'chant'],
'loop': ['loop', 'full', 'groove']
}
def __init__(self, backend="auto"):
"""Initialize the analyzer with specified backend."""
self.backend = self._detect_backend(backend)
self.librosa = None
self.numpy = None
self._init_libraries()
def _detect_backend(self, preferred):
"""Detect and return the appropriate backend."""
if preferred == "librosa":
try:
import librosa
import numpy as np
return "librosa"
except ImportError:
return "basic"
elif preferred == "basic":
return "basic"
else: # auto
try:
import librosa
import numpy as np
return "librosa"
except ImportError:
return "basic"
def _init_libraries(self):
"""Initialize library references if available."""
if self.backend == "librosa":
try:
import librosa
import numpy as np
self.librosa = librosa
self.numpy = np
except ImportError:
self.backend = "basic"
self.librosa = None
self.numpy = None
def analyze_sample(self, file_path):
    """Analyze an audio file and return its AudioFeatures.

    Uses the librosa backend when available; any librosa failure (or a
    'basic' backend) falls back to filename-based inference.

    Args:
        file_path: Path to the audio file.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Audio file not found: {file_path}")
    if self.backend != "librosa":
        return self._analyze_basic(file_path)
    try:
        return self._analyze_with_librosa(file_path)
    except Exception as e:
        # Pass the librosa failure along so the fallback can record it.
        return self._analyze_basic(file_path, error_context=str(e))
def _analyze_with_librosa(self, file_path):
"""
Full analysis using librosa:
1. Load audio: librosa.load()
2. Detect BPM: librosa.beat.beat_track()
3. Extract spectral: centroid, rolloff, zcr, rms
4. Detect key: chromagram + Krumhansl-Schmuckler
5. HPSS: harmonic/percussive separation
6. Classify type based on features
7. Extract groove template (for drums)
8. Suggest genres based on BPM
"""
# sr=None preserves the file's native sample rate instead of resampling.
y, sr = self.librosa.load(file_path, sr=None)
# Basic info
duration = self.librosa.get_duration(y=y, sr=sr)
# BPM detection
bpm = self._detect_bpm_librosa(y, sr)
# Spectral features
# Each feature is a per-frame series; collapse to a single mean scalar.
spectral_centroid = float(self.numpy.mean(self.librosa.feature.spectral_centroid(y=y, sr=sr)))
spectral_rolloff = float(self.numpy.mean(self.librosa.feature.spectral_rolloff(y=y, sr=sr)))
zero_crossing_rate = float(self.numpy.mean(self.librosa.feature.zero_crossing_rate(y)))
rms_energy = float(self.numpy.mean(self.librosa.feature.rms(y=y)))
# Key detection
key, key_confidence = self._detect_key_librosa(y, sr)
# HPSS separation
# A component is flagged only when it clearly dominates (>60% of energy),
# so a sample can be neither harmonic nor percussive.
y_harmonic, y_percussive = self.librosa.effects.hpss(y)
harmonic_energy = self.numpy.sum(y_harmonic ** 2)
percussive_energy = self.numpy.sum(y_percussive ** 2)
total_energy = harmonic_energy + percussive_energy
is_harmonic = (harmonic_energy / total_energy) > 0.6 if total_energy > 0 else False
is_percussive = (percussive_energy / total_energy) > 0.6 if total_energy > 0 else False
# Classify sample type
sample_type = self._classify_sample_type(file_path, is_harmonic, is_percussive, spectral_centroid)
# Extract groove template for drum loops
groove_template = None
transients = None
if is_percussive or sample_type in ['kick', 'snare', 'clap', 'hihat', 'perc', 'loop']:
groove_template = self._extract_groove_template(y, sr)
transients = groove_template.get('transient_positions', []) if groove_template else []
# Genre suggestions
suggested_genres = self._suggest_genres(bpm)
return AudioFeatures(
bpm=bpm,
key=key,
key_confidence=key_confidence,
duration=duration,
sample_rate=sr,
sample_type=sample_type,
spectral_centroid=spectral_centroid,
spectral_rolloff=spectral_rolloff,
zero_crossing_rate=zero_crossing_rate,
rms_energy=rms_energy,
is_harmonic=is_harmonic,
is_percussive=is_percussive,
suggested_genres=suggested_genres,
groove_template=groove_template,
transients=transients
)
def _analyze_basic(self, file_path, error_context=None):
    """Infer features from the filename alone (librosa unavailable/failed).

    Args:
        file_path: Path to the audio file.
        error_context: Optional librosa error message from a failed full
            analysis; accepted for diagnostics but not embedded in the
            result.

    Returns:
        AudioFeatures with filename-derived BPM/key/type and per-type
        default spectral values.
    """
    name = os.path.basename(file_path)
    bpm = self._extract_bpm_from_name(name)
    key = self._extract_key_from_name(name)
    sample_type = self._classify_by_filename(name)
    # Duration/sample-rate come from the wave header when readable.
    duration, sample_rate = self._get_wave_info(file_path)
    defaults = self._get_default_features_by_type(sample_type)
    return AudioFeatures(
        bpm=bpm,
        key=key,
        # Filename-derived keys get a flat, moderate confidence.
        key_confidence=0.5 if key else 0.0,
        duration=duration,
        sample_rate=sample_rate,
        sample_type=sample_type,
        spectral_centroid=defaults['spectral_centroid'],
        spectral_rolloff=defaults['spectral_rolloff'],
        zero_crossing_rate=defaults['zero_crossing_rate'],
        rms_energy=defaults['rms_energy'],
        # Harmonic/percussive flags are decided purely by sample type here.
        is_harmonic=sample_type in ['synth', 'bass', 'vocal', 'pad', 'lead', 'pluck'],
        is_percussive=sample_type in ['kick', 'snare', 'clap', 'hihat', 'perc', 'tom', 'cymbal'],
        suggested_genres=self._suggest_genres(bpm),
        groove_template=None,
        transients=None,
    )
def _detect_key_librosa(self, y, sr):
"""
Uses chromagram and Krumhansl-Schmuckler key profiles.
Returns:
(key, confidence)
"""
# Compute chromagram
# chroma_mean is a 12-bin pitch-class energy vector averaged over time.
chromagram = self.librosa.feature.chroma_stft(y=y, sr=sr)
chroma_mean = self.numpy.mean(chromagram, axis=1)
# Calculate correlation with major and minor profiles for all keys
best_score = -1
best_key = None
best_mode = None
for shift in range(12):
# Rotate chroma to test this key
rotated_chroma = self.numpy.roll(chroma_mean, shift)
# Normalize
# The 1e-10 epsilon guards against division by zero on silent input.
rotated_chroma = rotated_chroma / (self.numpy.sum(rotated_chroma) + 1e-10)
# Correlation with major
major_corr = self.numpy.corrcoef(rotated_chroma, self.KRUMHANSL_MAJOR)[0, 1]
if major_corr > best_score:
best_score = major_corr
best_key = shift
best_mode = 'major'
# Correlation with minor
minor_corr = self.numpy.corrcoef(rotated_chroma, self.KRUMHANSL_MINOR)[0, 1]
if minor_corr > best_score:
best_score = minor_corr
best_key = shift
best_mode = 'minor'
# Convert to key name
key_name = self.KEY_NAMES[best_key]
if best_mode == 'minor':
key_name += 'm'
# Confidence is the correlation score (normalized to 0-1)
confidence = (best_score + 1) / 2 # Convert from [-1, 1] to [0, 1]
confidence = max(0.0, min(1.0, confidence))
return key_name, confidence
def _extract_key_from_name(self, filename):
r"""
Extract key from filename using regex patterns.
Patterns:
- [_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]
- \bin\s+([A-G][#b]?(?:m|min|minor)?)\b
- Key[_\s]?([A-G][#b]?m?)
"""
# Pattern 1: Key surrounded by separators
pattern1 = r'[_\s\-]([A-G][#b]?(?:m|min|minor)?)[_\s\-]'
match = re.search(pattern1, filename, re.IGNORECASE)
if match:
return self._normalize_key(match.group(1))
# Pattern 2: "in Key" format
pattern2 = r'\bin\s+([A-G][#b]?(?:m|min|minor)?)\b'
match = re.search(pattern2, filename, re.IGNORECASE)
if match:
return self._normalize_key(match.group(1))
# Pattern 3: Key prefix
pattern3 = r'Key[_\s]?([A-G][#b]?m?)'
match = re.search(pattern3, filename, re.IGNORECASE)
if match:
return self._normalize_key(match.group(1))
return None
def _normalize_key(self, key_str):
"""Normalize key string to standard format."""
key_str = key_str.strip().upper()
# Handle variations
if 'MINOR' in key_str or key_str.endswith('MIN'):
root = key_str.replace('MINOR', '').replace('MIN', '').strip()
return root + 'm'
# Handle flat/sharp notation
if 'B' in key_str and '#' not in key_str and len(key_str) > 1:
# Convert flats to sharps where applicable
flat_to_sharp = {'DB': 'C#', 'EB': 'D#', 'GB': 'F#', 'AB': 'G#', 'BB': 'A#'}
root = key_str.rstrip('M').rstrip('m')
if root in flat_to_sharp:
key_str = flat_to_sharp[root] + ('m' if 'm' in key_str.lower() else '')
return key_str
def _detect_bpm_librosa(self, y, sr):
"""Detect BPM using librosa.beat.beat_track()."""
try:
tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr)
if isinstance(tempo, self.numpy.ndarray):
tempo = float(tempo.item())
return float(tempo) if tempo > 0 else None
except Exception:
return None
def _extract_bpm_from_name(self, filename):
r"""
Extract BPM from filename using regex patterns.
Patterns:
- [_\s\-](\d{2,3})\s*BPM
- [_\s\-](\d{2,3})[_\s\-]
- (\d{2,3})bpm
Range validation: 60-200 BPM
"""
# Pattern 1: Explicit BPM suffix
pattern1 = r'[_\s\-](\d{2,3})\s*BPM'
match = re.search(pattern1, filename, re.IGNORECASE)
if match:
bpm = int(match.group(1))
if 60 <= bpm <= 200:
return float(bpm)
# Pattern 2: Number surrounded by separators
pattern2 = r'[_\s\-](\d{2,3})[_\s\-]'
matches = re.findall(pattern2, filename)
for m in matches:
bpm = int(m)
if 60 <= bpm <= 200:
return float(bpm)
# Pattern 3: BPM suffix without separator
pattern3 = r'(\d{2,3})bpm'
match = re.search(pattern3, filename, re.IGNORECASE)
if match:
bpm = int(match.group(1))
if 60 <= bpm <= 200:
return float(bpm)
return None
def _extract_groove_template(self, y, sr):
    """
    Extract a groove template (transient timing/velocity map) from a drum loop.

    Pipeline:
      1. Detect transients with librosa.onset.onset_detect().
      2. Keep onsets whose local RMS exceeds half the loop's mean RMS.
      3. Categorize each surviving transient by relative loudness
         (kick-like / snare-like / hat-like via _categorize_transient).
      4. Map transients onto a beat grid (assumes 4/4).

    Returns:
        dict with raw transient times, grid positions, and per-category
        position lists, or None when no transient survives the RMS filter.
    """
    # Detect onsets (frame indices converted to seconds)
    onset_frames = self.librosa.onset.onset_detect(y=y, sr=sr)
    onset_times = self.librosa.frames_to_time(onset_frames, sr=sr)
    # Calculate RMS around each onset for velocity
    hop_length = 512
    rms = self.librosa.feature.rms(y=y, hop_length=hop_length)[0]
    # Filter by RMS threshold (half of the mean loudness)
    rms_threshold = self.numpy.mean(rms) * 0.5
    transients = []
    for onset_time in onset_times:
        frame_idx = self.librosa.time_to_frames(onset_time, sr=sr, hop_length=hop_length)
        if frame_idx < len(rms) and rms[frame_idx] > rms_threshold:
            transients.append({
                'time': float(onset_time),
                'velocity': float(rms[frame_idx]),
                'category': self._categorize_transient(rms[frame_idx], self.numpy.mean(rms))
            })
    # Map to beat grid (assume 4/4, map to 16th notes)
    if transients:
        max_time = max(t['time'] for t in transients)
        # NOTE(review): beat count is estimated assuming 95 BPM when the real
        # tempo is unknown -- confirm this default against callers.
        num_beats = max(4, int(max_time / (60.0 / 95.0)))  # Assume 95 BPM if unknown
        grid_positions = []
        for t in transients:
            # Normalize onset time into [0, num_beats); the fractional part of
            # the beat is scaled into 16 subdivisions (0-15).
            beat_pos = (t['time'] / max_time) * num_beats
            sixteenth = int((beat_pos % 1) * 16)
            grid_positions.append({
                'beat': int(beat_pos),
                'sixteenth': sixteenth,
                'velocity': t['velocity'],
                'category': t['category']
            })
        return {
            'transient_positions': [t['time'] for t in transients],
            'grid_positions': grid_positions,
            'num_beats': num_beats,
            'kick_positions': [p for p in grid_positions if p['category'] == 'kick'],
            'snare_positions': [p for p in grid_positions if p['category'] == 'snare'],
            'hat_positions': [p for p in grid_positions if p['category'] == 'hat']
        }
    return None
def _categorize_transient(self, velocity, mean_rms):
"""Categorize transient by velocity level."""
ratio = velocity / (mean_rms + 1e-10)
if ratio > 1.5:
return 'kick'
elif ratio > 0.8:
return 'snare'
else:
return 'hat'
def _classify_sample_type(self, file_path, is_harmonic, is_percussive, spectral_centroid):
    """
    Classify a sample's type; filename keywords take precedence over
    spectral heuristics, which in turn use the centroid as a brightness proxy.
    """
    base_name = os.path.basename(file_path).lower()
    name_guess = self._classify_by_filename(base_name)
    if name_guess != 'unknown':
        return name_guess
    if is_percussive:
        # Ascending brightness ceilings: kick < snare < hihat, else cymbal.
        for ceiling, label in ((500, 'kick'), (2000, 'snare'), (8000, 'hihat')):
            if spectral_centroid < ceiling:
                return label
        return 'cymbal'
    if is_harmonic:
        return 'bass' if spectral_centroid < 500 else 'synth'
    return 'unknown'
def _classify_by_filename(self, filename):
    """Match the filename against TYPE_KEYWORDS; first hit wins, else 'unknown'."""
    lowered = filename.lower()
    for sample_type, keywords in self.TYPE_KEYWORDS.items():
        if any(keyword in lowered for keyword in keywords):
            return sample_type
    return 'unknown'
def _get_default_features_by_type(self, sample_type):
"""Return default spectral features based on sample type."""
defaults = {
'kick': {'spectral_centroid': 300, 'spectral_rolloff': 800, 'zero_crossing_rate': 0.05, 'rms_energy': 0.3},
'snare': {'spectral_centroid': 1500, 'spectral_rolloff': 4000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.25},
'clap': {'spectral_centroid': 2000, 'spectral_rolloff': 5000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2},
'hihat': {'spectral_centroid': 8000, 'spectral_rolloff': 15000, 'zero_crossing_rate': 0.3, 'rms_energy': 0.1},
'perc': {'spectral_centroid': 2500, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.2, 'rms_energy': 0.2},
'tom': {'spectral_centroid': 800, 'spectral_rolloff': 2000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.25},
'cymbal': {'spectral_centroid': 10000, 'spectral_rolloff': 18000, 'zero_crossing_rate': 0.35, 'rms_energy': 0.15},
'bass': {'spectral_centroid': 400, 'spectral_rolloff': 1200, 'zero_crossing_rate': 0.03, 'rms_energy': 0.2},
'synth': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.15},
'fx': {'spectral_centroid': 5000, 'spectral_rolloff': 12000, 'zero_crossing_rate': 0.25, 'rms_energy': 0.2},
'vocal': {'spectral_centroid': 2000, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.18},
'loop': {'spectral_centroid': 2500, 'spectral_rolloff': 7000, 'zero_crossing_rate': 0.12, 'rms_energy': 0.2},
'unknown': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2}
}
return defaults.get(sample_type, defaults['unknown'])
def _suggest_genres(self, bpm):
"""Suggest genres based on BPM."""
if bpm is None:
return []
suggestions = []
for genre, (min_bpm, max_bpm) in self.GENRE_BPM_RANGES.items():
if min_bpm <= bpm <= max_bpm:
suggestions.append(genre)
return suggestions
def _get_wave_info(self, file_path):
"""Try to get duration and sample rate from wave file header."""
duration = 0.0
sample_rate = 44100
try:
if file_path.lower().endswith('.wav'):
with wave.open(file_path, 'rb') as wf:
sample_rate = wf.getframerate()
n_frames = wf.getnframes()
duration = n_frames / sample_rate
except Exception:
# If wave fails, try to estimate from file size (rough)
try:
file_size = os.path.getsize(file_path)
# Rough estimate: assume 16-bit stereo at 44.1kHz = ~176KB per second
duration = file_size / (44100 * 2 * 2)
except Exception:
duration = 0.0
return duration, sample_rate
def get_backend_info(self):
    """Report which analysis backend is active and which libraries loaded."""
    return dict(
        backend=self.backend,
        librosa_available=self.librosa is not None,
        numpy_available=self.numpy is not None,
        version='1.0.0',
    )
# Convenience function for direct usage
def analyze_audio(file_path, backend="auto"):
    """
    Convenience wrapper: analyze an audio file with a one-off analyzer.

    Args:
        file_path: Path to the audio file.
        backend: "auto", "librosa", or "basic".

    Returns:
        AudioFeatures dataclass.
    """
    return AudioAnalyzerDual(backend=backend).analyze_sample(file_path)

View File

@@ -0,0 +1,996 @@
"""
Professional Bus and Return Architecture for AbletonMCP_AI
Implements professional mixing architecture with:
- Bus groups (drums, bass, music, vocal, fx)
- Return tracks with effects (space/reverb, echo/delay, heat/saturation, glue/compression)
- Role-based mix profiles
- Master chain processing
"""
from __future__ import absolute_import, print_function, unicode_literals
# =============================================================================
# BUS GAIN CALIBRATION
# =============================================================================
# Per-bus mixer defaults: fader level (0-1), compressor threshold/ratio,
# optional saturator drive, and pan. Keys are the bus names BusArchitecture
# expects; the compressor/saturator values are advisory (see configure_bus_gain).
BUS_GAIN_CALIBRATION = {
    'drums': {
        'volume': 0.92,
        'compressor_threshold': -16.0,
        'compressor_ratio': 4.0,
        'saturator_drive': 0.6,
        'pan': 0.0
    },
    'bass': {
        'volume': 0.88,
        'compressor_threshold': -18.0,
        'compressor_ratio': 3.0,
        'saturator_drive': 0.4,
        'pan': 0.0
    },
    'music': {
        'volume': 0.85,
        'compressor_threshold': -20.0,
        'compressor_ratio': 2.5,
        'pan': 0.0
    },
    'vocal': {
        'volume': 0.82,
        'compressor_threshold': -16.0,
        'compressor_ratio': 3.0,
        'pan': 0.0
    },
    'fx': {
        'volume': 0.78,
        'compressor_threshold': -22.0,
        'compressor_ratio': 2.0,
        'pan': 0.0
    }
}
# =============================================================================
# RETURN TRACK CONFIGURATION
# =============================================================================
# Each return pairs a Live device name with the default parameters that
# configure_return_effect tries to apply by (substring) parameter-name match.
RETURN_CONFIG = {
    'space': {  # Reverb
        'device': 'Reverb',
        'default_params': {
            'PreDelay': 20.0,
            'DecayTime': 2500.0,
            'Size': 0.7,
            'DryWet': 0.3
        }
    },
    'echo': {  # Delay
        'device': 'Delay',
        'default_params': {
            'DelayTime': '1/8',  # string value: needs manual mapping in Live
            'Feedback': 0.35,
            'DryWet': 0.25
        }
    },
    'heat': {  # Saturation
        'device': 'Saturator',
        'default_params': {
            'Drive': 6.0,
            'Type': 0,  # Analog
            'DryWet': 0.2
        }
    },
    'glue': {  # Bus Compression
        'device': 'Compressor',
        'default_params': {
            'Threshold': -20.0,
            'Ratio': 2.0,
            'Attack': 10.0,
            'Release': 100.0,
            'DryWet': 0.15
        }
    }
}
# =============================================================================
# ROLE MIX PROFILES
# =============================================================================
# Per-role mix profile: fader volume, pan (-1..1), send amounts keyed by the
# RETURN_CONFIG names above, and the destination bus (BUS_GAIN_CALIBRATION key).
ROLE_MIX = {
    'kick': {
        'volume': 0.85,
        'pan': 0.0,
        'sends': {'glue': 0.08},
        'bus': 'drums'
    },
    'snare': {
        'volume': 0.82,
        'pan': 0.0,
        'sends': {'space': 0.12, 'echo': 0.05, 'glue': 0.10},
        'bus': 'drums'
    },
    'clap': {
        'volume': 0.78,
        'pan': 0.0,
        'sends': {'space': 0.14, 'echo': 0.04, 'heat': 0.02, 'glue': 0.10},
        'bus': 'drums'
    },
    'hat_closed': {
        'volume': 0.72,
        'pan': 0.15,
        'sends': {'space': 0.08, 'glue': 0.05},
        'bus': 'drums'
    },
    'hat_open': {
        'volume': 0.75,
        'pan': -0.15,
        'sends': {'space': 0.15, 'glue': 0.06},
        'bus': 'drums'
    },
    'bass': {
        'volume': 0.78,
        'pan': 0.0,
        'sends': {'heat': 0.04, 'glue': 0.12},
        'bus': 'bass'
    },
    'sub_bass': {
        'volume': 0.80,
        'pan': 0.0,
        'sends': {'glue': 0.10},
        'bus': 'bass'
    },
    'lead': {
        'volume': 0.76,
        'pan': 0.25,
        'sends': {'space': 0.20, 'echo': 0.15, 'glue': 0.08},
        'bus': 'music'
    },
    'pad': {
        'volume': 0.70,
        'pan': -0.20,
        'sends': {'space': 0.35, 'echo': 0.10, 'glue': 0.06},
        'bus': 'music'
    },
    'pluck': {
        'volume': 0.74,
        'pan': 0.30,
        'sends': {'space': 0.18, 'echo': 0.12, 'glue': 0.07},
        'bus': 'music'
    },
    'chords': {
        'volume': 0.72,
        'pan': 0.0,
        'sends': {'space': 0.25, 'echo': 0.08, 'glue': 0.07},
        'bus': 'music'
    },
    'fx': {
        'volume': 0.68,
        'pan': 0.0,
        'sends': {'space': 0.40, 'echo': 0.20},
        'bus': 'fx'
    },
    'vocal': {
        'volume': 0.80,
        'pan': 0.0,
        'sends': {'space': 0.25, 'echo': 0.12, 'heat': 0.03, 'glue': 0.10},
        'bus': 'vocal'
    }
}
# =============================================================================
# MASTER CHAIN CONFIGURATION
# =============================================================================
# Target devices and parameters for the master track; configure_master_chain
# applies these only when the device is already present on the master.
MASTER_CHAIN = {
    'eq': {
        'device': 'EQEight',
        'params': {
            'GainLow': 0.0,
            'FreqLowest': 30.0,
            'GainMid': 0.0,
            'GainHigh': 0.0
        }
    },
    'compressor': {
        'device': 'Compressor',
        'params': {
            'Threshold': -6.0,
            'Ratio': 2.0,
            'Attack': 3.0,
            'Release': 60.0,
            'DryWet': 100.0
        }
    },
    'limiter': {
        'device': 'Limiter',
        'params': {
            'Gain': 0.0,
            'Ceiling': -0.3  # leave headroom for inter-sample peaks
        }
    }
}
# =============================================================================
# BUS ARCHITECTURE IMPLEMENTATION
# =============================================================================
class BusArchitecture:
    """Professional bus and return architecture manager."""
    def __init__(self, ableton_conn):
        """
        Initialize with Ableton connection.

        Args:
            ableton_conn: The Ableton Live connection (self from __init__.py)
        """
        self.conn = ableton_conn
        # Live Song object if the connection exposes one; when None, most
        # methods return an error dict instead of touching the Live API.
        self._song = ableton_conn._song if hasattr(ableton_conn, '_song') else None
        self._bus_indices = {}  # bus_name -> track_index
        self._return_indices = {}  # return_name -> return_track_index
def create_bus_track(self, bus_name, bus_type='audio'):
    """
    Create a bus (group) track for submixing.

    Args:
        bus_name: Name for the bus track (e.g., "BUS Drums").
        bus_type: 'audio' or 'midi' (default 'audio').

    Returns:
        dict: Creation status; on success includes track_index.
    """
    if self._song is None:
        return {"error": "No song connection available"}
    try:
        # Pick the right factory, then append the track at the end (-1).
        if bus_type.lower() == 'midi':
            self._song.create_midi_track(-1)
        else:
            self._song.create_audio_track(-1)
        idx = len(self._song.tracks) - 1
        self._song.tracks[idx].name = str(bus_name)
        # Remember where this bus lives for later routing/calibration.
        self._bus_indices[bus_name] = idx
        return {
            "bus_created": True,
            "track_index": idx,
            "bus_name": str(bus_name),
            "bus_type": bus_type,
        }
    except Exception as e:
        return {
            "bus_created": False,
            "error": str(e),
            "bus_name": str(bus_name),
        }
def create_return_track(self, return_name, effect_type=None):
    """
    Creates a return track with optional effect.

    Args:
        return_name: Name for the return track (e.g., "Reverb", "Delay")
        effect_type: Effect device name to insert (e.g., "Reverb", "Delay")

    Returns:
        dict: Creation status with return_track_index
    """
    if self._song is None:
        return {"error": "No song connection available"}
    try:
        # Create return track using Live API
        if hasattr(self._song, 'create_return_track'):
            self._song.create_return_track(-1)
        else:
            # Fallback: create audio track and use as return
            self._song.create_audio_track(-1)
        # Return tracks are after regular tracks in Live
        if hasattr(self._song, 'return_tracks'):
            idx = len(self._song.return_tracks) - 1
            return_track = self._song.return_tracks[idx]
        else:
            # Fallback: use last created track
            idx = len(self._song.tracks) - 1
            return_track = self._song.tracks[idx]
        return_track.name = str(return_name)
        # Store the index so send/effect configuration can find it later
        self._return_indices[return_name] = idx
        result = {
            "return_created": True,
            "return_index": idx,
            "return_name": str(return_name)
        }
        # Insert effect if specified
        if effect_type:
            device_result = self._insert_device_on_return(idx, effect_type)
            result["device_inserted"] = device_result
        return result
    except Exception as e:
        return {
            "return_created": False,
            "error": str(e),
            "return_name": str(return_name)
        }
def _insert_device_on_return(self, return_index, device_name):
    """Insert a named device on a return track; returns False on any failure."""
    try:
        # Prefer real return tracks; otherwise fall back to regular tracks.
        if hasattr(self._song, 'return_tracks'):
            target = self._song.return_tracks[return_index]
        else:
            target = self._song.tracks[return_index]
        # Delegate to the connection's browser loader when available.
        loader = getattr(self.conn, '_browser_load_device', None)
        if loader is None:
            return False
        return loader(target, device_name)
    except Exception:
        return False
def route_track_to_bus(self, track_index, bus_name):
    """
    Routes a track's output to a bus track.

    In Ableton Live, this is typically done by grouping tracks or setting
    output routing. Since direct API routing is limited, this sets up
    the conceptual routing and returns guidance.

    Args:
        track_index: Index of the source track
        bus_name: Name of the bus track to route to

    Returns:
        dict: Routing status
    """
    if self._song is None:
        return {"error": "No song connection available"}
    try:
        src_idx = int(track_index)
        src_track = self._song.tracks[src_idx]
        # Find the bus track
        bus_idx = None
        bus_track = None
        # Check our stored indices first (cheap; set by create_bus_track)
        if bus_name in self._bus_indices:
            bus_idx = self._bus_indices[bus_name]
            bus_track = self._song.tracks[bus_idx]
        else:
            # Search by name (case-insensitive substring match)
            for i, t in enumerate(self._song.tracks):
                if bus_name.lower() in str(t.name).lower():
                    bus_idx = i
                    bus_track = t
                    break
        if bus_track is None:
            return {
                "routed": False,
                "error": "Bus track '%s' not found" % bus_name
            }
        # Try to configure output routing through mixer device
        # Note: Full output routing API varies by Live version
        mixer = src_track.mixer_device
        # Attempt to set up sends to the bus if available
        sends_configured = 0
        if hasattr(mixer, 'sends'):
            for send in mixer.sends:
                if hasattr(send, 'target_track') and send.target_track == bus_track:
                    # Send already targets this bus
                    sends_configured += 1
                    break
        # Try output routing if available; both attribute names are probed
        # because the supported one differs between Live versions.
        output_set = False
        if hasattr(src_track, 'output_routing_type'):
            # Some Live versions support this
            try:
                src_track.output_routing_type = bus_track
                output_set = True
            except:
                pass
        elif hasattr(src_track, 'output_routing_channel'):
            try:
                src_track.output_routing_channel = bus_track
                output_set = True
            except:
                pass
        return {
            "routed": True,
            "track_index": src_idx,
            "track_name": str(src_track.name),
            "bus_index": bus_idx,
            "bus_name": str(bus_name),
            "output_routing_set": output_set,
            "sends_configured": sends_configured,
            "note": "Manual grouping in Live may be needed for complete bus routing"
        }
    except Exception as e:
        return {
            "routed": False,
            "track_index": track_index,
            "error": str(e)
        }
def set_track_send(self, track_index, return_name, amount):
    """
    Sets send amount from a track to a return track.

    Args:
        track_index: Index of the source track
        return_name: Name of the return track
        amount: Send amount 0.0-1.0

    Returns:
        dict: Send configuration status ("send_set" reports success)
    """
    if self._song is None:
        return {"error": "No song connection available"}
    try:
        track_idx = int(track_index)
        track = self._song.tracks[track_idx]
        send_amount = float(amount)
        # Find return track index: stored index first, then name search
        return_idx = None
        if return_name in self._return_indices:
            return_idx = self._return_indices[return_name]
        else:
            # Search in return tracks (case-insensitive substring match)
            if hasattr(self._song, 'return_tracks'):
                for i, rt in enumerate(self._song.return_tracks):
                    if return_name.lower() in str(rt.name).lower():
                        return_idx = i
                        break
        if return_idx is None:
            return {
                "send_set": False,
                "error": "Return track '%s' not found" % return_name
            }
        # Configure send via mixer device; sends are ordered like return tracks
        mixer = track.mixer_device
        sends_configured = 0
        if hasattr(mixer, 'sends') and return_idx < len(mixer.sends):
            send = mixer.sends[return_idx]
            if hasattr(send, 'value'):
                send.value = send_amount
                sends_configured = 1
        return {
            "send_set": sends_configured > 0,
            "track_index": track_idx,
            "track_name": str(track.name),
            "return_name": str(return_name),
            "return_index": return_idx,
            "amount": send_amount,
            "sends_configured": sends_configured
        }
    except Exception as e:
        return {
            "send_set": False,
            "track_index": track_index,
            "error": str(e)
        }
def configure_bus_gain(self, bus_name):
    """
    Configure a bus track with professional gain calibration settings.

    Args:
        bus_name: Name of the bus (must match BUS_GAIN_CALIBRATION keys).

    Returns:
        dict: Configuration status; "configured" is False on any error.
    """
    if bus_name not in BUS_GAIN_CALIBRATION:
        return {
            "configured": False,
            "error": "Unknown bus name '%s'. Valid: %s" % (bus_name, list(BUS_GAIN_CALIBRATION.keys()))
        }
    # Bug fix: guard against a missing song connection, consistent with the
    # other public methods (previously this raised AttributeError below).
    if self._song is None:
        return {"configured": False, "error": "No song connection available"}
    config = BUS_GAIN_CALIBRATION[bus_name]
    # Find the bus track: prefer the index recorded at creation time.
    bus_idx = self._bus_indices.get(bus_name)
    if bus_idx is None:
        # Fall back to a case-insensitive name search. (The old condition's
        # second clause was redundant -- it repeated this same substring test.)
        for i, t in enumerate(self._song.tracks):
            if bus_name.lower() in str(t.name).lower():
                bus_idx = i
                break
    if bus_idx is None:
        return {
            "configured": False,
            "error": "Bus track '%s' not found" % bus_name
        }
    try:
        track = self._song.tracks[bus_idx]
        # Apply fader level and pan from the calibration table.
        track.mixer_device.volume.value = config['volume']
        track.mixer_device.panning.value = config['pan']
        return {
            "configured": True,
            "bus_name": bus_name,
            "bus_index": bus_idx,
            "volume": config['volume'],
            "pan": config['pan'],
            "note": "Compressor and saturator settings available for manual application"
        }
    except Exception as e:
        return {
            "configured": False,
            "bus_name": bus_name,
            "error": str(e)
        }
def configure_return_effect(self, return_name):
    """
    Configure return track effect with default parameters.

    Args:
        return_name: Name of the return (must match RETURN_CONFIG keys)

    Returns:
        dict: Configuration status; "parameters_set" counts applied params
    """
    if return_name not in RETURN_CONFIG:
        return {
            "configured": False,
            "error": "Unknown return name '%s'. Valid: %s" % (return_name, list(RETURN_CONFIG.keys()))
        }
    config = RETURN_CONFIG[return_name]
    # Find the return track: stored index first, then name search
    return_idx = self._return_indices.get(return_name)
    if return_idx is None:
        # Search in return tracks (case-insensitive substring match)
        if hasattr(self._song, 'return_tracks'):
            for i, rt in enumerate(self._song.return_tracks):
                if return_name.lower() in str(rt.name).lower():
                    return_idx = i
                    break
    if return_idx is None:
        return {
            "configured": False,
            "error": "Return track '%s' not found" % return_name
        }
    try:
        # Get the return track (fall back to regular tracks on old APIs)
        if hasattr(self._song, 'return_tracks'):
            track = self._song.return_tracks[return_idx]
        else:
            track = self._song.tracks[return_idx]
        # Find the effect device by name substring
        device = None
        for d in track.devices:
            if config['device'].lower() in str(d.name).lower():
                device = d
                break
        if device is None:
            return {
                "configured": False,
                "return_name": return_name,
                "error": "Device '%s' not found on return track" % config['device']
            }
        # Configure parameters: each device parameter is matched against the
        # default_params keys by substring; first matching key wins.
        params_set = 0
        if hasattr(device, 'parameters'):
            for param in device.parameters:
                param_name = str(param.name)
                for key, value in config['default_params'].items():
                    if key in param_name:
                        try:
                            if isinstance(value, str):
                                # Handle string values like '1/8' for delay time
                                # This may need manual adjustment in Live
                                pass
                            else:
                                param.value = float(value)
                                params_set += 1
                        except Exception:
                            pass
                        break
        return {
            "configured": True,
            "return_name": return_name,
            "return_index": return_idx,
            "device": config['device'],
            "parameters_set": params_set,
            "target_params": list(config['default_params'].keys())
        }
    except Exception as e:
        return {
            "configured": False,
            "return_name": return_name,
            "error": str(e)
        }
def apply_role_mix(self, track_index, role):
    """
    Apply role-based mix settings (volume, pan, sends) to a track.

    Args:
        track_index: Index of the track in song.tracks.
        role: Role name (must match ROLE_MIX keys, e.g. 'kick', 'bass').

    Returns:
        dict: Application status; "applied" is False on any error.
    """
    if role not in ROLE_MIX:
        return {
            "applied": False,
            "error": "Unknown role '%s'. Valid: %s" % (role, list(ROLE_MIX.keys()))
        }
    # Bug fix: guard against a missing song connection, consistent with the
    # other public methods (previously this raised AttributeError).
    if self._song is None:
        return {"applied": False, "role": role, "error": "No song connection available"}
    config = ROLE_MIX[role]
    try:
        track_idx = int(track_index)
        track = self._song.tracks[track_idx]
        # Volume and pan come straight from the role profile.
        track.mixer_device.volume.value = config['volume']
        track.mixer_device.panning.value = config['pan']
        # Configure each send defined for this role.
        sends_configured = []
        for return_name, amount in config['sends'].items():
            result = self.set_track_send(track_idx, return_name, amount)
            sends_configured.append({
                "return": return_name,
                "amount": amount,
                "status": result.get("send_set", False)
            })
        return {
            "applied": True,
            "track_index": track_idx,
            "track_name": str(track.name),
            "role": role,
            "volume": config['volume'],
            "pan": config['pan'],
            "target_bus": config['bus'],
            "sends": sends_configured
        }
    except Exception as e:
        return {
            "applied": False,
            "track_index": track_index,
            "role": role,
            "error": str(e)
        }
def configure_master_chain(self):
    """
    Configure master track with professional mastering chain.

    Only parameterizes devices that are already present on the master;
    missing devices are reported but not inserted.

    Returns:
        dict: Configuration status with per-device details
    """
    try:
        master = self._song.master_track
        devices_found = {}
        # Check for existing devices per chain stage (eq/compressor/limiter)
        for chain_type, chain_config in MASTER_CHAIN.items():
            device_name = chain_config['device']
            device = None
            for d in master.devices:
                if device_name.lower() in str(d.name).lower():
                    device = d
                    break
            devices_found[chain_type] = {
                "device": device_name,
                "found": device is not None,
                "name": str(device.name) if device else None
            }
            # Configure parameters if device exists (substring name match,
            # first matching config key wins per parameter)
            if device and hasattr(device, 'parameters'):
                params_set = 0
                for param in device.parameters:
                    param_name = str(param.name)
                    for key, value in chain_config['params'].items():
                        if key in param_name:
                            try:
                                param.value = float(value)
                                params_set += 1
                            except Exception:
                                pass
                            break
                devices_found[chain_type]["params_set"] = params_set
        return {
            "configured": True,
            "master_track": "Master",
            "devices": devices_found,
            "recommendation": "Add EQ Eight, Compressor, and Limiter to master if not present"
        }
    except Exception as e:
        return {
            "configured": False,
            "error": str(e)
        }
# =============================================================================
# MODULE-LEVEL FUNCTIONS (for direct use)
# =============================================================================
def create_bus_track(ableton_conn, bus_name, bus_type='audio'):
    """
    Create a group/bus track via a one-off BusArchitecture.

    Args:
        ableton_conn: The Ableton Live connection.
        bus_name: Name for the bus track.
        bus_type: 'audio' or 'midi'.

    Returns:
        dict: Creation status.
    """
    return BusArchitecture(ableton_conn).create_bus_track(bus_name, bus_type)
def create_return_track(ableton_conn, return_name, effect_type=None):
    """
    Create a return track (with optional effect) via a one-off BusArchitecture.

    Args:
        ableton_conn: The Ableton Live connection.
        return_name: Name for the return track.
        effect_type: Effect device name to insert.

    Returns:
        dict: Creation status.
    """
    return BusArchitecture(ableton_conn).create_return_track(return_name, effect_type)
def route_track_to_bus(ableton_conn, track_index, bus_name):
    """
    Route a track to a bus via a one-off BusArchitecture.

    Args:
        ableton_conn: The Ableton Live connection.
        track_index: Index of the source track.
        bus_name: Name of the bus track.

    Returns:
        dict: Routing status.
    """
    return BusArchitecture(ableton_conn).route_track_to_bus(track_index, bus_name)
def set_track_send(ableton_conn, track_index, return_name, amount):
    """
    Set a track's send amount to a return track via a one-off BusArchitecture.

    Args:
        ableton_conn: The Ableton Live connection.
        track_index: Index of the source track.
        return_name: Name of the return track.
        amount: Send amount 0.0-1.0.

    Returns:
        dict: Send configuration status.
    """
    return BusArchitecture(ableton_conn).set_track_send(track_index, return_name, amount)
def apply_professional_mix(ableton_conn, track_assignments):
    """
    Applies complete professional mix architecture.

    This is the main entry point for setting up a professional mix:
    1. Creates buses (drums, bass, music, vocal, fx)
    2. Creates returns (space, echo, heat, glue)
    3. Routes tracks to appropriate buses
    4. Sets send levels per role
    5. Applies master chain configuration
    6. Configures bus gain calibration

    Args:
        ableton_conn: The Ableton Live connection
        track_assignments: List of dicts with 'track_index', 'role', 'bus'
            Example: [
                {"track_index": 0, "role": "kick", "bus": "drums"},
                {"track_index": 1, "role": "bass", "bus": "bass"},
            ]

    Returns:
        dict: Complete mix application status (per-step lists plus a summary;
        individual failures are collected in "errors" rather than raised)
    """
    arch = BusArchitecture(ableton_conn)
    results = {
        "buses_created": [],
        "returns_created": [],
        "tracks_routed": [],
        "sends_configured": [],
        "master_configured": False,
        "errors": []
    }
    try:
        # 1. Create buses
        bus_names = ['drums', 'bass', 'music', 'vocal', 'fx']
        for bus_name in bus_names:
            bus_result = arch.create_bus_track("BUS %s" % bus_name.capitalize())
            if bus_result.get("bus_created"):
                results["buses_created"].append(bus_result)
                # Configure bus gain from the calibration table
                gain_result = arch.configure_bus_gain(bus_name)
                if gain_result.get("configured"):
                    results["buses_created"][-1]["gain_configured"] = True
            else:
                results["errors"].append("Bus %s: %s" % (bus_name, bus_result.get("error", "Unknown error")))
        # 2. Create returns with effects
        for return_name, config in RETURN_CONFIG.items():
            return_result = arch.create_return_track(
                return_name.capitalize(),
                effect_type=config['device']
            )
            if return_result.get("return_created"):
                results["returns_created"].append(return_result)
                # Configure return effect with its default parameters
                effect_result = arch.configure_return_effect(return_name)
                if effect_result.get("configured"):
                    results["returns_created"][-1]["effect_configured"] = True
            else:
                results["errors"].append("Return %s: %s" % (return_name, return_result.get("error", "Unknown error")))
        # 3. Route tracks and apply role mix
        for assignment in track_assignments:
            track_idx = assignment.get("track_index")
            role = assignment.get("role")
            bus = assignment.get("bus")
            # Skip malformed assignments silently
            if track_idx is None or role is None:
                continue
            # Apply role mix (includes sends)
            mix_result = arch.apply_role_mix(track_idx, role)
            if mix_result.get("applied"):
                results["tracks_routed"].append(mix_result)
            else:
                results["errors"].append("Track %s role %s: %s" % (track_idx, role, mix_result.get("error")))
            # Route to bus if specified
            if bus:
                route_result = arch.route_track_to_bus(track_idx, "BUS %s" % bus.capitalize())
                if route_result.get("routed"):
                    results["tracks_routed"][-1]["bus_routed"] = True
        # 4. Configure master chain
        master_result = arch.configure_master_chain()
        results["master_configured"] = master_result.get("configured", False)
        results["master_details"] = master_result
        # Summary
        results["summary"] = {
            "buses": len(results["buses_created"]),
            "returns": len(results["returns_created"]),
            "tracks_processed": len(results["tracks_routed"]),
            "errors": len(results["errors"])
        }
        return results
    except Exception as e:
        results["errors"].append("Fatal error: %s" % str(e))
        return results
def get_bus_config(bus_name):
    """
    Look up the gain-calibration profile for a bus.

    Args:
        bus_name: Bus name (e.g., 'drums', 'bass').

    Returns:
        dict: Bus configuration, or None for an unknown bus.
    """
    return BUS_GAIN_CALIBRATION.get(bus_name, None)
def get_return_config(return_name):
    """
    Look up the return-track configuration for a return name.

    Args:
        return_name: Return name (e.g., 'space', 'echo').

    Returns:
        dict: Return configuration, or None for an unknown return.
    """
    return RETURN_CONFIG.get(return_name, None)
def get_role_mix(role):
    """
    Look up the mix profile for a role.

    Args:
        role: Role name (e.g., 'kick', 'bass', 'lead').

    Returns:
        dict: Role mix configuration, or None for an unknown role.
    """
    return ROLE_MIX.get(role, None)
def get_master_chain():
    """
    Return the master-chain configuration table.

    Returns:
        dict: Master chain configuration.
    """
    return MASTER_CHAIN
def list_available_buses():
    """Names of every calibrated bus."""
    return list(BUS_GAIN_CALIBRATION)
def list_available_returns():
    """Names of every configured return."""
    return list(RETURN_CONFIG)
def list_available_roles():
    """Names of every defined mix role."""
    return list(ROLE_MIX)

View File

@@ -0,0 +1,840 @@
"""
CoherenceScorer - Advanced Coherence Calculation Engine
Calculates multi-dimensional coherence scores between audio samples using
timbre similarity (MFCC), transient compatibility, spectral balance, and
energy consistency.
Professional-grade tool with 0.90 threshold enforcement.
File: AbletonMCP_AI/mcp_server/engines/coherence_scorer.py
"""
import os
import numpy as np
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from pathlib import Path
class CoherenceError(Exception):
    """Raised when a coherence score falls below the professional threshold (0.90)."""
    def __init__(self, score: float, weak_components: List[str], suggestions: List[str]):
        self.score = score
        self.weak_components = weak_components
        self.suggestions = suggestions
        super().__init__(self._format_message())
    def _format_message(self) -> str:
        """Build the multi-line diagnostic banner shown as the exception message."""
        bar = '=' * 60
        status = 'PASS ✓' if self.score >= 0.90 else 'FAIL ✗'
        lines = [
            "",
            bar,
            "COHERENCE ERROR: Professional threshold not met",
            bar,
            f"Current Score: {self.score:.3f} (MIN_COHERENCE: 0.900)",
            f"Status: {status}",
            "",  # blank separator after the status line
        ]
        if self.weak_components:
            lines.append(f"Weak Components ({len(self.weak_components)}):")
            lines.extend(f"{comp}" for comp in self.weak_components)
        if self.suggestions:
            lines.append("")
            lines.append("Suggestions for Improvement:")
            lines.extend(f"  {i}. {sug}" for i, sug in enumerate(self.suggestions, 1))
        lines.append(bar)
        return "\n".join(lines) + "\n"
@dataclass
class AudioFeatures:
    """Container for extracted audio features (one analyzed sample)."""
    mfccs: np.ndarray  # MFCC coefficients (timbre)
    spectral_centroid: float  # Brightness
    spectral_rolloff: float  # Bandwidth
    spectral_flux: np.ndarray  # Spectral change (transients)
    zero_crossing_rate: float  # Noisiness
    rms_energy: np.ndarray  # Loudness envelope
    attack_time: float  # Transient attack
    sustain_level: float  # Sustain level
    low_energy: float  # Low band energy (20-250Hz), normalized share
    mid_energy: float  # Mid band energy (250-2000Hz), normalized share
    high_energy: float  # High band energy (2000-20000Hz), normalized share
    duration: float  # Audio duration in seconds
    sample_rate: int  # Sample rate
@dataclass
class ScoreBreakdown:
    """Detailed breakdown of the coherence score into its weighted components."""
    overall_score: float
    timbre_similarity: float  # MFCC cosine similarity (40%)
    transient_compatibility: float  # Attack characteristic match (30%)
    spectral_balance: float  # Low/mid/high ratio match (20%)
    energy_consistency: float  # RMS correlation (10%)
    is_professional: bool
    weak_components: List[str]
    suggestions: List[str]
    def to_dict(self) -> Dict:
        """Serialize to a plain dict, rounding every score field to 4 decimals."""
        numeric_fields = (
            'overall_score', 'timbre_similarity', 'transient_compatibility',
            'spectral_balance', 'energy_consistency',
        )
        payload = {name: round(getattr(self, name), 4) for name in numeric_fields}
        payload['is_professional'] = self.is_professional
        payload['weak_components'] = self.weak_components
        payload['suggestions'] = self.suggestions
        return payload
class CoherenceScorer:
    """
    Professional coherence calculation engine.

    Calculates multi-dimensional coherence scores between audio samples
    using real audio feature extraction and weighted component analysis.

    Weights:
    - Timbre similarity (MFCC): 40%
    - Transient compatibility: 30%
    - Spectral balance: 20%
    - Energy consistency: 10%

    Professional threshold: 0.90 (MIN_COHERENCE)
    """
    # Professional threshold - no compromise
    MIN_COHERENCE = 0.90
    # Component weights (must sum to 1.0)
    WEIGHTS = {
        'timbre': 0.40,
        'transient': 0.30,
        'spectral': 0.20,
        'energy': 0.10
    }
    # Per-component quality floor; a component below its threshold is
    # reported as "weak" in the score breakdown
    THRESHOLDS = {
        'timbre': 0.75,
        'transient': 0.70,
        'spectral': 0.65,
        'energy': 0.60
    }
def __init__(self, sample_rate: int = 22050):
    """
    Create a scorer.

    Args:
        sample_rate: Target sample rate used when loading audio (default 22050).
    """
    self.sample_rate = sample_rate
    # Most recent ScoreBreakdown produced by this scorer, if any.
    self.last_breakdown: Optional[ScoreBreakdown] = None
def _load_audio(self, file_path: str) -> Tuple[np.ndarray, int]:
"""
Load audio file using librosa.
Args:
file_path: Path to audio file (.wav, .mp3, etc.)
Returns:
Tuple of (audio_array, sample_rate)
Raises:
FileNotFoundError: If file doesn't exist
ValueError: If file format unsupported or corrupted
"""
try:
import librosa
except ImportError:
raise ImportError(
"librosa is required for audio analysis. "
"Install with: pip install librosa"
)
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"Audio file not found: {file_path}")
if not path.suffix.lower() in ['.wav', '.mp3', '.aif', '.aiff', '.flac']:
raise ValueError(f"Unsupported audio format: {path.suffix}")
try:
y, sr = librosa.load(file_path, sr=self.sample_rate, mono=True)
if len(y) == 0:
raise ValueError(f"Audio file is empty: {file_path}")
return y, sr
except Exception as e:
raise ValueError(f"Failed to load audio file {file_path}: {str(e)}")
def _extract_features(self, audio: np.ndarray, sr: int) -> "AudioFeatures":
    """
    Extract comprehensive audio features for coherence analysis.

    Args:
        audio: Mono audio time series
        sr: Sample rate of `audio`

    Returns:
        AudioFeatures dataclass with all extracted features
    """
    import librosa
    # Basic spectral features
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr))
    spectral_flux = librosa.onset.onset_strength(y=audio, sr=sr)
    zcr = np.mean(librosa.feature.zero_crossing_rate(audio))
    rms = librosa.feature.rms(y=audio)[0]
    # Band energy analysis
    # Low: 20-250Hz, Mid: 250-2000Hz, High: 2000-20000Hz
    stft = np.abs(librosa.stft(audio))
    freqs = librosa.fft_frequencies(sr=sr)
    low_mask = (freqs >= 20) & (freqs <= 250)
    mid_mask = (freqs > 250) & (freqs <= 2000)
    high_mask = (freqs > 2000) & (freqs <= 20000)
    low_energy = np.sum(stft[low_mask, :]) / stft.shape[1]
    mid_energy = np.sum(stft[mid_mask, :]) / stft.shape[1]
    high_energy = np.sum(stft[high_mask, :]) / stft.shape[1]
    # Normalize band energies to proportions of the total
    total_energy = low_energy + mid_energy + high_energy
    if total_energy > 0:
        low_energy /= total_energy
        mid_energy /= total_energy
        high_energy /= total_energy
    # Transient analysis (attack detection)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr)
    if len(onset_frames) > 0:
        # BUG FIX: onset_detect() returns *frame* indices, not sample indices.
        # The previous code sliced the raw waveform with frame numbers, so it
        # always analysed a ~60-sample sliver near t=0. Convert to samples and
        # use a millisecond-scale analysis window around the first onset.
        first_onset = int(librosa.frames_to_samples(onset_frames[0]))
        window_start = max(0, first_onset - int(0.005 * sr))        # 5 ms pre-roll
        window_end = min(len(audio), first_onset + int(0.05 * sr))  # 50 ms tail
        if window_end > window_start:
            attack_segment = audio[window_start:window_end]
            # Attack time: time from 10% to 90% of peak amplitude
            peak_idx = np.argmax(np.abs(attack_segment))
            peak_val = np.abs(attack_segment[peak_idx])
            if peak_val > 0:
                ten_percent = 0.1 * peak_val
                ninety_percent = 0.9 * peak_val
                ten_idx = np.where(np.abs(attack_segment[:peak_idx]) >= ten_percent)[0]
                ninety_idx = np.where(np.abs(attack_segment[:peak_idx]) >= ninety_percent)[0]
                if len(ten_idx) > 0 and len(ninety_idx) > 0:
                    attack_time = (ninety_idx[0] - ten_idx[0]) / sr * 1000  # ms
                else:
                    attack_time = 10.0  # Default 10ms
            else:
                attack_time = 10.0
            # Sustain level: average absolute amplitude after the attack
            sustain_start = peak_idx + int(0.01 * sr)  # 10ms after peak
            if sustain_start < len(attack_segment):
                sustain_level = np.mean(np.abs(attack_segment[sustain_start:]))
            else:
                sustain_level = 0.0
        else:
            attack_time = 10.0
            sustain_level = np.mean(np.abs(audio)) * 0.5
    else:
        attack_time = 50.0  # Long attack for non-transient sounds
        sustain_level = np.mean(np.abs(audio))
    return AudioFeatures(
        mfccs=mfccs,
        spectral_centroid=spectral_centroid,
        spectral_rolloff=spectral_rolloff,
        spectral_flux=spectral_flux,
        zero_crossing_rate=zcr,
        rms_energy=rms,
        attack_time=attack_time,
        sustain_level=float(sustain_level),
        low_energy=float(low_energy),
        mid_energy=float(mid_energy),
        high_energy=float(high_energy),
        duration=len(audio) / sr,
        sample_rate=sr
    )
def _calculate_timbre_similarity(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate timbre similarity using MFCC cosine similarity.
Uses mean MFCC vectors and accounts for temporal evolution.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Similarity score 0.0-1.0
"""
# Mean MFCC vectors
mfcc1_mean = np.mean(feat1.mfccs, axis=1)
mfcc2_mean = np.mean(feat2.mfccs, axis=1)
# Cosine similarity
dot_product = np.dot(mfcc1_mean, mfcc2_mean)
norm1 = np.linalg.norm(mfcc1_mean)
norm2 = np.linalg.norm(mfcc2_mean)
if norm1 == 0 or norm2 == 0:
return 0.0
cosine_sim = dot_product / (norm1 * norm2)
# Convert from [-1, 1] to [0, 1]
similarity = (cosine_sim + 1) / 2
# Also compare spectral centroid (brightness match)
centroid_diff = abs(feat1.spectral_centroid - feat2.spectral_centroid)
max_centroid = max(feat1.spectral_centroid, feat2.spectral_centroid)
if max_centroid > 0:
centroid_sim = 1 - (centroid_diff / max_centroid)
else:
centroid_sim = 1.0
# Weighted combination: 80% MFCC, 20% centroid
final_similarity = 0.8 * similarity + 0.2 * centroid_sim
return float(np.clip(final_similarity, 0.0, 1.0))
def _calculate_transient_compatibility(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate transient/attack characteristic compatibility.
Compares attack times, sustain levels, and spectral flux patterns.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Compatibility score 0.0-1.0
"""
# Attack time compatibility
attack_diff = abs(feat1.attack_time - feat2.attack_time)
max_attack = max(feat1.attack_time, feat2.attack_time, 1.0)
attack_compatibility = 1 - (attack_diff / max_attack)
# Sustain level compatibility
max_sustain = max(feat1.sustain_level, feat2.sustain_level, 0.001)
sustain_diff = abs(feat1.sustain_level - feat2.sustain_level)
sustain_compatibility = 1 - (sustain_diff / max_sustain)
# Spectral flux pattern correlation
flux1 = feat1.spectral_flux
flux2 = feat2.spectral_flux
# Normalize lengths
min_len = min(len(flux1), len(flux2))
if min_len > 1:
flux1_norm = flux1[:min_len]
flux2_norm = flux2[:min_len]
# Normalize to unit vectors
flux1_norm = flux1_norm / (np.linalg.norm(flux1_norm) + 1e-10)
flux2_norm = flux2_norm / (np.linalg.norm(flux2_norm) + 1e-10)
flux_corr = np.corrcoef(flux1_norm, flux2_norm)[0, 1]
if np.isnan(flux_corr):
flux_corr = 0.0
else:
flux_corr = 0.5
# Weighted combination
# Attack: 40%, Sustain: 30%, Flux correlation: 30%
compatibility = (
0.4 * attack_compatibility +
0.3 * sustain_compatibility +
0.3 * max(0, flux_corr) # Clip negative correlations
)
return float(np.clip(compatibility, 0.0, 1.0))
def _calculate_spectral_balance(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate spectral balance match (low/mid/high ratio comparison).
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Balance score 0.0-1.0
"""
# Energy band ratios
bands1 = np.array([feat1.low_energy, feat1.mid_energy, feat1.high_energy])
bands2 = np.array([feat2.low_energy, feat2.mid_energy, feat2.high_energy])
# Cosine similarity of band distributions
dot = np.dot(bands1, bands2)
norm1 = np.linalg.norm(bands1)
norm2 = np.linalg.norm(bands2)
if norm1 == 0 or norm2 == 0:
return 0.5
balance_sim = dot / (norm1 * norm2)
# Also compare rolloff (high-frequency content boundary)
rolloff_diff = abs(feat1.spectral_rolloff - feat2.spectral_rolloff)
max_rolloff = max(feat1.spectral_rolloff, feat2.spectral_rolloff, 1.0)
rolloff_sim = 1 - (rolloff_diff / max_rolloff)
# Combined: 70% band balance, 30% rolloff match
final_balance = 0.7 * balance_sim + 0.3 * rolloff_sim
return float(np.clip(final_balance, 0.0, 1.0))
def _calculate_energy_consistency(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate energy envelope consistency.
Compares RMS energy patterns and overall loudness.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Consistency score 0.0-1.0
"""
rms1 = feat1.rms_energy
rms2 = feat2.rms_energy
# Match lengths
min_len = min(len(rms1), len(rms2))
if min_len < 2:
return 0.5
rms1_norm = rms1[:min_len]
rms2_norm = rms2[:min_len]
# Normalize
max_rms1 = np.max(rms1_norm) + 1e-10
max_rms2 = np.max(rms2_norm) + 1e-10
rms1_norm = rms1_norm / max_rms1
rms2_norm = rms2_norm / max_rms2
# Correlation of energy envelopes
corr = np.corrcoef(rms1_norm, rms2_norm)[0, 1]
if np.isnan(corr):
corr = 0.0
# Mean energy similarity
mean1 = np.mean(feat1.rms_energy)
mean2 = np.mean(feat2.rms_energy)
max_mean = max(mean1, mean2, 0.001)
mean_sim = 1 - (abs(mean1 - mean2) / max_mean)
# Combined: 60% correlation, 40% mean level
consistency = 0.6 * max(0, corr) + 0.4 * mean_sim
return float(np.clip(consistency, 0.0, 1.0))
def score_pair(self, sample1_path: str, sample2_path: str, enforce_threshold: bool = True) -> float:
    """
    Calculate coherence score between two samples.

    Args:
        sample1_path: Path to first audio file
        sample2_path: Path to second audio file
        enforce_threshold: If True, raises CoherenceError if score < 0.90

    Returns:
        Overall coherence score (0.0-1.0)

    Raises:
        CoherenceError: If score < MIN_COHERENCE and enforce_threshold=True
        FileNotFoundError: If audio files not found
        ValueError: If audio loading fails
    """
    # Load and extract features
    audio1, sr1 = self._load_audio(sample1_path)
    audio2, sr2 = self._load_audio(sample2_path)
    feat1 = self._extract_features(audio1, sr1)
    feat2 = self._extract_features(audio2, sr2)
    # Calculate component scores
    timbre_score = self._calculate_timbre_similarity(feat1, feat2)
    transient_score = self._calculate_transient_compatibility(feat1, feat2)
    spectral_score = self._calculate_spectral_balance(feat1, feat2)
    energy_score = self._calculate_energy_consistency(feat1, feat2)
    # Calculate weighted overall score
    overall_score = (
        self.WEIGHTS['timbre'] * timbre_score +
        self.WEIGHTS['transient'] * transient_score +
        self.WEIGHTS['spectral'] * spectral_score +
        self.WEIGHTS['energy'] * energy_score
    )
    # BUG FIX: the previous chained str.replace() lookup built keys such as
    # 'timbretimbre' that never matched THRESHOLDS, so every component was
    # silently compared against the 0.6 fallback instead of its configured
    # threshold. Map component names to threshold keys explicitly.
    component_keys = {
        'timbre_similarity': 'timbre',
        'transient_compatibility': 'transient',
        'spectral_balance': 'spectral',
        'energy_consistency': 'energy',
    }
    # Identify weak components
    weak_components = []
    suggestions = []
    scores = {
        'timbre_similarity': timbre_score,
        'transient_compatibility': transient_score,
        'spectral_balance': spectral_score,
        'energy_consistency': energy_score
    }
    for component, score in scores.items():
        threshold = self.THRESHOLDS.get(component_keys[component], 0.6)
        if score < threshold:
            weak_components.append(f"{component}: {score:.3f} (threshold: {threshold:.2f})")
            # Add specific suggestions per weak component
            if 'timbre' in component:
                suggestions.append(
                    "Consider samples from the same source/pack for timbral consistency. "
                    "Try layering with a shared reverb bus."
                )
            elif 'transient' in component:
                suggestions.append(
                    "Adjust transient timing with warp markers or apply transient shaping. "
                    "Samples have different attack characteristics."
                )
            elif 'spectral' in component:
                suggestions.append(
                    "Use EQ to match frequency profiles. "
                    "Check if samples occupy different frequency ranges."
                )
            elif 'energy' in component:
                suggestions.append(
                    "Adjust clip gain to match perceived loudness. "
                    "Apply compression for consistent dynamics."
                )
    # Create breakdown for later get_score_breakdown() calls
    self.last_breakdown = ScoreBreakdown(
        overall_score=overall_score,
        timbre_similarity=timbre_score,
        transient_compatibility=transient_score,
        spectral_balance=spectral_score,
        energy_consistency=energy_score,
        is_professional=overall_score >= self.MIN_COHERENCE,
        weak_components=weak_components,
        suggestions=list(set(suggestions))  # Remove duplicates
    )
    # Enforce professional threshold
    if enforce_threshold and overall_score < self.MIN_COHERENCE:
        raise CoherenceError(overall_score, weak_components, suggestions)
    return overall_score
def score_kit(self, sample_paths: List[str], enforce_threshold: bool = True) -> float:
    """
    Calculate overall kit coherence (average of all pairwise scores).
    Args:
        sample_paths: List of audio file paths
        enforce_threshold: If True, raises CoherenceError if score < 0.90
    Returns:
        Kit coherence score (0.0-1.0)
    Raises:
        CoherenceError: If score < MIN_COHERENCE and enforce_threshold=True
        ValueError: If fewer than 2 samples provided, or no comparison succeeded
    """
    if len(sample_paths) < 2:
        raise ValueError("Need at least 2 samples to calculate kit coherence")
    # Calculate all pairwise scores (upper-triangle of the pair matrix)
    scores = []
    pair_details = []
    for i in range(len(sample_paths)):
        for j in range(i + 1, len(sample_paths)):
            try:
                score = self.score_pair(
                    sample_paths[i],
                    sample_paths[j],
                    enforce_threshold=False  # Don't raise until we check all
                )
                scores.append(score)
                pair_details.append({
                    'pair': (Path(sample_paths[i]).name, Path(sample_paths[j]).name),
                    'score': score
                })
            except Exception as e:
                # Best effort: a failed comparison counts as zero coherence
                # instead of aborting the whole kit analysis. Note it is NOT
                # added to pair_details, so it cannot appear as a "weak pair".
                print(f"Warning: Could not compare {sample_paths[i]} vs {sample_paths[j]}: {e}")
                scores.append(0.0)
    if not scores:
        raise ValueError("No valid pairwise comparisons could be made")
    # Average score across all pairs
    kit_score = np.mean(scores)
    # Find worst pairs (score < 0.75), ascending by score
    sorted_pairs = sorted(pair_details, key=lambda x: x['score'])
    weak_pairs = [p for p in sorted_pairs if p['score'] < 0.75]
    # Build suggestions
    suggestions = []
    if weak_pairs:
        worst = weak_pairs[:3]  # Top 3 worst
        suggestions.append(
            f"{len(weak_pairs)} weak pair(s) detected. "
            f"Worst: {worst[0]['pair']} = {worst[0]['score']:.3f}"
        )
        suggestions.append(
            "Consider replacing or processing weak pairs for better cohesion."
        )
    self.last_breakdown = ScoreBreakdown(
        overall_score=kit_score,
        timbre_similarity=0.0,  # Not meaningful for kit average
        transient_compatibility=0.0,
        spectral_balance=0.0,
        energy_consistency=0.0,
        is_professional=kit_score >= self.MIN_COHERENCE,
        weak_components=[f"Weak pair: {p['pair']} ({p['score']:.3f})" for p in weak_pairs[:3]],
        suggestions=suggestions
    )
    if enforce_threshold and kit_score < self.MIN_COHERENCE:
        raise CoherenceError(kit_score, self.last_breakdown.weak_components, suggestions)
    return kit_score
def score_section_transition(self, samples_a: List[str], samples_b: List[str],
                             enforce_threshold: bool = True) -> float:
    """
    Calculate coherence of transition between two sections.
    Compares all samples in section A against all samples in section B
    to ensure smooth transition.
    Args:
        samples_a: List of sample paths in first section
        samples_b: List of sample paths in second section
        enforce_threshold: If True, raises CoherenceError if score < 0.90
    Returns:
        Transition coherence score (0.0-1.0)
    Raises:
        ValueError: If a section is empty or no comparison succeeded
        CoherenceError: If score < MIN_COHERENCE and enforce_threshold=True
    """
    if not samples_a or not samples_b:
        raise ValueError("Both sections must contain at least one sample")
    # Cross-section comparisons (every A sample vs every B sample)
    scores = []
    for sample_a in samples_a:
        for sample_b in samples_b:
            try:
                score = self.score_pair(sample_a, sample_b, enforce_threshold=False)
                scores.append(score)
            except Exception as e:
                # Best effort: skip pairs that fail to load/compare.
                print(f"Warning: Cross-section comparison failed: {e}")
    if not scores:
        raise ValueError("No valid cross-section comparisons")
    transition_score = np.mean(scores)
    # Analyze worst transitions
    # NOTE(review): `scores` is guaranteed non-empty here (checked above),
    # so the else branch is unreachable; kept as-is.
    if scores:
        min_score = min(scores)
        weak_count = sum(1 for s in scores if s < 0.75)
    else:
        min_score = 0.0
        weak_count = 0
    suggestions = []
    if min_score < 0.70:
        suggestions.append(
            f"Poor transition detected (worst pair: {min_score:.3f}). "
            "Consider using transition FX or crossfade."
        )
    if weak_count > len(scores) * 0.3:
        suggestions.append(
            f"{weak_count}/{len(scores)} transitions are weak. "
            "Sections may be harmonically or sonically incompatible."
        )
    self.last_breakdown = ScoreBreakdown(
        overall_score=transition_score,
        timbre_similarity=0.0,  # Component scores not meaningful for an aggregate
        transient_compatibility=0.0,
        spectral_balance=0.0,
        energy_consistency=0.0,
        is_professional=transition_score >= self.MIN_COHERENCE,
        weak_components=[f"Weak transitions: {weak_count}"] if weak_count > 0 else [],
        suggestions=suggestions if suggestions else ["Transition coherence is acceptable"]
    )
    if enforce_threshold and transition_score < self.MIN_COHERENCE:
        raise CoherenceError(transition_score, self.last_breakdown.weak_components, suggestions)
    return transition_score
def get_score_breakdown(self) -> Dict:
"""
Get detailed breakdown of the last coherence calculation.
Returns:
Dictionary with component scores and analysis
"""
if self.last_breakdown is None:
return {
'error': 'No coherence calculation performed yet. '
'Call score_pair(), score_kit(), or score_section_transition() first.'
}
return self.last_breakdown.to_dict()
@staticmethod
def is_professional_grade(score: float) -> bool:
"""
Check if a coherence score meets professional standards.
Args:
score: Coherence score to evaluate
Returns:
True if score >= MIN_COHERENCE (0.90)
"""
return score >= CoherenceScorer.MIN_COHERENCE
def batch_score(self, sample_paths: List[str], mode: str = 'pairwise') -> Dict:
    """
    Batch coherence analysis for multiple samples.
    Args:
        sample_paths: List of sample paths to analyze
        mode: 'pairwise' for all pairs, 'kit' for overall coherence
    Returns:
        Dictionary with scores and analysis
    Raises:
        ValueError: If `mode` is neither 'pairwise' nor 'kit'
    """
    if mode == 'pairwise':
        # Defaults reported when no pair could be scored at all.
        results = {
            'mode': 'pairwise',
            'pairs': [],
            'min_score': 1.0,
            'max_score': 0.0,
            'avg_score': 0.0
        }
        scores = []
        for i in range(len(sample_paths)):
            for j in range(i + 1, len(sample_paths)):
                try:
                    score = self.score_pair(
                        sample_paths[i],
                        sample_paths[j],
                        enforce_threshold=False
                    )
                    scores.append(score)
                    results['pairs'].append({
                        'sample_a': Path(sample_paths[i]).name,
                        'sample_b': Path(sample_paths[j]).name,
                        'score': round(score, 4),
                        'professional': score >= self.MIN_COHERENCE
                    })
                except Exception as e:
                    # Record the failure for this pair and keep going.
                    results['pairs'].append({
                        'sample_a': Path(sample_paths[i]).name,
                        'sample_b': Path(sample_paths[j]).name,
                        'error': str(e)
                    })
        if scores:
            results['min_score'] = round(min(scores), 4)
            results['max_score'] = round(max(scores), 4)
            results['avg_score'] = round(np.mean(scores), 4)
        return results
    elif mode == 'kit':
        score = self.score_kit(sample_paths, enforce_threshold=False)
        return {
            'mode': 'kit',
            'kit_score': round(score, 4),
            'professional': score >= self.MIN_COHERENCE,
            'sample_count': len(sample_paths),
            'breakdown': self.get_score_breakdown()
        }
    else:
        raise ValueError(f"Unknown mode: {mode}. Use 'pairwise' or 'kit'")
# Convenience functions for quick access
def check_coherence(sample1: str, sample2: str) -> Dict:
    """
    Quick coherence check between two samples.

    Args:
        sample1: Path to first audio file
        sample2: Path to second audio file

    Returns:
        Dictionary with score and breakdown on success, or
        {'coherent': False, 'error': ...} if the analysis fails.
    """
    scorer = CoherenceScorer()
    try:
        pair_score = scorer.score_pair(sample1, sample2, enforce_threshold=False)
        verdict = {
            'coherent': pair_score >= CoherenceScorer.MIN_COHERENCE,
            'score': round(pair_score, 4),
            'details': scorer.get_score_breakdown()
        }
    except Exception as e:
        verdict = {
            'coherent': False,
            'error': str(e)
        }
    return verdict
def check_kit_coherence(sample_paths: List[str]) -> Dict:
    """
    Quick kit coherence check.

    Args:
        sample_paths: List of sample paths

    Returns:
        Dictionary with kit score and analysis on success, or
        {'coherent': False, 'error': ...} if the analysis fails.
    """
    scorer = CoherenceScorer()
    try:
        kit_score = scorer.score_kit(sample_paths, enforce_threshold=False)
        verdict = {
            'coherent': kit_score >= CoherenceScorer.MIN_COHERENCE,
            'score': round(kit_score, 4),
            'details': scorer.get_score_breakdown()
        }
    except Exception as e:
        verdict = {
            'coherent': False,
            'error': str(e)
        }
    return verdict

View File

@@ -0,0 +1,843 @@
"""
coherence_system.py - Advanced Coherence Scoring System
Implements sophisticated sample coherence tracking and scoring for the
AbletonMCP_AI music production engine. Provides cross-generation memory,
fatigue tracking, section-aware selection, and palette locking.
Author: AbletonMCP_AI
Date: 2026-04-11
Version: 1.0.0
"""
from typing import Dict, List, Tuple, Optional, Any, Set
from dataclasses import dataclass, field
from pathlib import Path
import json
import time
# ============================================================================
# CROSS-GENERATION MEMORY
# ============================================================================
# Global storage for tracking sample usage across song generations
# family name -> {'count': int, 'last_used': timestamp, 'roles': set, 'paths': set}
_cross_generation_family_memory: Dict[str, Dict[str, Any]] = {}
# sample path -> {'count': int, 'last_used': timestamp, 'generations': [timestamps]}
_cross_generation_path_memory: Dict[str, Dict[str, Any]] = {}
# Fatigue tracking: path -> usage count (drives get_persistent_fatigue())
_fatigue_memory: Dict[str, int] = {}
# Palette lock state: role -> locked folder
# NOTE(review): not referenced anywhere in this section — presumably consumed
# by palette-locking features elsewhere; verify before removing.
_palette_locks: Dict[str, str] = {}
# ============================================================================
# SECTION-AWARE CONFIGURATION
# ============================================================================
# Per-role activity level for each section type, on a 0-4 scale
# (0 = role absent, 4 = role dominant). Consumed by get_section_role_bonus().
ROLE_ACTIVITY: Dict[str, Dict[str, int]] = {
    'kick': {'intro': 2, 'build': 3, 'drop': 4, 'break': 1, 'outro': 2},
    'clap': {'intro': 0, 'build': 2, 'drop': 4, 'break': 1, 'outro': 1},
    'snare': {'intro': 1, 'build': 2, 'drop': 3, 'break': 0, 'outro': 1},
    'hat': {'intro': 1, 'build': 3, 'drop': 4, 'break': 2, 'outro': 1},
    'bass': {'intro': 0, 'build': 2, 'drop': 4, 'break': 1, 'outro': 1},
    'lead': {'intro': 0, 'build': 1, 'drop': 4, 'break': 0, 'outro': 0},
    'pad': {'intro': 3, 'build': 2, 'drop': 1, 'break': 3, 'outro': 2},
    'fx': {'intro': 1, 'build': 4, 'drop': 2, 'break': 2, 'outro': 1},
    'perc': {'intro': 1, 'build': 2, 'drop': 4, 'break': 1, 'outro': 2},
}
# Target density/complexity/energy per section type.
# Consumed by get_section_density_profile(); unknown types get a medium default.
SECTION_DENSITY_PROFILES: Dict[str, Dict[str, Any]] = {
    'intro': {'density': 0.3, 'complexity': 'low', 'energy_target': 0.25},
    'build': {'density': 0.7, 'complexity': 'high', 'energy_target': 0.72},
    'drop': {'density': 1.0, 'complexity': 'high', 'energy_target': 1.0},
    'break': {'density': 0.4, 'complexity': 'low', 'energy_target': 0.38},
    'outro': {'density': 0.35, 'complexity': 'low', 'energy_target': 0.32},
    'verse': {'density': 0.5, 'complexity': 'medium', 'energy_target': 0.5},
    'chorus': {'density': 0.9, 'complexity': 'high', 'energy_target': 0.85},
    'bridge': {'density': 0.6, 'complexity': 'medium', 'energy_target': 0.65},
}
# Family compatibility matrix (0.0 - 1.0), looked up by _get_family_compatibility().
# Rows/columns are sample families; values near 1.0 mean the families blend well.
FAMILY_COMPATIBILITY: Dict[str, Dict[str, float]] = {
    'kick': {'kick': 1.0, 'snare': 0.95, 'clap': 0.9, 'perc': 0.85, 'hat': 0.7, 'bass': 0.8, 'lead': 0.4, 'pad': 0.3, 'fx': 0.5},
    'snare': {'kick': 0.95, 'snare': 1.0, 'clap': 0.98, 'perc': 0.9, 'hat': 0.85, 'bass': 0.75, 'lead': 0.4, 'pad': 0.3, 'fx': 0.5},
    'clap': {'kick': 0.9, 'snare': 0.98, 'clap': 1.0, 'perc': 0.85, 'hat': 0.8, 'bass': 0.75, 'lead': 0.4, 'pad': 0.3, 'fx': 0.55},
    'hat': {'kick': 0.7, 'snare': 0.85, 'clap': 0.8, 'perc': 0.8, 'hat': 1.0, 'bass': 0.65, 'lead': 0.45, 'pad': 0.4, 'fx': 0.5},
    'perc': {'kick': 0.85, 'snare': 0.9, 'clap': 0.85, 'perc': 1.0, 'hat': 0.8, 'bass': 0.7, 'lead': 0.4, 'pad': 0.35, 'fx': 0.6},
    'bass': {'kick': 0.8, 'snare': 0.75, 'clap': 0.75, 'perc': 0.7, 'hat': 0.65, 'bass': 1.0, 'lead': 0.85, 'pad': 0.9, 'fx': 0.6},
    'lead': {'kick': 0.4, 'snare': 0.4, 'clap': 0.4, 'perc': 0.4, 'hat': 0.45, 'bass': 0.85, 'lead': 1.0, 'pad': 0.95, 'fx': 0.7},
    'pad': {'kick': 0.3, 'snare': 0.3, 'clap': 0.3, 'perc': 0.35, 'hat': 0.4, 'bass': 0.9, 'lead': 0.95, 'pad': 1.0, 'fx': 0.6},
    'fx': {'kick': 0.5, 'snare': 0.5, 'clap': 0.55, 'perc': 0.6, 'hat': 0.5, 'bass': 0.6, 'lead': 0.7, 'pad': 0.6, 'fx': 1.0},
}
# ============================================================================
# JOINT SCORING SYSTEM
# ============================================================================
def calculate_joint_score(
    candidate_sample: Dict[str, Any],
    role: str,
    current_selections: Dict[str, Dict[str, Any]]
) -> float:
    """
    Calculates coherence between candidate and already-selected samples.

    Returns a score in the range 1.0-1.5 based on:
    - Same folder/pack bonus
    - Family compatibility (via FAMILY_COMPATIBILITY matrix)
    - Duration matching

    Args:
        candidate_sample: Dict with sample metadata including 'folder', 'pack',
            'family', 'duration', etc.
        role: The role this sample would fill (kick, snare, bass, etc.)
        current_selections: Dict of already-selected samples by role

    Returns:
        Float score where:
        - 1.0 = neutral (no coherence bonus, or nothing selected yet)
        - > 1.0 = coherence bonus; individual bonuses are capped at 1.15x and
          at most two are multiplied, so e.g. matching folder, pack, and
          duration yields 1.15 * 1.15 ~= 1.32 (the theoretical cap is 1.5).

    Note:
        The previous version also read 'path' from both samples into locals
        that were never used; that dead code has been removed. 'path' is not
        part of this function's scoring.
    """
    # Nothing selected yet: nothing to cohere with.
    if not current_selections:
        return 1.0
    candidate_folder = candidate_sample.get('folder', '')
    candidate_pack = candidate_sample.get('pack', '')
    candidate_family = candidate_sample.get('family', 'unknown')
    candidate_duration = candidate_sample.get('duration', 1.0)
    scores = []
    compatibilities = []
    for selected_sample in current_selections.values():
        selected_folder = selected_sample.get('folder', '')
        selected_pack = selected_sample.get('pack', '')
        selected_family = selected_sample.get('family', 'unknown')
        selected_duration = selected_sample.get('duration', 1.0)
        # Same folder bonus (empty folder strings never match)
        if candidate_folder and candidate_folder == selected_folder:
            scores.append(1.3)
        # Same pack bonus - slightly stronger than a folder match
        if candidate_pack and candidate_pack == selected_pack:
            scores.append(1.35)
        # Family compatibility: only strong matches (> 0.8) contribute
        family_score = _get_family_compatibility(candidate_family, selected_family)
        if family_score > 0.8:
            compatibilities.append(family_score)
        # Duration matching: only bonuses (> 1.0) contribute
        duration_score = _calculate_duration_match(candidate_duration, selected_duration)
        if duration_score > 1.0:
            scores.append(duration_score)
    # Combine the top two bonuses multiplicatively, capping each at 1.15x
    base_score = 1.0
    if scores:
        top_scores = sorted(scores, reverse=True)[:2]
        for s in top_scores:
            base_score *= min(s, 1.15)
    if compatibilities:
        avg_compat = sum(compatibilities) / len(compatibilities)
        base_score *= (0.9 + (avg_compat * 0.4))  # Scale into ~1.0-1.3x range
    # Cap at reasonable maximum
    return min(round(base_score, 3), 1.5)
def _get_family_compatibility(family1: str, family2: str) -> float:
    """
    Look up pairwise family compatibility from the matrix.

    Args:
        family1: First family name
        family2: Second family name

    Returns:
        Compatibility score 0.0-1.0 (0.5 when neither family is known)
    """
    # Try the matrix row for family1 first, then the symmetric lookup.
    for first, second in ((family1, family2), (family2, family1)):
        row = FAMILY_COMPATIBILITY.get(first)
        if row is not None:
            return row.get(second, 0.5)
    # Neither family is in the matrix: neutral compatibility.
    return 0.5
def _calculate_duration_match(duration1: float, duration2: float) -> float:
"""
Calculate duration matching score between two samples.
Args:
duration1: First sample duration in seconds
duration2: Second sample duration in seconds
Returns:
Match score 0.95x-1.15x
"""
if duration1 <= 0 or duration2 <= 0:
return 1.0
ratio = min(duration1, duration2) / max(duration1, duration2)
# Scale ratio to 0.95-1.15 range
if ratio > 0.9:
return 1.15
elif ratio > 0.7:
return 1.05
elif ratio > 0.5:
return 1.0
else:
return 0.95
# ============================================================================
# CROSS-GENERATION MEMORY
# ============================================================================
def update_cross_generation_memory(
    selections: Dict[str, Dict[str, Any]],
    sample_paths: List[str]
) -> None:
    """
    Tracks sample usage across song generations.
    Updates family memory, path memory, and fatigue counts with timestamp
    and usage information.
    Args:
        selections: Dict of selected samples by role
        sample_paths: List of all sample paths used in generation
    Example:
        >>> selections = {'kick': {'family': 'drums', 'path': '/kick.wav'}}
        >>> update_cross_generation_memory(selections, ['/kick.wav', '/snare.wav'])
    """
    # One timestamp for the whole generation so all entries agree.
    timestamp = time.time()
    # Update family memory (keyed by each selected sample's family)
    for role, sample in selections.items():
        family = sample.get('family', 'unknown')
        path = str(sample.get('path', ''))
        if family not in _cross_generation_family_memory:
            _cross_generation_family_memory[family] = {
                'count': 0,
                'last_used': 0,
                'roles': set(),
                'paths': set()
            }
        memory = _cross_generation_family_memory[family]
        memory['count'] += 1
        memory['last_used'] = timestamp
        memory['roles'].add(role)
        # Empty path (missing 'path' key) is not recorded.
        if path:
            memory['paths'].add(path)
    # Update path memory (one entry per path used this generation)
    for path in sample_paths:
        path_str = str(path)
        if path_str not in _cross_generation_path_memory:
            _cross_generation_path_memory[path_str] = {
                'count': 0,
                'last_used': 0,
                'generations': []
            }
        path_memory = _cross_generation_path_memory[path_str]
        path_memory['count'] += 1
        path_memory['last_used'] = timestamp
        path_memory['generations'].append(timestamp)
    # Also update fatigue memory (consumed by get_persistent_fatigue())
    for path in sample_paths:
        path_str = str(path)
        _fatigue_memory[path_str] = _fatigue_memory.get(path_str, 0) + 1
def get_cross_generation_penalty(sample_path: str, role: str) -> float:
    """
    Returns penalty factor 0.5-1.0 based on usage history.
    Samples used in recent generations receive higher penalties.
    Args:
        sample_path: Path to the sample file
        role: The role being filled (currently unused; kept for API symmetry)
    Returns:
        Penalty factor where:
        - 1.0 = no penalty (never used)
        - 0.5 = maximum penalty (very recently used)
    Example:
        >>> get_cross_generation_penalty('/kick.wav', 'kick')
        0.75  # Moderate penalty
    """
    path_str = str(sample_path)
    # Never seen before: no penalty at all.
    if path_str not in _cross_generation_path_memory:
        return 1.0
    memory = _cross_generation_path_memory[path_str]
    count = memory.get('count', 0)
    last_used = memory.get('last_used', 0)
    # Calculate recency factor (decays over time)
    time_since_use = time.time() - last_used
    hours_since_use = time_since_use / 3600
    # Recency decay: 1.0 at 0 hours, linearly down to the 0.5 floor at 24+ hours
    recency_factor = max(0.5, 1.0 - (hours_since_use / 48))
    # Count factor: more uses = more penalty
    # 1 use = 0.95, 5 uses = 0.65, 10+ uses = 0.5
    if count == 1:
        count_factor = 0.95
    elif count <= 5:
        count_factor = 0.95 - ((count - 1) * 0.075)
    else:
        count_factor = 0.5
    # Combine factors (recency weighted 40%, count weighted 60%)
    penalty = (recency_factor * 0.4) + (count_factor * 0.6)
    return round(max(0.5, min(1.0, penalty)), 3)
def get_cross_generation_memory_stats() -> Dict[str, Any]:
    """
    Get statistics about cross-generation memory.
    Returns:
        Dict with family/path/fatigue memory sizes plus the five most-used
        families and paths as (key, record) tuples sorted by usage count,
        descending.
    """
    return {
        'family_memory_count': len(_cross_generation_family_memory),
        'path_memory_count': len(_cross_generation_path_memory),
        'fatigue_memory_count': len(_fatigue_memory),
        'top_used_families': sorted(
            _cross_generation_family_memory.items(),
            key=lambda x: x[1]['count'],
            reverse=True
        )[:5],
        'top_used_paths': sorted(
            _cross_generation_path_memory.items(),
            key=lambda x: x[1]['count'],
            reverse=True
        )[:5]
    }
# ============================================================================
# FATIGUE TRACKING
# ============================================================================
def get_persistent_fatigue(sample_path: str, role: str) -> float:
    """
    Returns fatigue factor 0.5-1.0 based on usage count.

    Fatigue represents how "worn out" a sample is from overuse:
    - 5 uses = 50% fatigue (0.5 factor)
    - 0 uses = 100% fresh (1.0 factor)

    Args:
        sample_path: Path to the sample file
        role: The role being filled (reserved for role-specific tracking)

    Returns:
        Fatigue factor 0.5-1.0 where higher is better (less fatigued)
    """
    uses = _fatigue_memory.get(str(sample_path), 0)
    # Each prior use costs 0.1 freshness, bottoming out at 0.5 for 5+ uses.
    freshness_by_uses = {0: 1.0, 1: 0.9, 2: 0.8, 3: 0.7, 4: 0.6}
    return freshness_by_uses.get(uses, 0.5)
def reset_fatigue_for_path(sample_path: str) -> None:
    """
    Forget the usage count for one sample path (no-op if untracked).

    Args:
        sample_path: Path to reset fatigue for
    """
    _fatigue_memory.pop(str(sample_path), None)
def reset_all_fatigue() -> None:
    """Reset all fatigue tracking memory."""
    global _fatigue_memory
    # Rebinds to a fresh dict (rather than .clear()); any existing aliases
    # keep referencing the old mapping.
    _fatigue_memory = {}
def get_fatigue_report() -> Dict[str, Any]:
    """
    Get a report of current fatigue levels.

    Returns:
        Dict with total count, a per-level count, and the tracked paths
        grouped by level ('fresh' = 0 uses ... 'exhausted' = 5+ uses).
    """
    # Level names indexed by usage count (5+ all map to 'exhausted').
    level_names = ('fresh', 'slight', 'moderate', 'significant', 'high', 'exhausted')
    fatigue_levels = {name: [] for name in level_names}
    for path, count in _fatigue_memory.items():
        fatigue_levels[level_names[min(count, 5)]].append(path)
    report = {'total_tracked': len(_fatigue_memory)}
    for name in level_names:
        report[f'{name}_count'] = len(fatigue_levels[name])
    report['by_level'] = fatigue_levels
    return report
# ============================================================================
# SECTION-AWARE SELECTION
# ============================================================================
def get_section_role_bonus(role: str, section_type: str) -> float:
    """Map a role's activity level in a section to a score multiplier.

    Looks up the ROLE_ACTIVITY table (0-4 activity scale) and converts it
    to a bonus factor: 4 -> 1.5 (strong bonus) down to 0 -> 0.5 (penalty).
    Unknown roles or sections are neutral.

    Args:
        role: Sample role such as 'kick', 'snare', 'bass', 'lead'.
        section_type: Section name such as 'intro', 'build', 'drop'.

    Returns:
        Bonus factor in the range 0.5-1.5 (1.0 when unknown).
    """
    activity = ROLE_ACTIVITY.get(role.lower(), {}).get(section_type.lower())
    if activity is None:
        return 1.0
    # Linear mapping of the 0-4 activity scale onto 0.5-1.5.
    return {0: 0.5, 1: 0.75, 2: 1.0, 3: 1.25, 4: 1.5}.get(activity, 1.0)
def get_section_density_profile(section_type: str) -> Dict[str, Any]:
    """Return the density/complexity/energy profile for a section type.

    Args:
        section_type: Section name (case-insensitive), e.g. 'drop', 'intro'.

    Returns:
        A copy of the stored profile dict; unknown section types fall back
        to a medium-density default.
    """
    profile = SECTION_DENSITY_PROFILES.get(section_type.lower())
    if profile is None:
        return {'density': 0.5, 'complexity': 'medium', 'energy_target': 0.5}
    return profile.copy()
def calculate_section_appropriateness(
    sample_features: Dict[str, Any],
    role: str,
    section_type: str
) -> float:
    """Score how well a sample fits a section, combining role and energy.

    The role-activity bonus is scaled by how closely the sample's energy
    tracks the section's energy target.

    Args:
        sample_features: Sample characteristics; the 'energy' key is used.
        role: The sample role.
        section_type: The target section type.

    Returns:
        Appropriateness score rounded to 3 decimals, capped at 1.5.
    """
    profile = get_section_density_profile(section_type)
    # Energy proximity: perfect match scores 1.0 and decays at twice the
    # absolute difference, floored at 0.5.
    gap = abs(sample_features.get('energy', 0.5) - profile['energy_target'])
    energy_match = max(0.5, 1.0 - 2 * gap)
    score = get_section_role_bonus(role, section_type) * energy_match
    return round(min(score, 1.5), 3)
def get_section_role_recommendations(section_type: str) -> List[Tuple[str, float]]:
    """Rank every known role by its bonus for a given section.

    Args:
        section_type: The section type (case-insensitive).

    Returns:
        List of (role, bonus) pairs, highest bonus first.
    """
    section = section_type.lower()
    ranked = [
        (role, get_section_role_bonus(role, section))
        for role, sections in ROLE_ACTIVITY.items()
        if section in sections
    ]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
# ============================================================================
# PALETTE LOCK SYSTEM
# ============================================================================
def set_palette_lock(folders_by_role: Dict[str, str]) -> None:
    """Lock sample selection to specific folders per role for coherence.

    Once a role is locked, selection is biased toward samples living in
    that role's locked folder. Locks merge with any existing ones.

    Args:
        folders_by_role: Mapping of role -> folder path to lock to.

    Example:
        >>> set_palette_lock({'kick': 'reggaeton/kick', 'bass': 'reggaeton/bass'})
    """
    # Merge entry-by-entry (equivalent to dict.update) so locks can be
    # layered incrementally across calls.
    for role, folder in folders_by_role.items():
        _palette_locks[role] = folder
def clear_palette_lock(role: Optional[str] = None) -> None:
    """Remove the palette lock for one role, or all locks.

    Args:
        role: Role whose lock to remove; None clears every lock.
    """
    global _palette_locks
    if role is None:
        _palette_locks = {}
    else:
        # Silently ignore roles that were never locked.
        _palette_locks.pop(role, None)
def get_palette_locks() -> Dict[str, str]:
    """Return a snapshot of the active palette locks (role -> folder)."""
    # Shallow copy so callers cannot mutate the module-level state.
    return dict(_palette_locks)
def calculate_palette_bonus(sample_path: str, locked_folder: str) -> float:
    """Score how well a sample path agrees with a locked palette folder.

    Scoring:
        - 1.4 when the locked folder string occurs anywhere in the path.
        - 1.2 when the sample's containing folder equals the locked
          folder's parent component (sibling match).
        - 0.9 otherwise (mild penalty).
        - 1.0 when either argument is empty (neutral).

    Args:
        sample_path: Path of the candidate sample.
        locked_folder: Folder path the role is locked to.

    Returns:
        Bonus factor: 0.9, 1.0, 1.2 or 1.4.

    Example:
        >>> calculate_palette_bonus('/kick/808.wav', 'kick')
        1.4
    """
    if not sample_path or not locked_folder:
        return 1.0
    path_lc = str(sample_path).lower()
    folder_lc = str(locked_folder).lower()
    # Exact containment anywhere in the path wins outright.
    if folder_lc in path_lc:
        return 1.4
    path_segments = path_lc.replace('\\', '/').split('/')
    folder_segments = folder_lc.replace('\\', '/').split('/')
    # Sibling check: compare the sample's containing folder against the
    # locked folder's parent component (or its only component).
    if len(path_segments) >= 2 and len(folder_segments) >= 1:
        sample_parent = path_segments[-2] if len(path_segments) > 1 else ''
        if len(folder_segments) > 1:
            locked_parent = folder_segments[-2]
        else:
            locked_parent = folder_segments[0]
        if sample_parent and sample_parent == locked_parent:
            return 1.2
    # No relationship found - apply the slight penalty.
    return 0.9
def is_sample_in_palette(sample_path: str, role: str) -> bool:
    """Check whether a sample satisfies the palette lock for a role.

    Args:
        sample_path: Path of the sample.
        role: Role whose lock (if any) is consulted.

    Returns:
        True when no lock exists for the role, or when the palette bonus
        indicates an exact or sibling folder match (bonus >= 1.2).
    """
    if role not in _palette_locks:
        return True
    # Exact match (1.4) or sibling match (1.2) counts as "in palette".
    return calculate_palette_bonus(sample_path, _palette_locks[role]) >= 1.2
def get_palette_coherence_score(
    selections: Dict[str, Dict[str, Any]]
) -> float:
    """Average palette-lock agreement across a set of selected samples.

    Args:
        selections: Mapping of role -> selected sample dict (with 'path').

    Returns:
        Mean palette bonus over locked roles, rounded to 3 decimals;
        1.0 when nothing is locked or no selection hits a locked role.
    """
    if not selections or not _palette_locks:
        return 1.0
    bonuses = [
        calculate_palette_bonus(str(sample.get('path', '')), _palette_locks[role])
        for role, sample in selections.items()
        if role in _palette_locks
    ]
    if not bonuses:
        return 1.0
    return round(sum(bonuses) / len(bonuses), 3)
# ============================================================================
# COMPREHENSIVE COHERENCE CALCULATION
# ============================================================================
def calculate_comprehensive_coherence(
    candidate_sample: Dict[str, Any],
    role: str,
    current_selections: Dict[str, Dict[str, Any]],
    section_type: Optional[str] = None
) -> Dict[str, Any]:
    """Score a candidate sample using every coherence signal at once.

    Multiplies the joint compatibility score by the section and palette
    bonuses, then applies the fatigue and cross-generation penalties and
    clamps the composite to [0.0, 1.5].

    Args:
        candidate_sample: Sample dict to evaluate (must carry 'path').
        role: Role the sample would fill.
        current_selections: Samples already chosen, keyed by role.
        section_type: Optional section name for section-aware scoring.

    Returns:
        Dict with each component score plus the composite 'final_score'.

    Example:
        >>> result = calculate_comprehensive_coherence(candidate, 'kick', current, 'drop')
        >>> result['final_score']
        1.25
    """
    path = str(candidate_sample.get('path', ''))
    joint = calculate_joint_score(candidate_sample, role, current_selections)
    section = get_section_role_bonus(role, section_type) if section_type else 1.0
    palette = 1.0
    if role in _palette_locks:
        palette = calculate_palette_bonus(path, _palette_locks[role])
    fatigue = get_persistent_fatigue(path, role)
    gen_penalty = get_cross_generation_penalty(path, role)
    # Joint/section/palette act as multiplicative bonuses; fatigue and
    # cross-generation factors are penalties applied afterwards.
    base = joint * section * palette
    final = min(1.5, max(0.0, base * fatigue * gen_penalty))
    return {
        'joint_score': joint,
        'section_score': section,
        'palette_score': palette,
        'fatigue_factor': fatigue,
        'generation_penalty': gen_penalty,
        'base_score': round(base, 3),
        'final_score': round(final, 3),
        'role': role,
        'section_type': section_type,
        'sample_path': path
    }
def reset_all_memory() -> None:
    """Clear every coherence-system memory store (intended for tests)."""
    global _cross_generation_family_memory, _cross_generation_path_memory
    global _fatigue_memory, _palette_locks
    # Rebind all four stores in one shot.
    _cross_generation_family_memory, _cross_generation_path_memory = {}, {}
    _fatigue_memory, _palette_locks = {}, {}
# Export all public functions
__all__ = [
    # Joint scoring and cross-generation memory
    'calculate_joint_score',
    'update_cross_generation_memory',
    'get_cross_generation_penalty',
    'get_cross_generation_memory_stats',
    # Fatigue tracking
    'get_persistent_fatigue',
    'reset_fatigue_for_path',
    'reset_all_fatigue',
    'get_fatigue_report',
    # Section-aware selection
    'get_section_role_bonus',
    'get_section_density_profile',
    'calculate_section_appropriateness',
    'get_section_role_recommendations',
    # Palette lock system
    'set_palette_lock',
    'clear_palette_lock',
    'get_palette_locks',
    'calculate_palette_bonus',
    'is_sample_in_palette',
    'get_palette_coherence_score',
    # Composite scoring and global reset
    'calculate_comprehensive_coherence',
    'reset_all_memory',
    # Shared lookup tables
    'ROLE_ACTIVITY',
    'SECTION_DENSITY_PROFILES',
    'FAMILY_COMPATIBILITY',
]

View File

@@ -0,0 +1,635 @@
"""
Embedding Engine - Vector embeddings for audio samples
Crea embeddings vectoriales normalizados para samples usando features espectrales.
"""
import json
import os
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
# Intentar importar libreria_analyzer para integración
# Si no existe, funcionar independientemente
# libreria_analyzer is optional: when it cannot be imported the engine
# still works standalone, using a local pitch-class table and a fallback
# reference analysis.
try:
    from .libreria_analyzer import LibreriaAnalyzer, NOTE_TO_NUMBER
    HAS_ANALYZER = True
except ImportError:
    HAS_ANALYZER = False
    NOTE_TO_NUMBER = {
        'C': 0, 'C#': 1, 'Db': 1, 'D': 2, 'D#': 3, 'Eb': 3,
        'E': 4, 'F': 5, 'F#': 6, 'Gb': 6, 'G': 7, 'G#': 8,
        'Ab': 8, 'A': 9, 'A#': 10, 'Bb': 10, 'B': 11
    }


class EmbeddingEngine:
    """
    Vector-embedding engine for audio samples.

    Builds 20-dimensional vectors combining:
    - Duration
    - BPM (normalized)
    - Key (converted to pitch class 0-11)
    - RMS
    - Spectral Centroid
    - Spectral Rolloff
    - Zero Crossing Rate
    - MFCCs (13 coefficients)

    All embeddings are min-max normalized per dimension.
    """

    # Layout: [0] duration + [1] BPM + [2] key + [3] RMS + [4] centroid
    #         + [5] rolloff + [6] ZCR + [7:20] 13 MFCCs = 20 dimensions.
    EMBEDDING_DIM = 20
    EMBEDDINGS_FILE = Path("C:/ProgramData/Ableton/Live 12 Suite/Resources/MIDI Remote Scripts/libreria/reggaeton/.embeddings_index.json")
    FEATURES_CACHE = Path("C:/ProgramData/Ableton/Live 12 Suite/Resources/MIDI Remote Scripts/libreria/reggaeton/.features_cache.json")

    def __init__(self, features_data: Optional[Dict] = None):
        """
        Initialize the embedding engine.

        Args:
            features_data: Pre-loaded features data (optional).
        """
        self.embeddings: Dict[str, np.ndarray] = {}
        self.normalized_embeddings: Dict[str, np.ndarray] = {}
        self.min_values: Optional[np.ndarray] = None
        self.max_values: Optional[np.ndarray] = None
        self.features_data = features_data or {}
        # Reuse a previously persisted index when available.
        self._load_embeddings()

    def _key_to_number(self, key: str) -> float:
        """
        Convert a musical key (e.g. 'C#m', 'F', 'Ab') to a pitch class 0-11.

        Args:
            key: Key as a string (may include 'm' for minor).

        Returns:
            float: Pitch class number (0-11), or 0 if unrecognized.
        """
        if not key or key == "":
            return 0.0
        # Clean up: drop whitespace, minor/major markers and digits.
        key_clean = key.strip().upper()
        key_clean = key_clean.replace('M', '').replace('MINOR', '').replace('MAJOR', '')
        key_clean = ''.join([c for c in key_clean if c.isalpha() or c == '#'])
        # Extract the base note (1-2 characters, accounting for accidentals).
        if len(key_clean) >= 2 and key_clean[1] in ['#', 'B']:
            note = key_clean[:2]
        else:
            note = key_clean[:1] if key_clean else 'C'
        return float(NOTE_TO_NUMBER.get(note, 0))

    def _bpm_to_normalized(self, bpm: float) -> float:
        """
        Normalize BPM into [0, 1] (assuming the typical 60-200 range).

        Args:
            bpm: Sample BPM.

        Returns:
            float: Normalized BPM (0-1); 0.5 when BPM is unknown.
        """
        if bpm <= 0:
            return 0.5  # Neutral value when no BPM is available
        # Typical electronic-music range: 60-200 BPM.
        min_bpm, max_bpm = 60.0, 200.0
        normalized = (bpm - min_bpm) / (max_bpm - min_bpm)
        return np.clip(normalized, 0.0, 1.0)

    def create_embedding(self, features: Dict) -> np.ndarray:
        """
        Build a 20-dimensional embedding vector from sample features.

        Layout: [0]=duration, [1]=BPM, [2]=key, [3]=RMS, [4]=spectral
        centroid, [5]=spectral rolloff, [6]=ZCR, [7:20]=13 MFCCs.
        Every component is pre-scaled to roughly [0, 1].

        (Fixed: the previous implementation computed a full embedding and
        then discarded it, recomputing with this layout - the dead first
        pass has been removed.)

        Args:
            features: Dictionary with the sample's features.

        Returns:
            np.ndarray: float32 vector of EMBEDDING_DIM components.
        """
        embedding = np.zeros(self.EMBEDDING_DIM, dtype=np.float32)
        # [0] Duration, normalized over a 0-10 second range.
        embedding[0] = np.clip(features.get('duration', 1.0) / 10.0, 0.0, 1.0)
        # [1] BPM, normalized over the typical 60-200 BPM range.
        embedding[1] = self._bpm_to_normalized(features.get('bpm', 0))
        # [2] Key as pitch class 0-11, scaled to 0-1.
        embedding[2] = self._key_to_number(features.get('key', '')) / 11.0
        # [3] RMS in dB, mapped from [-60, 0] dB onto [0, 1].
        embedding[3] = np.clip((features.get('rms', -30) - (-60)) / 60.0, 0.0, 1.0)
        # [4] Spectral centroid, normalized over 0-10000 Hz.
        embedding[4] = np.clip(features.get('spectral_centroid', 2000) / 10000.0, 0.0, 1.0)
        # [5] Spectral rolloff, normalized over 0-20000 Hz.
        embedding[5] = np.clip(features.get('spectral_rolloff', 8000) / 20000.0, 0.0, 1.0)
        # [6] Zero-crossing rate (already in 0-1).
        embedding[6] = np.clip(features.get('zero_crossing_rate', 0.1), 0.0, 1.0)
        # [7:20] 13 MFCCs, zero-padded when fewer are provided; MFCCs are
        # typically in [-100, 100], mapped onto [0, 1] (a padded 0 maps to
        # the neutral 0.5).
        mfccs = list(features.get('mfccs', [0] * 13))
        if len(mfccs) < 13:
            mfccs += [0] * (13 - len(mfccs))
        for i in range(13):
            embedding[7 + i] = np.clip((mfccs[i] + 100) / 200.0, 0.0, 1.0)
        # NOTE: 'onset_strength' is intentionally not part of the vector;
        # the layout above fills all 20 dimensions.
        return embedding

    def normalize_embeddings(self) -> None:
        """
        Normalize all embeddings with min-max scaling.

        Each dimension is scaled independently into [0, 1].
        """
        if not self.embeddings:
            return
        # Stack into a matrix so per-dimension stats come from one pass.
        paths = list(self.embeddings.keys())
        matrix = np.array([self.embeddings[p] for p in paths], dtype=np.float32)
        self.min_values = matrix.min(axis=0)
        self.max_values = matrix.max(axis=0)
        # Guard against division by zero for constant dimensions.
        ranges = self.max_values - self.min_values
        ranges[ranges == 0] = 1.0
        normalized_matrix = (matrix - self.min_values) / ranges
        self.normalized_embeddings = {
            path: normalized_matrix[i]
            for i, path in enumerate(paths)
        }

    def build_from_features(self, features_data: Optional[Dict] = None) -> None:
        """
        Build embeddings from features data.

        Args:
            features_data: Dictionary with sample features; falls back to
                the instance's data or the on-disk features cache.
        """
        if features_data is None:
            features_data = self.features_data
        if not features_data or 'samples' not in features_data:
            # Fall back to the persisted features cache.
            if self.FEATURES_CACHE.exists():
                with open(self.FEATURES_CACHE, 'r', encoding='utf-8') as f:
                    features_data = json.load(f)
        if not features_data or 'samples' not in features_data:
            print("[EmbeddingEngine] No features data available")
            return
        samples = features_data.get('samples', {})
        print(f"[EmbeddingEngine] Building embeddings for {len(samples)} samples...")
        self.embeddings = {}
        for path, features in samples.items():
            try:
                embedding = self.create_embedding(features)
                self.embeddings[path] = embedding
            except Exception as e:
                # One bad sample must not abort the whole index build.
                print(f"[EmbeddingEngine] Error creating embedding for {path}: {e}")
        self.normalize_embeddings()
        print(f"[EmbeddingEngine] Created {len(self.embeddings)} embeddings")

    def save_embeddings(self) -> None:
        """
        Persist the normalized embeddings to a JSON file.
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings to save")
            return
        # Serialize vectors as plain lists so the index is JSON-friendly.
        data = {
            'version': '1.0',
            'dimensions': self.EMBEDDING_DIM,
            'total_samples': len(self.normalized_embeddings),
            'created_at': str(np.datetime64('now')),
            'min_values': self.min_values.tolist() if self.min_values is not None else None,
            'max_values': self.max_values.tolist() if self.max_values is not None else None,
            'embeddings': {
                path: embedding.tolist()
                for path, embedding in self.normalized_embeddings.items()
            }
        }
        # Ensure the target directory exists.
        self.EMBEDDINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(self.EMBEDDINGS_FILE, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        print(f"[EmbeddingEngine] Saved {len(self.normalized_embeddings)} embeddings to {self.EMBEDDINGS_FILE}")

    def _load_embeddings(self) -> bool:
        """
        Load embeddings from the index file when it exists.

        Returns:
            bool: True when embeddings were loaded successfully.
        """
        if not self.EMBEDDINGS_FILE.exists():
            return False
        try:
            with open(self.EMBEDDINGS_FILE, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.EMBEDDING_DIM = data.get('dimensions', 20)
            self.min_values = np.array(data.get('min_values')) if data.get('min_values') else None
            self.max_values = np.array(data.get('max_values')) if data.get('max_values') else None
            self.normalized_embeddings = {
                path: np.array(emb, dtype=np.float32)
                for path, emb in data.get('embeddings', {}).items()
            }
            self.embeddings = self.normalized_embeddings.copy()
            print(f"[EmbeddingEngine] Loaded {len(self.normalized_embeddings)} embeddings from cache")
            return True
        except Exception as e:
            # Treat a corrupt cache as "no cache" - the index can be rebuilt.
            print(f"[EmbeddingEngine] Error loading embeddings: {e}")
            return False

    def cosine_distance(self, emb1: np.ndarray, emb2: np.ndarray) -> float:
        """
        Cosine distance between two embeddings.

        Args:
            emb1: First embedding.
            emb2: Second embedding.

        Returns:
            float: Cosine distance (0 = identical direction, up to 2 for
            opposite vectors; 1.0 when either vector has zero norm).
        """
        norm1 = np.linalg.norm(emb1)
        norm2 = np.linalg.norm(emb2)
        if norm1 == 0 or norm2 == 0:
            return 1.0
        similarity = np.dot(emb1, emb2) / (norm1 * norm2)
        # Distance = 1 - similarity (0 = similar, larger = different).
        return 1.0 - np.clip(similarity, -1.0, 1.0)

    def euclidean_distance(self, emb1: np.ndarray, emb2: np.ndarray) -> float:
        """
        Euclidean distance between two embeddings.

        Args:
            emb1: First embedding.
            emb2: Second embedding.

        Returns:
            float: Euclidean distance scaled by sqrt(EMBEDDING_DIM).
        """
        diff = emb1 - emb2
        return np.sqrt(np.sum(diff ** 2)) / np.sqrt(self.EMBEDDING_DIM)

    def find_similar(self, sample_path: str, top_n: int = 10,
                     use_cosine: bool = True) -> List[Tuple[str, float]]:
        """
        Find the samples most similar to a given indexed sample.

        Args:
            sample_path: Path of the reference sample.
            top_n: Number of results to return.
            use_cosine: True for cosine distance, False for Euclidean.

        Returns:
            List[Tuple[str, float]]: (path, distance) pairs, most similar first.
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings available")
            return []
        # Use the resolved absolute path as the index key.
        sample_path = str(Path(sample_path).resolve())
        if sample_path not in self.normalized_embeddings:
            print(f"[EmbeddingEngine] Sample not found: {sample_path}")
            return []
        reference_emb = self.normalized_embeddings[sample_path]
        distance_func = self.cosine_distance if use_cosine else self.euclidean_distance
        distances = []
        for path, emb in self.normalized_embeddings.items():
            if path != sample_path:  # Exclude the reference itself
                distances.append((path, distance_func(reference_emb, emb)))
        # Smaller distance = more similar.
        distances.sort(key=lambda x: x[1])
        return distances[:top_n]

    def find_by_audio_reference(self, audio_file_path: str, top_n: int = 20,
                                use_cosine: bool = True) -> List[Tuple[str, float]]:
        """
        Analyze an audio file and find similar indexed samples.

        Args:
            audio_file_path: Path of the audio file to analyze.
            top_n: Number of similar samples to return.
            use_cosine: True for cosine distance, False for Euclidean.

        Returns:
            List[Tuple[str, float]]: (path, distance) pairs, most similar first.
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings available")
            return []
        # Try the full analyzer first; degrade gracefully without it.
        features = None
        if HAS_ANALYZER:
            try:
                analyzer = LibreriaAnalyzer()
                features = analyzer.analyze_single_file(audio_file_path)
            except Exception as e:
                print(f"[EmbeddingEngine] Error analyzing reference: {e}")
        if features is None:
            print("[EmbeddingEngine] Using fallback analysis")
            features = self._fallback_analyze(audio_file_path)
        if features is None:
            print(f"[EmbeddingEngine] Could not analyze: {audio_file_path}")
            return []
        reference_emb = self.create_embedding(features)
        # Normalize with the same min/max used for the stored index so the
        # reference lives in the same space.
        if self.min_values is not None and self.max_values is not None:
            ranges = self.max_values - self.min_values
            ranges[ranges == 0] = 1.0
            reference_emb = (reference_emb - self.min_values) / ranges
        distance_func = self.cosine_distance if use_cosine else self.euclidean_distance
        distances = []
        for path, emb in self.normalized_embeddings.items():
            distances.append((path, distance_func(reference_emb, emb)))
        distances.sort(key=lambda x: x[1])
        return distances[:top_n]

    def _fallback_analyze(self, audio_file_path: str) -> Optional[Dict]:
        """
        Basic fallback analysis when the full analyzer is unavailable.

        Args:
            audio_file_path: Path of the file.

        Returns:
            Dict with generic default features, or None when the file is
            unreachable.
        """
        try:
            # Existence/permission check only; raises OSError on failure.
            os.stat(audio_file_path)
            # Defaults loosely based on typical reggaeton material.
            return {
                'bpm': 95.0,
                'key': 'C',
                'rms': -12.0,
                'spectral_centroid': 3000.0,
                'spectral_rolloff': 8000.0,
                'zero_crossing_rate': 0.1,
                'mfccs': [0.0] * 13,
                'onset_strength': 0.6,
                'duration': 4.0,
                'sample_rate': 44100,
                'channels': 2
            }
        except Exception:
            return None

    def get_embedding(self, sample_path: str) -> Optional[np.ndarray]:
        """
        Return the embedding for a specific sample.

        Args:
            sample_path: Path of the sample.

        Returns:
            np.ndarray: The sample's embedding, or None if not indexed.
        """
        sample_path = str(Path(sample_path).resolve())
        return self.normalized_embeddings.get(sample_path)

    def get_stats(self) -> Dict:
        """
        Return summary statistics of the stored embeddings.

        Returns:
            Dict with per-dimension mean/std/min/max and sample count.
        """
        if not self.normalized_embeddings:
            return {'total_samples': 0}
        matrix = np.array(list(self.normalized_embeddings.values()))
        return {
            'total_samples': len(self.normalized_embeddings),
            'dimensions': self.EMBEDDING_DIM,
            'mean_per_dim': matrix.mean(axis=0).tolist(),
            'std_per_dim': matrix.std(axis=0).tolist(),
            'min_per_dim': matrix.min(axis=0).tolist(),
            'max_per_dim': matrix.max(axis=0).tolist()
        }
# Funciones de conveniencia para uso directo
def create_embeddings_index(features_file: Optional[str] = None,
                            output_file: Optional[str] = None) -> EmbeddingEngine:
    """
    Build the full embeddings index and persist it.

    Args:
        features_file: Features JSON to read (default: the engine's
            cached features file).
        output_file: Destination path for the index (default: the
            engine's standard embeddings file).

    Returns:
        The configured EmbeddingEngine with embeddings built.
    """
    engine = EmbeddingEngine()
    if features_file is None:
        engine.build_from_features()
    else:
        with open(features_file, 'r') as fh:
            engine.build_from_features(json.load(fh))
    if output_file is not None:
        # Redirect persistence to the caller-chosen location.
        engine.EMBEDDINGS_FILE = Path(output_file)
    engine.save_embeddings()
    return engine
def find_similar_samples(sample_path: str, top_n: int = 10,
                         embeddings_file: Optional[str] = None) -> List[Tuple[str, float]]:
    """
    Convenience helper: nearest neighbours of an already-indexed sample.

    Args:
        sample_path: Path of the reference sample.
        top_n: Number of results.
        embeddings_file: Optional alternative embeddings index to load.

    Returns:
        List of (path, distance) pairs, most similar first.
    """
    engine = EmbeddingEngine()
    if embeddings_file is not None:
        # Point the engine at the alternate index and reload it.
        engine.EMBEDDINGS_FILE = Path(embeddings_file)
        engine._load_embeddings()
    return engine.find_similar(sample_path, top_n)
def find_samples_like_audio(audio_path: str, top_n: int = 20,
                            embeddings_file: Optional[str] = None) -> List[Tuple[str, float]]:
    """
    Convenience helper: find indexed samples similar to an audio file.

    Args:
        audio_path: Path of the reference audio.
        top_n: Number of results.
        embeddings_file: Optional alternative embeddings index to load.

    Returns:
        List of (path, distance) pairs, most similar first.
    """
    engine = EmbeddingEngine()
    if embeddings_file is not None:
        # Point the engine at the alternate index and reload it.
        engine.EMBEDDINGS_FILE = Path(embeddings_file)
        engine._load_embeddings()
    return engine.find_by_audio_reference(audio_path, top_n)
def cosine_similarity(emb1, emb2) -> float:
    """Cosine similarity between two vectors (compat helper for server.py).

    Args:
        emb1: First vector (any array-like).
        emb2: Second vector (any array-like).

    Returns:
        Similarity in [-1, 1]; 0.0 when either vector has zero norm.
    """
    a = np.asarray(emb1, dtype=float)
    b = np.asarray(emb2, dtype=float)
    scale = np.linalg.norm(a) * np.linalg.norm(b)
    if scale == 0:
        return 0.0
    return float(np.dot(a, b) / scale)
# Simple smoke test: exercises embedding creation, normalization and
# cosine distance when this module is run directly.
if __name__ == '__main__':
    print("[EmbeddingEngine] Running basic tests...")
    # Test 1: build an embedding from dummy features
    dummy_features = {
        'bpm': 95,
        'key': 'C',
        'rms': -12.5,
        'spectral_centroid': 2500.0,
        'spectral_rolloff': 8000.0,
        'zero_crossing_rate': 0.15,
        'mfccs': [0.5, -0.3, 0.1, 0.2, -0.1, 0.0, 0.3, -0.2, 0.1, 0.0, -0.1, 0.2, 0.1],
        'onset_strength': 0.85,
        'duration': 0.5,
        'sample_rate': 44100,
        'channels': 1
    }
    engine = EmbeddingEngine()
    emb = engine.create_embedding(dummy_features)
    print(f"[Test] Created embedding with shape: {emb.shape}")
    print(f"[Test] Embedding values: {emb[:5]}...")
    print(f"[Test] Embedding range: [{emb.min():.3f}, {emb.max():.3f}]")
    # Test 2: min-max normalization over a tiny synthetic index
    engine.embeddings = {
        'sample1.wav': emb,
        'sample2.wav': emb * 0.8,
        'sample3.wav': emb * 1.2
    }
    engine.normalize_embeddings()
    print(f"[Test] Normalized {len(engine.normalized_embeddings)} embeddings")
    # Test 3: cosine distance of a vector against a scaled copy
    # (scaling does not change direction, so the distance should be ~0)
    dist = engine.cosine_distance(emb, emb * 0.9)
    print(f"[Test] Cosine distance (emb vs 0.9*emb): {dist:.4f}")
    print("[EmbeddingEngine] All tests passed!")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,645 @@
"""
IntelligentSampleSelector - Coherent Sample Selection Engine
Uses embeddings from .embeddings_index.json to select samples that work
together musically based on cosine similarity.
Architecture:
- Embeddings-based similarity using cosine distance
- Energy matching for intensity coherence
- Coherence threshold: 0.90 (configurable)
- Never falls back to random selection
"""
import json
import os
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple, NamedTuple
from dataclasses import dataclass
import numpy as np
logger = logging.getLogger(__name__)
class CoherenceError(Exception):
    """Raised when no candidate sample satisfies the coherence threshold."""

    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
        """Store the human-readable message plus structured diagnostics.

        Args:
            message: Explanation of the failure.
            details: Optional machine-readable context (roles, counts, ...).
        """
        super().__init__(message)
        # Guarantee a dict so callers can index .details unconditionally.
        self.details = details if details else {}
@dataclass
class SelectionRationale:
    """Tracks why a sample was selected."""
    sample_path: str             # path of the chosen sample
    similarity_to_anchor: float  # cosine similarity to the anchor sample
    energy_match: bool           # whether energy fell within tolerance
    energy_delta: float          # relative energy difference from target
    selection_reason: str        # human-readable explanation of the pick
@dataclass
class SelectedSample:
    """A selected sample with metadata."""
    path: str                       # file path of the sample
    role: str                       # role it fills (kick, snare, bass, ...)
    energy: float                   # RMS energy of the sample
    coherence_score: float          # composite coherence score at selection
    rationale: SelectionRationale   # why this sample was chosen
class IntelligentSampleSelector:
"""
Selects coherent sample sets using embedding-based similarity.
Uses embeddings from .embeddings_index.json and calculates
cosine similarity to find samples that work together musically.
Coherence threshold: 0.90 (samples must be 90% similar)
Energy matching: ±10% of target energy
Never falls back to random selection - raises CoherenceError if
no samples meet criteria.
"""
def __init__(
self,
embeddings_path: Optional[str] = None,
coherence_threshold: float = 0.90,
energy_tolerance: float = 0.10
):
"""
Initialize the selector.
Args:
embeddings_path: Path to .embeddings_index.json
coherence_threshold: Minimum cosine similarity (default 0.90)
energy_tolerance: Energy matching tolerance (default 0.10 = ±10%)
"""
self.coherence_threshold = coherence_threshold
self.energy_tolerance = energy_tolerance
self.embeddings: Dict[str, np.ndarray] = {}
self.metadata: Dict[str, Dict[str, Any]] = {}
self.rationale_log: List[SelectionRationale] = []
# Default path: project root / .embeddings_index.json
if embeddings_path is None:
# Try to find embeddings in project root
script_dir = Path(__file__).parent.parent.parent
embeddings_path = str(script_dir / ".." / "libreria" / "reggaeton" / ".embeddings_index.json")
self.embeddings_path = embeddings_path
self._load_embeddings()
    def _load_embeddings(self) -> None:
        """Load embeddings and per-sample metadata from the JSON index.

        Supports two on-disk layouts:
          1. ``{"embeddings": {path: [vector], ...}}`` - metadata is
             inferred from the vector values and the sample's folder name.
          2. ``{"samples": {id: {"embedding": [...], ...}}}`` - metadata
             fields are read directly from each record.

        Raises:
            FileNotFoundError: If the index file does not exist.
            ValueError: If the file is not valid JSON.
            RuntimeError: For any other load failure.
        """
        if not os.path.exists(self.embeddings_path):
            raise FileNotFoundError(
                f"Embeddings file not found: {self.embeddings_path}. "
                f"Run sample analysis first to generate embeddings."
            )
        try:
            with open(self.embeddings_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            # Load embeddings (support both formats)
            if "embeddings" in data:
                # Format: { "embeddings": { "path": [vector], ... } }
                for sample_path, vector in data["embeddings"].items():
                    if vector and len(vector) > 0:
                        self.embeddings[sample_path] = np.array(vector, dtype=np.float32)
                        # Infer role from folder name
                        folder = os.path.basename(os.path.dirname(sample_path))
                        self.metadata[sample_path] = {
                            "path": sample_path,
                            "energy": vector[3] if len(vector) > 3 else 0.0,  # RMS is typically index 3
                            # NOTE(review): assumes BPM was normalized as bpm/200;
                            # EmbeddingEngine normalizes as (bpm-60)/140 - confirm
                            # which producer wrote this index before trusting "bpm".
                            "bpm": vector[1] * 200 if len(vector) > 1 else 0.0,  # Denormalize BPM
                            "key": "",  # Not stored in this format
                            "role": folder,
                        }
            elif "samples" in data:
                # Format: { "samples": { "id": { "embedding": [...], ... } } }
                for sample_id, info in data["samples"].items():
                    embedding = info.get("embedding")
                    if embedding:
                        self.embeddings[sample_id] = np.array(embedding, dtype=np.float32)
                        self.metadata[sample_id] = {
                            "path": info.get("path", ""),
                            "energy": info.get("energy", 0.0),
                            "bpm": info.get("bpm", 0.0),
                            "key": info.get("key", ""),
                            "role": info.get("role", "unknown"),
                        }
            logger.info(
                f"Loaded {len(self.embeddings)} embeddings from {self.embeddings_path}"
            )
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid embeddings JSON: {e}")
        except Exception as e:
            raise RuntimeError(f"Failed to load embeddings: {e}")
def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> float:
"""
Calculate cosine similarity between two vectors.
Formula: dot(a, b) / (norm(a) * norm(b))
Args:
a: First embedding vector
b: Second embedding vector
Returns:
Cosine similarity in range [-1, 1], typically [0, 1]
"""
norm_a = np.linalg.norm(a)
norm_b = np.linalg.norm(b)
if norm_a == 0 or norm_b == 0:
return 0.0
return float(np.dot(a, b) / (norm_a * norm_b))
def _get_sample_energy(self, sample_id: str) -> float:
"""Get RMS energy for a sample."""
return self.metadata.get(sample_id, {}).get("energy", 0.0)
def _energy_matches(self, sample_energy: float, target_energy: float) -> Tuple[bool, float]:
"""
Check if sample energy matches target within tolerance.
Args:
sample_energy: Sample's RMS energy
target_energy: Target energy level
Returns:
Tuple of (matches, delta) where delta is the relative difference
"""
if target_energy == 0:
return True, 0.0
delta = abs(sample_energy - target_energy) / target_energy
matches = delta <= self.energy_tolerance
return matches, delta
def _get_samples_by_role(self, role: str) -> List[str]:
"""Get all sample IDs matching a role."""
return [
sid for sid, meta in self.metadata.items()
if meta.get("role", "").lower() == role.lower()
]
    def select_anchor_sample(
        self,
        role: str,
        target_energy: float
    ) -> Tuple[str, SelectionRationale]:
        """
        Find the most representative sample for a role and energy level.

        The anchor is the sample that best represents the target
        characteristics and has the most similar samples around it
        (highest local density). Score = 0.7 * mean-similarity-to-peers
        + 0.3 * (1 - relative energy delta).

        Args:
            role: Sample role (e.g., "kick", "snare", "bass")
            target_energy: Target RMS energy level

        Returns:
            Tuple of (sample_id, rationale)

        Raises:
            CoherenceError: If no samples found for role or no energy matches
        """
        role_samples = self._get_samples_by_role(role)
        if not role_samples:
            # Fail loudly with the roles that DO exist so the caller can correct.
            available_roles = set(
                m.get("role", "unknown") for m in self.metadata.values()
            )
            raise CoherenceError(
                f"No samples found for role: {role}",
                details={
                    "requested_role": role,
                    "available_roles": list(available_roles),
                    "total_samples": len(self.metadata)
                }
            )
        # Score each sample by: energy match + similarity to other samples
        scored_samples: List[Tuple[str, float, float]] = []  # (id, score, energy)
        for sample_id in role_samples:
            sample_energy = self._get_sample_energy(sample_id)
            energy_matches, energy_delta = self._energy_matches(
                sample_energy, target_energy
            )
            # Skip samples with wildly different energy (optional, can be disabled)
            if not energy_matches:
                continue
            # Samples without an embedding cannot be compared; exclude them.
            if sample_id not in self.embeddings:
                continue
            # "Local density": mean cosine similarity to every other embedded
            # sample of the same role.
            similarities = []
            for other_id in role_samples:
                if other_id != sample_id and other_id in self.embeddings:
                    sim = self._cosine_similarity(
                        self.embeddings[sample_id],
                        self.embeddings[other_id]
                    )
                    similarities.append(sim)
            avg_similarity = np.mean(similarities) if similarities else 0.0
            # Score: high similarity + energy match
            # Weight: 70% similarity, 30% energy match
            energy_score = 1.0 - energy_delta
            total_score = (0.7 * avg_similarity) + (0.3 * energy_score)
            scored_samples.append((sample_id, total_score, sample_energy))
        if not scored_samples:
            # Include a preview of the candidate energies to aid debugging.
            raise CoherenceError(
                f"No samples match energy target for role '{role}'",
                details={
                    "role": role,
                    "target_energy": target_energy,
                    "tolerance": self.energy_tolerance,
                    "candidates": len(role_samples),
                    "sample_energies": [
                        self._get_sample_energy(sid) for sid in role_samples[:10]
                    ]
                }
            )
        # Select best sample (stable sort: ties keep original role-list order).
        scored_samples.sort(key=lambda x: x[1], reverse=True)
        anchor_id, score, anchor_energy = scored_samples[0]
        rationale = SelectionRationale(
            sample_path=self.metadata[anchor_id].get("path", anchor_id),
            similarity_to_anchor=1.0,  # Self-similarity
            energy_match=True,
            # Guard against division by zero when target_energy is 0.
            energy_delta=abs(anchor_energy - target_energy) / target_energy if target_energy else 0.0,
            selection_reason=f"Highest representativeness score ({score:.3f}) for role '{role}' at energy {target_energy:.3f}"
        )
        logger.info(
            f"Selected anchor for {role}: {anchor_id} (score={score:.3f}, energy={anchor_energy:.3f})"
        )
        return anchor_id, rationale
    def find_similar_samples(
        self,
        reference_path: str,
        count: int = 5,
        min_similarity: float = 0.90,
        role_filter: Optional[str] = None
    ) -> List[Tuple[str, float, SelectionRationale]]:
        """
        Find samples similar to a reference sample.

        Args:
            reference_path: Path or ID of reference sample
            count: Number of similar samples to return
            min_similarity: Minimum cosine similarity threshold
            role_filter: Optional role to filter by (case-insensitive)

        Returns:
            List of (sample_id, similarity, rationale) tuples, sorted by
            descending similarity

        Raises:
            CoherenceError: If the reference is unknown, has no embedding,
                or no candidate meets the similarity threshold
        """
        # Resolve the reference: match either by stored path or by raw ID.
        reference_id = None
        for sid, meta in self.metadata.items():
            if meta.get("path") == reference_path or sid == reference_path:
                reference_id = sid
                break
        if reference_id is None:
            raise CoherenceError(
                f"Reference sample not found: {reference_path}",
                details={
                    "reference": reference_path,
                    "available_samples": len(self.metadata)
                }
            )
        if reference_id not in self.embeddings:
            raise CoherenceError(
                f"Reference sample has no embedding: {reference_path}",
                details={"reference_id": reference_id}
            )
        reference_embedding = self.embeddings[reference_id]
        reference_energy = self._get_sample_energy(reference_id)
        # Calculate similarity to all samples (reference excluded).
        similarities: List[Tuple[str, float, float]] = []  # (id, similarity, energy)
        for sample_id, embedding in self.embeddings.items():
            if sample_id == reference_id:
                continue
            # Apply role filter
            if role_filter:
                sample_role = self.metadata.get(sample_id, {}).get("role", "")
                if sample_role.lower() != role_filter.lower():
                    continue
            sim = self._cosine_similarity(reference_embedding, embedding)
            energy = self._get_sample_energy(sample_id)
            similarities.append((sample_id, sim, energy))
        # Filter by minimum similarity
        above_threshold = [(sid, sim, e) for sid, sim, e in similarities if sim >= min_similarity]
        if not above_threshold:
            # Find closest match for error details; the distribution buckets
            # show how far the library is from the requested threshold.
            similarities.sort(key=lambda x: x[1], reverse=True)
            best_match = similarities[0] if similarities else (None, 0.0, 0.0)
            raise CoherenceError(
                f"No samples meet similarity threshold {min_similarity} for {reference_path}",
                details={
                    "reference": reference_path,
                    "min_similarity": min_similarity,
                    "best_match_similarity": best_match[1] if best_match[0] else 0.0,
                    "best_match_id": best_match[0],
                    "candidates_checked": len(similarities),
                    "similarity_distribution": {
                        "above_95": len([s for s in similarities if s[1] >= 0.95]),
                        "above_90": len([s for s in similarities if s[1] >= 0.90]),
                        "above_85": len([s for s in similarities if s[1] >= 0.85]),
                        "above_80": len([s for s in similarities if s[1] >= 0.80]),
                    }
                }
            )
        # Sort and select top matches
        above_threshold.sort(key=lambda x: x[1], reverse=True)
        top_matches = above_threshold[:count]
        results: List[Tuple[str, float, SelectionRationale]] = []
        for sample_id, similarity, sample_energy in top_matches:
            # Energy match is judged against the REFERENCE sample's energy.
            energy_matches, energy_delta = self._energy_matches(
                sample_energy, reference_energy
            )
            rationale = SelectionRationale(
                sample_path=self.metadata[sample_id].get("path", sample_id),
                similarity_to_anchor=similarity,
                energy_match=energy_matches,
                energy_delta=energy_delta,
                selection_reason=f"Cosine similarity {similarity:.3f} >= {min_similarity} to reference"
            )
            results.append((sample_id, similarity, rationale))
        logger.info(
            f"Found {len(results)} samples similar to {reference_id} "
            f"(threshold={min_similarity})"
        )
        return results
def calculate_kit_coherence(self, sample_paths: List[str]) -> float:
"""
Calculate the coherence score of a kit (set of samples).
Coherence is defined as the average pairwise cosine similarity
between all samples in the set. Range: 0.0 to 1.0
Args:
sample_paths: List of sample paths or IDs
Returns:
Coherence score from 0.0 (no coherence) to 1.0 (perfect coherence)
"""
if len(sample_paths) < 2:
return 1.0 # Single sample is perfectly coherent with itself
# Resolve paths to IDs
sample_ids = []
for path in sample_paths:
found_id = None
for sid, meta in self.metadata.items():
if meta.get("path") == path or sid == path:
found_id = sid
break
if found_id:
sample_ids.append(found_id)
if len(sample_ids) < 2:
logger.warning(f"Only {len(sample_ids)} valid samples for coherence calculation")
return 0.0
# Calculate pairwise similarities
similarities = []
for i, id1 in enumerate(sample_ids):
if id1 not in self.embeddings:
continue
for id2 in sample_ids[i+1:]:
if id2 not in self.embeddings:
continue
sim = self._cosine_similarity(
self.embeddings[id1],
self.embeddings[id2]
)
similarities.append(sim)
if not similarities:
return 0.0
coherence = float(np.mean(similarities))
logger.info(
f"Kit coherence: {coherence:.3f} (from {len(similarities)} pairwise comparisons)"
)
return coherence
    def select_coherent_kit(
        self,
        role: str,
        target_energy: float,
        count: int = 4
    ) -> List[SelectedSample]:
        """
        Select a coherent kit of samples for a role.

        Selects an anchor sample and finds variations that are:
        1. Similar to the anchor (cosine similarity >= coherence_threshold)
        2. Within the configured energy tolerance of the target
        3. Coherent with each other (verified as a whole kit afterwards)

        Args:
            role: Sample role (e.g., "kick", "snare", "hihat", "bass")
            target_energy: Target RMS energy level
            count: Number of samples to select (default 4: 1 anchor + 3 variations)

        Returns:
            List of SelectedSample objects with coherence scores and rationale

        Raises:
            CoherenceError: If no coherent kit can be formed
        """
        logger.info(
            f"Selecting coherent kit for role='{role}', energy={target_energy:.3f}, count={count}"
        )
        # Clear rationale log for this selection; it accumulates per-sample
        # reasoning that get_selection_log() exposes later.
        self.rationale_log = []
        # Step 1: Select anchor sample
        anchor_id, anchor_rationale = self.select_anchor_sample(role, target_energy)
        # NOTE(review): selected_ids is written but never read afterwards —
        # candidate for removal.
        selected_ids = [anchor_id]
        # Step 2: Find similar samples to anchor
        anchor_path = self.metadata[anchor_id].get("path", anchor_id)
        try:
            similar = self.find_similar_samples(
                reference_path=anchor_path,
                count=count - 1,  # Exclude anchor
                min_similarity=self.coherence_threshold,
                role_filter=role  # Must be same role
            )
        except CoherenceError as e:
            # Enhance error with kit context (original details are merged in).
            raise CoherenceError(
                f"Cannot form coherent kit for '{role}': {str(e)}",
                details={
                    **getattr(e, 'details', {}),
                    "anchor_sample": anchor_id,
                    "target_count": count,
                    "role": role
                }
            )
        # Step 3: Build selected samples list with rationale
        selected: List[SelectedSample] = []
        # Add anchor (coherence 1.0 by definition — it is the reference).
        anchor_energy = self._get_sample_energy(anchor_id)
        selected.append(SelectedSample(
            path=self.metadata[anchor_id].get("path", anchor_id),
            role=role,
            energy=anchor_energy,
            coherence_score=1.0,
            rationale=anchor_rationale
        ))
        self.rationale_log.append(anchor_rationale)
        # Add variations until the requested kit size is reached.
        for sample_id, similarity, rationale in similar:
            if len(selected) >= count:
                break
            sample_energy = self._get_sample_energy(sample_id)
            selected.append(SelectedSample(
                path=self.metadata[sample_id].get("path", sample_id),
                role=role,
                energy=sample_energy,
                coherence_score=similarity,
                rationale=rationale
            ))
            self.rationale_log.append(rationale)
        # Step 4: Verify kit coherence as a whole (pairwise, not just
        # anchor-relative), so mutually-dissimilar variations are rejected.
        kit_paths = [s.path for s in selected]
        kit_coherence = self.calculate_kit_coherence(kit_paths)
        if kit_coherence < self.coherence_threshold:
            raise CoherenceError(
                f"Selected kit coherence {kit_coherence:.3f} below threshold {self.coherence_threshold}",
                details={
                    "kit_coherence": kit_coherence,
                    "threshold": self.coherence_threshold,
                    "samples_selected": len(selected),
                    "role": role,
                    "sample_paths": kit_paths
                }
            )
        logger.info(
            f"Selected coherent kit: {len(selected)} samples, coherence={kit_coherence:.3f}"
        )
        return selected
def get_selection_log(self) -> List[Dict[str, Any]]:
"""Get the rationale log as a list of dictionaries."""
return [
{
"sample_path": r.sample_path,
"similarity_to_anchor": round(r.similarity_to_anchor, 4),
"energy_match": r.energy_match,
"energy_delta": round(r.energy_delta, 4),
"selection_reason": r.selection_reason
}
for r in self.rationale_log
]
def get_available_roles(self) -> List[str]:
"""Get list of available sample roles in the embeddings."""
roles = set()
for meta in self.metadata.values():
role = meta.get("role", "")
if role:
roles.add(role)
return sorted(list(roles))
def get_stats(self) -> Dict[str, Any]:
"""Get statistics about the embeddings database."""
role_counts = {}
for meta in self.metadata.values():
role = meta.get("role", "unknown")
role_counts[role] = role_counts.get(role, 0) + 1
return {
"total_samples": len(self.embeddings),
"embeddings_path": self.embeddings_path,
"coherence_threshold": self.coherence_threshold,
"energy_tolerance": self.energy_tolerance,
"roles": role_counts,
"embedding_dim": len(next(iter(self.embeddings.values())))
if self.embeddings else 0
}
# Convenience functions for direct usage
def select_kick_kit(target_energy: float, count: int = 4) -> List[SelectedSample]:
    """Select a coherent kick drum kit using a fresh IntelligentSampleSelector."""
    return IntelligentSampleSelector().select_coherent_kit("kick", target_energy, count)
def select_snare_kit(target_energy: float, count: int = 4) -> List[SelectedSample]:
    """Select a coherent snare drum kit using a fresh IntelligentSampleSelector."""
    return IntelligentSampleSelector().select_coherent_kit("snare", target_energy, count)
def select_bass_kit(target_energy: float, count: int = 4) -> List[SelectedSample]:
    """Select a coherent bass kit using a fresh IntelligentSampleSelector."""
    return IntelligentSampleSelector().select_coherent_kit("bass", target_energy, count)
def find_similar(reference_path: str, count: int = 5) -> List[Tuple[str, float]]:
    """Find samples similar to a reference.

    Args:
        reference_path: Path or ID of the reference sample.
        count: Maximum number of matches to return.

    Returns:
        List of (sample_path, similarity) tuples, best matches first.

    Raises:
        CoherenceError: If the reference is unknown or no sample meets the
            default similarity threshold.
    """
    selector = IntelligentSampleSelector()
    results = selector.find_similar_samples(reference_path, count)
    # BUG FIX: SelectionRationale exposes the path as `sample_path`, not
    # `path` — the previous `r.path` raised AttributeError on every call.
    return [(rationale.sample_path, similarity) for _, similarity, rationale in results]

View File

@@ -0,0 +1,888 @@
"""
IterationEngine - Achieves target coherence through intelligent retries.
This module implements professional-grade iteration strategies to achieve
coherence scores >= 0.90 for sample selections. Never accepts sub-standard
results - either achieves target or fails explicitly.
Usage:
from engines.iteration_engine import IterationEngine, ProfessionalCoherenceError
engine = IterationEngine()
try:
result = engine.iterate_until_coherence(
selection_func=select_samples,
target_coherence=0.90
)
except ProfessionalCoherenceError as e:
# Handle professional-grade failure
print(f"Failed to achieve coherence: {e}")
Architecture:
- Iteration strategies with progressive relaxation
- Automatic failure analysis and recovery suggestions
- Integration with CoherenceScorer and RationaleLogger
- Professional-grade: No shortcuts, achieves target or fails explicitly
"""
import time
import logging
from typing import Optional, Dict, List, Any, Callable, Union, Tuple
from dataclasses import dataclass, field
from enum import Enum
logger = logging.getLogger("IterationEngine")
# =============================================================================
# PROFESSIONAL COHERENCE ERROR
# =============================================================================
class ProfessionalCoherenceError(Exception):
    """
    Exception raised when professional-grade coherence cannot be achieved.

    Raised after all iteration strategies have been exhausted without
    reaching the minimum acceptable coherence threshold (0.90).

    Attributes:
        best_score: Highest coherence score achieved across all attempts
        attempts_made: Number of iteration strategies tried
        suggestions: List of recommendations for manual curation
    """
    def __init__(
        self,
        best_score: float,
        attempts_made: int,
        suggestions: List[str],
        message: Optional[str] = None
    ):
        self.best_score = best_score
        self.attempts_made = attempts_made
        self.suggestions = suggestions
        # Synthesize the detailed report only when the caller did not
        # supply an explicit message.
        super().__init__(message if message is not None else self._build_message())

    def _build_message(self) -> str:
        """Build comprehensive error message."""
        numbered = [
            f"  {i}. {suggestion}"
            for i, suggestion in enumerate(self.suggestions, 1)
        ]
        report = [
            "ProfessionalCoherenceError: Failed to achieve coherence >= 0.90",
            "",
            f"Best score achieved: {self.best_score:.3f}",
            f"Attempts made: {self.attempts_made}",
            "",
            "Recommendations:",
            *numbered,
            "",
            "Consider:",
            "  - Adding more high-quality samples to the library",
            "  - Manual curation of samples for this genre",
            "  - Checking sample quality and consistency",
        ]
        return "\n".join(report)

    def to_dict(self) -> Dict[str, Any]:
        """Convert error to dictionary for serialization."""
        return {
            "error_type": "ProfessionalCoherenceError",
            "best_score": self.best_score,
            "attempts_made": self.attempts_made,
            "suggestions": self.suggestions,
            "message": str(self)
        }
# =============================================================================
# ITERATION STRATEGIES
# =============================================================================
# Progressive fallback chain, consumed in list order by IterationEngine:
# each later attempt relaxes thresholds or shrinks the kit size so that
# some professional-grade result remains reachable.
ITERATION_STRATEGIES = [
    {
        "attempt": 1,
        "params": {
            "coherence_threshold": 0.90,
            "energy_tolerance": 0.10
        },
        "note": "Standard professional parameters"
    },
    {
        "attempt": 2,
        "params": {
            "coherence_threshold": 0.88,
            "energy_tolerance": 0.15
        },
        "note": "Slightly relaxed but still professional"
    },
    {
        "attempt": 3,
        "params": {
            "coherence_threshold": 0.85,
            "energy_tolerance": 0.20
        },
        "note": "Minimum professional grade"
    },
    {
        "attempt": 4,
        "params": {
            "strategy": "reduce_count",
            "count": 2,
            "coherence_threshold": 0.90
        },
        "note": "Fewer samples but more coherent"
    },
    {
        "attempt": 5,
        "params": {
            "strategy": "single_sample",
            "count": 1,
            "coherence_threshold": 0.90
        },
        "note": "Single high-quality sample only"
    },
]
# =============================================================================
# DATA CLASSES
# =============================================================================
class IterationStatus(Enum):
    """Lifecycle status of a single iteration attempt."""
    PENDING = "pending"          # created, not yet executed
    IN_PROGRESS = "in_progress"  # currently executing
    SUCCESS = "success"          # met the coherence target
    FAILED = "failed"            # ran but missed target or raised
    ABORTED = "aborted"          # cancelled before completion
@dataclass
class IterationAttempt:
    """Record of a single iteration attempt."""
    attempt_number: int  # 1-based index into the strategy chain
    strategy: Dict[str, Any]  # the ITERATION_STRATEGIES entry that was tried
    status: IterationStatus = IterationStatus.PENDING
    coherence_score: float = 0.0  # score achieved by this attempt
    duration_ms: float = 0.0  # wall-clock time spent on the attempt
    failure_reason: Optional[str] = None  # populated when the attempt fails
    kit_data: Optional[Any] = None  # kit produced by the attempt, if any
    timestamp: float = field(default_factory=time.time)  # creation time (epoch seconds)
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (kit_data is intentionally omitted: it may
        not be serializable)."""
        return {
            "attempt_number": self.attempt_number,
            "strategy": self.strategy,
            "status": self.status.value,
            "coherence_score": self.coherence_score,
            "duration_ms": self.duration_ms,
            "failure_reason": self.failure_reason,
            "timestamp": self.timestamp
        }
@dataclass
class IterationResult:
    """Result of the whole iteration process."""
    success: bool  # True when the coherence target was met
    final_coherence: float  # best coherence score achieved
    attempts: List[IterationAttempt]  # every attempt made, in order
    successful_strategy: Optional[Dict[str, Any]] = None  # winning strategy, if any
    total_duration_ms: float = 0.0  # wall-clock time across all attempts
    selected_kit: Optional[Any] = None  # kit from the best/winning attempt
    metadata: Dict[str, Any] = field(default_factory=dict)
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary (selected_kit is intentionally omitted: it
        may not be serializable)."""
        return {
            "success": self.success,
            "final_coherence": self.final_coherence,
            "attempts": [a.to_dict() for a in self.attempts],
            "successful_strategy": self.successful_strategy,
            "total_duration_ms": self.total_duration_ms,
            "metadata": self.metadata
        }
# =============================================================================
# PLACEHOLDER CLASSES (for when dependencies are not available)
# =============================================================================
class CoherenceScorer:
    """
    Placeholder/Actual CoherenceScorer for sample kit evaluation.

    Scores a kit on four weighted axes (BPM, key, energy, spectral) and
    returns a value in [0.0, 1.0]. Spectral compatibility is currently a
    fixed assumption until real audio analysis is wired in.
    """
    def __init__(self):
        # Relative importance of each coherence axis (sums to 1.0).
        self.weights = {
            "bpm_consistency": 0.30,
            "key_consistency": 0.25,
            "energy_balance": 0.25,
            "spectral_compatibility": 0.20
        }

    def score_kit(self, kit: Any) -> float:
        """
        Calculate coherence score for a kit.

        Returns:
            Coherence score between 0.0 and 1.0
        """
        # Trust a pre-computed positive score when the kit carries one.
        if hasattr(kit, 'coherence_score') and kit.coherence_score > 0:
            return kit.coherence_score
        weighted = (
            self._check_bpm_consistency(kit) * self.weights["bpm_consistency"]
            + self._check_key_consistency(kit) * self.weights["key_consistency"]
            + self._check_energy_balance(kit) * self.weights["energy_balance"]
            # Spectral compatibility placeholder: default assumption.
            + 0.85 * self.weights["spectral_compatibility"]
        )
        return min(1.0, max(0.0, weighted))

    def _check_bpm_consistency(self, kit: Any) -> float:
        """Score BPM agreement across kit samples (1.0 = identical BPMs)."""
        bpms: List[float] = []
        drums = getattr(kit, 'drums', None)
        if drums:
            for slot in ('kick', 'snare', 'clap', 'hat_closed', 'hat_open'):
                sample = getattr(drums, slot, None)
                if sample and getattr(sample, 'bpm', 0) > 0:
                    bpms.append(sample.bpm)
        bass = getattr(kit, 'bass', None)
        if bass:
            for sample in bass:
                if getattr(sample, 'bpm', 0) > 0:
                    bpms.append(sample.bpm)
        if len(bpms) < 2:
            return 0.5  # not enough data to judge either way
        mean_bpm = sum(bpms) / len(bpms)
        variance = sum((b - mean_bpm) ** 2 for b in bpms) / len(bpms)
        if variance == 0:
            return 1.0
        # Lower variance maps to a higher score, floored at 0.
        return max(0.0, 1.0 - (variance / 100))

    def _check_key_consistency(self, kit: Any) -> float:
        """Score key agreement as the share held by the most common key."""
        keys: List[str] = []
        drums = getattr(kit, 'drums', None)
        if drums:
            for slot in ('kick', 'snare', 'clap', 'hat_closed', 'hat_open'):
                sample = getattr(drums, slot, None)
                if sample and getattr(sample, 'key', None):
                    keys.append(sample.key)
        bass = getattr(kit, 'bass', None)
        if bass:
            for sample in bass:
                if getattr(sample, 'key', None):
                    keys.append(sample.key)
        if len(keys) < 2:
            return 0.5  # insufficient data
        tally: Dict[str, int] = {}
        for key in keys:
            tally[key] = tally.get(key, 0) + 1
        return max(tally.values()) / len(keys)

    def _check_energy_balance(self, kit: Any) -> float:
        """Proxy for energy balance: score by how complete the kit is."""
        # Placeholder — a real implementation would analyze audio energy.
        component_count = 0
        drums = getattr(kit, 'drums', None)
        if drums:
            component_count += sum(
                1 for slot in ('kick', 'snare', 'clap', 'hat_closed', 'hat_open')
                if getattr(drums, slot, None)
            )
        bass = getattr(kit, 'bass', None)
        if bass:
            component_count += len(bass)
        if component_count >= 5:
            return 0.95
        if component_count >= 3:
            return 0.80
        return 0.60
class RationaleLogger:
    """
    Placeholder/Actual RationaleLogger for logging iteration decisions.

    Records the reasoning behind iteration choices for debugging and
    audit purposes. Entries are kept in memory and exposed via
    get_entries().
    """
    def __init__(self, verbose: bool = False):
        self.verbose = verbose
        self.entries = []

    def _record(self, entry: Dict[str, Any]) -> None:
        """Stamp *entry* with the current time and store it."""
        entry["timestamp"] = time.time()
        self.entries.append(entry)

    def log_iteration_start(self, attempt: int, strategy: Dict[str, Any]):
        """Log start of iteration attempt."""
        self._record({
            "event": "iteration_start",
            "attempt": attempt,
            "strategy": strategy,
        })
        if self.verbose:
            logger.info(f"[Rationale] Starting attempt {attempt}: {strategy.get('note', '')}")

    def log_iteration_result(
        self,
        attempt: int,
        coherence: float,
        success: bool
    ):
        """Log result of iteration attempt."""
        self._record({
            "event": "iteration_result",
            "attempt": attempt,
            "coherence": coherence,
            "success": success,
        })
        if self.verbose:
            status = "SUCCESS" if success else "FAILED"
            logger.info(f"[Rationale] Attempt {attempt}: {status} (coherence={coherence:.3f})")

    def log_strategy_switch(
        self,
        from_attempt: int,
        to_attempt: int,
        reason: str
    ):
        """Log strategy switch."""
        self._record({
            "event": "strategy_switch",
            "from": from_attempt,
            "to": to_attempt,
            "reason": reason,
        })
        if self.verbose:
            logger.info(f"[Rationale] Switching from {from_attempt} to {to_attempt}: {reason}")

    def log_final_result(self, result: IterationResult):
        """Log final iteration result (always emitted, regardless of verbose)."""
        self._record({
            "event": "final_result",
            "success": result.success,
            "coherence": result.final_coherence,
            "attempts_count": len(result.attempts),
        })
        logger.info(
            f"[Rationale] Final result: success={result.success}, "
            f"coherence={result.final_coherence:.3f}, "
            f"attempts={len(result.attempts)}"
        )

    def get_entries(self) -> List[Dict[str, Any]]:
        """Get a shallow copy of all logged entries."""
        return list(self.entries)
# =============================================================================
# ITERATION ENGINE
# =============================================================================
class IterationEngine:
"""
Professional-grade iteration engine for achieving target coherence.
This engine implements intelligent retry strategies to achieve coherence
scores >= 0.90. It never accepts sub-standard results - either achieves
the target or fails explicitly with actionable recommendations.
Features:
- Progressive iteration strategies with graceful degradation
- Automatic failure analysis and recovery suggestions
- Success tracking with detailed logging
- Integration with sample selection and coherence scoring
Usage:
engine = IterationEngine(target_coherence=0.90, max_attempts=5)
result = engine.iterate_until_coherence(selection_func)
if result.success:
kit = result.selected_kit
else:
# Handle failure - error already raised
pass
"""
def __init__(
self,
target_coherence: float = 0.90,
max_attempts: int = 5,
coherence_scorer: Optional[CoherenceScorer] = None,
rationale_logger: Optional[RationaleLogger] = None,
verbose: bool = False
):
"""
Initialize iteration engine.
Args:
target_coherence: Minimum acceptable coherence (default: 0.90)
max_attempts: Maximum iteration attempts (default: 5)
coherence_scorer: Optional custom coherence scorer
rationale_logger: Optional custom rationale logger
verbose: Enable verbose logging
"""
self.target_coherence = target_coherence
self.max_attempts = max(1, min(max_attempts, len(ITERATION_STRATEGIES)))
self.coherence_scorer = coherence_scorer or CoherenceScorer()
self.rationale_logger = rationale_logger or RationaleLogger(verbose=verbose)
self.verbose = verbose
# Tracking
self._attempts_history: List[IterationAttempt] = []
self._iteration_count = 0
self._start_time: Optional[float] = None
if verbose:
logger.info(
f"[IterationEngine] Initialized: target={target_coherence}, "
f"max_attempts={max_attempts}"
)
def iterate_until_coherence(
self,
selection_func: Callable[[Dict[str, Any]], Any],
target_coherence: Optional[float] = None,
max_attempts: Optional[int] = None
) -> IterationResult:
"""
Iterate until target coherence is achieved or max attempts reached.
Args:
selection_func: Function that takes strategy params and returns kit
target_coherence: Override default target (optional)
max_attempts: Override default max attempts (optional)
Returns:
IterationResult with success status and selected kit
Raises:
ProfessionalCoherenceError: If max attempts reached without success
"""
target = target_coherence or self.target_coherence
max_att = max_attempts or self.max_attempts
self._start_time = time.time()
self._attempts_history = []
self._iteration_count = 0
best_score = 0.0
best_kit = None
logger.info(f"[IterationEngine] Starting iteration loop: target={target}")
for attempt_idx in range(max_att):
self._iteration_count += 1
# Get strategy for this attempt
strategy = ITERATION_STRATEGIES[attempt_idx]
attempt = IterationAttempt(
attempt_number=attempt_idx + 1,
strategy=strategy
)
self.rationale_logger.log_iteration_start(
attempt.attempt_number,
strategy
)
try:
# Execute strategy
kit, coherence = self.try_strategy(strategy, selection_func)
attempt.kit_data = kit
attempt.coherence_score = coherence
attempt.duration_ms = (time.time() - attempt.timestamp) * 1000
# Track best result
if coherence > best_score:
best_score = coherence
best_kit = kit
# Check success
if coherence >= target:
attempt.status = IterationStatus.SUCCESS
self._attempts_history.append(attempt)
self.rationale_logger.log_iteration_result(
attempt.attempt_number,
coherence,
True
)
result = self._build_success_result(
coherence,
attempt,
kit
)
self.rationale_logger.log_final_result(result)
logger.info(
f"[IterationEngine] SUCCESS on attempt {attempt.attempt_number}: "
f"coherence={coherence:.3f}"
)
return result
else:
attempt.status = IterationStatus.FAILED
attempt.failure_reason = f"Coherence {coherence:.3f} < target {target}"
self.rationale_logger.log_iteration_result(
attempt.attempt_number,
coherence,
False
)
if attempt_idx < max_att - 1:
self.rationale_logger.log_strategy_switch(
attempt.attempt_number,
attempt.attempt_number + 1,
f"Coherence too low ({coherence:.3f}), trying next strategy"
)
self._attempts_history.append(attempt)
except Exception as e:
attempt.status = IterationStatus.FAILED
attempt.failure_reason = str(e)
attempt.duration_ms = (time.time() - attempt.timestamp) * 1000
self._attempts_history.append(attempt)
logger.warning(
f"[IterationEngine] Attempt {attempt.attempt_number} failed: {e}"
)
if attempt_idx < max_att - 1:
self.rationale_logger.log_strategy_switch(
attempt.attempt_number,
attempt.attempt_number + 1,
f"Exception: {str(e)[:50]}"
)
# All attempts exhausted
total_duration = (time.time() - self._start_time) * 1000
failure_reason = self.analyze_failure_reason(best_kit, best_score)
suggestions = self.suggest_improvements(failure_reason)
result = IterationResult(
success=False,
final_coherence=best_score,
attempts=self._attempts_history.copy(),
total_duration_ms=total_duration,
selected_kit=best_kit,
metadata={
"failure_reason": failure_reason,
"suggestions": suggestions,
"target_coherence": target
}
)
self.rationale_logger.log_final_result(result)
logger.error(
f"[IterationEngine] All {max_att} attempts failed. "
f"Best score: {best_score:.3f}"
)
raise ProfessionalCoherenceError(
best_score=best_score,
attempts_made=max_att,
suggestions=suggestions
)
def try_strategy(
self,
strategy: Dict[str, Any],
selection_func: Callable[[Dict[str, Any]], Any]
) -> Tuple[Any, float]:
"""
Execute a single iteration strategy.
Args:
strategy: Strategy configuration from ITERATION_STRATEGIES
selection_func: Function to select samples with given params
Returns:
Tuple of (selected_kit, coherence_score)
Raises:
Exception: If selection or scoring fails
"""
params = strategy.get("params", {}).copy()
if self.verbose:
logger.info(
f"[IterationEngine] Trying strategy {strategy.get('attempt')}: "
f"{strategy.get('note', '')}"
)
# Call selection function with strategy parameters
kit = selection_func(params)
if kit is None:
raise ValueError("Selection function returned None")
# Score the resulting kit
coherence = self.coherence_scorer.score_kit(kit)
# Attach coherence to kit for reference
if hasattr(kit, 'coherence_score'):
kit.coherence_score = coherence
if self.verbose:
logger.info(f"[IterationEngine] Strategy result: coherence={coherence:.3f}")
return kit, coherence
def analyze_failure_reason(
self,
kit: Optional[Any],
coherence_score: float
) -> str:
"""
Determine why coherence target was not achieved.
Args:
kit: Best kit achieved (may be None)
coherence_score: Best coherence score achieved
Returns:
Failure reason classification string
"""
if kit is None:
return "no_valid_selection"
if coherence_score < 0.50:
return "severe_inconsistency"
elif coherence_score < 0.70:
return "major_inconsistency"
elif coherence_score < 0.85:
return "moderate_inconsistency"
elif coherence_score < 0.90:
return "minor_inconsistency"
else:
return "target_not_met"
def suggest_improvements(self, failure_reason: str) -> List[str]:
"""
Suggest adjustments based on failure reason.
Args:
failure_reason: Reason classification from analyze_failure_reason
Returns:
List of actionable suggestions
"""
suggestions = {
"no_valid_selection": [
"Check that sample library has samples for all required roles",
"Verify selection function is working correctly",
"Ensure library path is accessible"
],
"severe_inconsistency": [
"Library may have fundamentally incompatible samples",
"Consider organizing samples by pack or producer",
"Run library analysis to identify outliers",
"Add more samples from the same genre/style"
],
"major_inconsistency": [
"Check for mixed genres in sample selection",
"Verify BPM and key metadata accuracy",
"Consider using reference-based selection",
"Filter samples by more specific criteria"
],
"moderate_inconsistency": [
"Some samples may need key adjustment",
"Check energy levels across drum components",
"Consider manual sample curation",
"Try with smaller sample sets from same source"
],
"minor_inconsistency": [
"Close to target - try with samples from same pack",
"Verify sample quality and bitrate",
"Slightly adjust target coherence if acceptable",
"Consider manual fine-tuning"
],
"target_not_met": [
"Target may be too strict for current library",
"Consider slightly lower professional threshold",
"Add more high-quality reference samples"
]
}
return suggestions.get(failure_reason, [
"Review sample library quality and consistency",
"Try reference-based selection",
"Consider adding more professional-grade samples"
])
def _build_success_result(
    self,
    coherence: float,
    successful_attempt: IterationAttempt,
    kit: Any
) -> IterationResult:
    """Assemble the IterationResult describing a successful run.

    Args:
        coherence: Final coherence score achieved.
        successful_attempt: The attempt that reached the target.
        kit: The kit selected by the winning attempt.

    Returns:
        A populated ``IterationResult`` with ``success=True``.
    """
    # Elapsed wall-clock time for the whole run; 0 when timing never started.
    if self._start_time:
        elapsed_ms = (time.time() - self._start_time) * 1000
    else:
        elapsed_ms = 0
    extra = {
        "successful_attempt": successful_attempt.attempt_number,
        "strategy_note": successful_attempt.strategy.get("note", ""),
        "iterations_required": self._iteration_count
    }
    return IterationResult(
        success=True,
        final_coherence=coherence,
        attempts=self._attempts_history.copy(),
        successful_strategy=successful_attempt.strategy,
        total_duration_ms=elapsed_ms,
        selected_kit=kit,
        metadata=extra
    )
# -------------------------------------------------------------------------
# Tracking and Metrics
# -------------------------------------------------------------------------
def get_iteration_count(self) -> int:
    """Return how many iterations the most recent run performed."""
    return self._iteration_count
def get_attempts_history(self) -> List[IterationAttempt]:
"""Get history of all attempts from last run."""
return self._attempts_history.copy()
def get_success_rate(self) -> float:
    """Return the fraction of successful attempts in the last run.

    Returns:
        0.0 when no attempts were recorded, otherwise
        successes divided by total attempts.
    """
    history = self._attempts_history
    if not history:
        return 0.0
    wins = [a for a in history if a.status == IterationStatus.SUCCESS]
    return len(wins) / len(history)
def reset(self):
    """Clear all per-run state so a new iteration cycle can start fresh."""
    self._start_time = None
    self._iteration_count = 0
    self._attempts_history = []
    if self.verbose:
        logger.info("[IterationEngine] State reset")
# =============================================================================
# CONVENIENCE FUNCTIONS
# =============================================================================
def iterate_for_coherence(
    selection_func: Callable[[Dict[str, Any]], Any],
    target: float = 0.90,
    max_attempts: int = 5,
    verbose: bool = False
) -> Any:
    """
    Convenience wrapper for a one-shot iteration cycle.

    Args:
        selection_func: Function used to select samples.
        target: Target coherence score.
        max_attempts: Maximum number of attempts before giving up.
        verbose: Enable verbose logging.

    Returns:
        The selected kit when the target coherence is reached.

    Raises:
        ProfessionalCoherenceError: If coherence cannot be achieved.
    """
    engine = IterationEngine(
        target_coherence=target,
        max_attempts=max_attempts,
        verbose=verbose
    )
    return engine.iterate_until_coherence(selection_func).selected_kit
def quick_coherence_check(kit: Any) -> float:
    """
    Score a kit's coherence with a freshly constructed scorer.

    Args:
        kit: Kit to evaluate.

    Returns:
        Coherence score in the range 0.0 - 1.0.
    """
    return CoherenceScorer().score_kit(kit)
# =============================================================================
# EXPORTS
# =============================================================================
# Public API surface of this module; anything not listed here is internal.
__all__ = [
    "IterationEngine",
    "ProfessionalCoherenceError",
    "CoherenceScorer",
    "RationaleLogger",
    "IterationResult",
    "IterationAttempt",
    "IterationStatus",
    "ITERATION_STRATEGIES",
    "iterate_for_coherence",
    "quick_coherence_check",
]

View File

@@ -0,0 +1,639 @@
"""
LibreriaAnalyzer - Análisis espectral de samples de audio
Escanea recursivamente la librería de samples y extrae features espectrales
usando librosa (con fallback a scipy si no está disponible).
Uso:
from engines.libreria_analyzer import LibreriaAnalyzer
analyzer = LibreriaAnalyzer()
analyzer.analyze_all() # Analiza toda la librería
# O consultar features de un sample específico
features = analyzer.get_features("C:/.../kick_808.wav")
"""
import os
import json
import time
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any
# Audio analysis libraries
# Optional analysis backends: prefer librosa, fall back to scipy.
try:
    import numpy as np
    import librosa
    import librosa.feature
    LIBROSA_AVAILABLE = True
except ImportError:
    LIBROSA_AVAILABLE = False

try:
    import numpy as np
    from scipy.io import wavfile
    from scipy import signal
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
    # BUGFIX: only clear np when librosa also failed to import. The previous
    # unconditional `np = None` clobbered numpy when librosa was available
    # but scipy was not, breaking the librosa analysis path.
    if not LIBROSA_AVAILABLE:
        np = None
class LibreriaAnalyzer:
    """
    Spectral analyzer for the sample library.

    Extracts audio features for every sample found under the library root
    and caches the results on disk to avoid re-analysis.
    """

    # Supported audio file extensions
    SUPPORTED_EXTENSIONS = {'.wav', '.mp3', '.aif', '.aiff', '.flac'}

    # Feature cache file name and maximum age before re-analysis
    CACHE_FILENAME = '.features_cache.json'
    CACHE_MAX_AGE_DAYS = 7

    # Folder-name (lowercased) -> musical role mapping
    ROLE_MAPPING = {
        'kick': 'kick',
        'snare': 'snare',
        'bass': 'bass',
        'fx': 'fx',
        'drumloops': 'drum_loop',
        'drumloop': 'drum_loop',
        'hi-hat': 'hat_closed',
        'hihat': 'hat_closed',
        'hat': 'hat_closed',
        'oneshots': 'oneshot',
        'oneshot': 'oneshot',
        'perc loop': 'perc_loop',
        'perc_loop': 'perc_loop',
        'reggaeton 3': 'synth',
        'sentimientolatino2025': 'multi',
        'sounds presets': 'preset',
        'extra': 'extra',
        'flp': 'project',
    }

    def __init__(self, library_path: Optional[str] = None, verbose: bool = True):
        """
        Initialize the analyzer.

        Args:
            library_path: Library root path. Defaults to libreria/reggaeton/
                under the Ableton MIDI Remote Scripts folder.
            verbose: If True, print analysis progress.

        Raises:
            ImportError: If neither librosa nor scipy is installed.
        """
        if library_path is None:
            # Default path following the project layout
            base_path = Path("C:/ProgramData/Ableton/Live 12 Suite/Resources/MIDI Remote Scripts")
            self.library_path = base_path / "libreria" / "reggaeton"
        else:
            self.library_path = Path(library_path)
        self.verbose = verbose
        self.features: Dict[str, Dict[str, Any]] = {}
        self.cache_path = self.library_path / self.CACHE_FILENAME
        # Require at least one analysis backend
        if not LIBROSA_AVAILABLE and not SCIPY_AVAILABLE:
            raise ImportError(
                "Se requiere librosa o scipy para análisis de audio. "
                "Instala: pip install librosa numpy"
            )
        # Load an existing cache when available
        self._load_cache()

    def _load_cache(self) -> bool:
        """
        Load the feature cache if it exists and is recent enough.

        Returns:
            True if the cache was loaded, False otherwise.
        """
        if not self.cache_path.exists():
            return False
        try:
            # Reject caches older than CACHE_MAX_AGE_DAYS
            cache_age = datetime.now() - datetime.fromtimestamp(
                self.cache_path.stat().st_mtime
            )
            if cache_age > timedelta(days=self.CACHE_MAX_AGE_DAYS):
                if self.verbose:
                    print(f"[LibreriaAnalyzer] Caché expirado ({cache_age.days} días). Re-analizando...")
                return False
            # Load cache contents
            with open(self.cache_path, 'r', encoding='utf-8') as f:
                cache_data = json.load(f)
            self.features = cache_data.get('samples', {})
            if self.verbose:
                total = cache_data.get('total_samples', len(self.features))
                scan_date = cache_data.get('scan_date', 'unknown')
                print(f"[LibreriaAnalyzer] Caché cargado: {total} samples (desde {scan_date})")
            return True
        except (json.JSONDecodeError, IOError, KeyError) as e:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Error cargando caché: {e}")
            return False

    def _save_cache(self) -> None:
        """Persist the current features to the cache file."""
        cache_data = {
            "version": "1.0",
            "total_samples": len(self.features),
            "scan_date": datetime.now().isoformat(),
            "library_path": str(self.library_path),
            "samples": self.features
        }
        try:
            with open(self.cache_path, 'w', encoding='utf-8') as f:
                json.dump(cache_data, f, indent=2, ensure_ascii=False)
            if self.verbose:
                print(f"[LibreriaAnalyzer] Caché guardado: {len(self.features)} samples")
        except IOError as e:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Error guardando caché: {e}")

    def _detect_role(self, file_path: Path) -> str:
        """
        Detect the sample's role from its containing folder names.

        Args:
            file_path: Path to the audio file.

        Returns:
            Detected role (kick, snare, bass, ...) or "unknown".
        """
        # All path components, lowercased for case-insensitive matching
        path_parts = [p.lower() for p in file_path.parts]
        # Exact matches first (raw and with special characters normalized)
        for part in path_parts:
            clean_part = part.replace(' ', '_').replace('-', '_').replace('(', '').replace(')', '')
            if part in self.ROLE_MAPPING:
                return self.ROLE_MAPPING[part]
            if clean_part in self.ROLE_MAPPING:
                return self.ROLE_MAPPING[clean_part]
            # Substring matches as a last resort
            for key, role in self.ROLE_MAPPING.items():
                if key in part or key in clean_part:
                    return role
        return "unknown"

    def _get_pack_name(self, file_path: Path) -> str:
        """
        Return the sample's pack name (its immediate parent folder).

        Args:
            file_path: Path to the audio file.

        Returns:
            Parent folder name, or "root" when there is none.
        """
        parent = file_path.parent.name
        return parent if parent else "root"

    def _extract_features_librosa(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """
        Extract audio features using librosa.

        Args:
            file_path: Path to the audio file.

        Returns:
            Feature dictionary, or None on error.
        """
        try:
            # Load audio (mono, native sample rate)
            y, sr = librosa.load(str(file_path), sr=None, mono=True)
            # Duration
            duration = librosa.get_duration(y=y, sr=sr)
            # RMS (energy), converted to dB
            rms = float(np.mean(librosa.feature.rms(y=y)))
            rms_db = 20 * np.log10(rms + 1e-10)
            # Spectral centroid (brightness)
            spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
            # Spectral rolloff
            spectral_rolloff = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
            # Zero crossing rate
            zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))
            # MFCCs (13 coefficients)
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            mfccs_mean = [float(np.mean(coef)) for coef in mfccs]
            # Onset strength (how rhythmic the sample is)
            onset_env = librosa.onset.onset_strength(y=y, sr=sr)
            onset_strength = float(np.mean(onset_env))
            # BPM detection; best-effort, 0.0 when detection fails
            try:
                tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
                bpm = float(tempo) if isinstance(tempo, (int, float, np.number)) else float(tempo[0])
            except Exception:  # narrowed from bare except: keep Ctrl-C working
                bpm = 0.0
            # Key detection via chromagram; best-effort, "" when detection fails
            try:
                chromagram = librosa.feature.chroma_cqt(y=y, sr=sr)
                # Sum over time to get the pitch profile
                chroma_avg = np.sum(chromagram, axis=1)
                notes = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
                # Dominant pitch class
                key_index = np.argmax(chroma_avg)
                key = notes[key_index]
                # Major/minor heuristic: stronger minor third implies minor key
                minor_third_idx = (key_index + 3) % 12
                if chroma_avg[minor_third_idx] > chroma_avg[(key_index + 4) % 12]:
                    key += 'm'
            except Exception:  # narrowed from bare except
                key = ""
            # Channel count: reload without mono folding to inspect the original
            # layout (librosa offers no cheaper way to query this; a second
            # decode is the cost).
            try:
                y_orig, _ = librosa.load(str(file_path), sr=None, mono=False)
                channels = y_orig.shape[0] if len(y_orig.shape) > 1 else 1
            except Exception:  # narrowed from bare except
                channels = 1
            return {
                "rms": round(rms_db, 2),
                "spectral_centroid": round(spectral_centroid, 2),
                "spectral_rolloff": round(spectral_rolloff, 2),
                "zero_crossing_rate": round(zcr, 4),
                "mfccs": [round(m, 4) for m in mfccs_mean],
                "onset_strength": round(onset_strength, 4),
                "duration": round(duration, 3),
                "sample_rate": sr,
                "channels": channels,
                "bpm": round(bpm, 1) if bpm > 0 else 0,
                "key": key
            }
        except Exception as e:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Error analizando {file_path}: {e}")
            return None

    def _extract_features_scipy(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """
        Extract basic features using scipy (fallback when librosa is missing).
        Only WAV files are supported.

        Args:
            file_path: Path to the audio file.

        Returns:
            Basic feature dictionary, or None on error/unsupported format.
        """
        try:
            # scipy natively supports WAV only
            if file_path.suffix.lower() not in {'.wav'}:
                return None
            # Load audio
            sr, data = wavfile.read(str(file_path))
            # Fold to mono, remembering the original channel count
            if data.ndim > 1:
                channels = data.shape[1]
                data = np.mean(data, axis=1)
            else:
                channels = 1
            # Normalize integer PCM to float [-1, 1]
            if data.dtype == np.int16:
                data = data.astype(np.float32) / 32768.0
            elif data.dtype == np.int32:
                data = data.astype(np.float32) / 2147483648.0
            else:
                data = data.astype(np.float32)
            # Duration
            duration = len(data) / sr
            # RMS in dB
            rms = np.sqrt(np.mean(data ** 2))
            rms_db = 20 * np.log10(rms + 1e-10)
            # Spectral centroid via FFT
            fft = np.fft.fft(data)
            freqs = np.fft.fftfreq(len(data), 1/sr)
            magnitude = np.abs(fft)
            # Positive frequencies only
            positive_freqs = freqs[:len(freqs)//2]
            positive_magnitude = magnitude[:len(magnitude)//2]
            spectral_centroid = np.sum(positive_freqs * positive_magnitude) / np.sum(positive_magnitude)
            # Zero crossing rate
            zcr = np.mean(np.diff(np.sign(data)) != 0)
            # Advanced features require librosa; report neutral values
            return {
                "rms": round(rms_db, 2),
                "spectral_centroid": round(float(spectral_centroid), 2),
                "spectral_rolloff": 0.0,  # unavailable without librosa
                "zero_crossing_rate": round(float(zcr), 4),
                "mfccs": [],  # unavailable without librosa
                "onset_strength": 0.0,  # unavailable without librosa
                "duration": round(duration, 3),
                "sample_rate": sr,
                "channels": channels,
                "bpm": 0,  # unavailable without librosa
                "key": ""  # unavailable without librosa
            }
        except Exception as e:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Error (scipy) analizando {file_path}: {e}")
            return None

    def _extract_features(self, file_path: Path) -> Optional[Dict[str, Any]]:
        """
        Extract features from an audio file.
        Uses librosa when available, otherwise falls back to scipy.

        Args:
            file_path: Path to the audio file.

        Returns:
            Feature dictionary, or None on error.
        """
        if LIBROSA_AVAILABLE:
            return self._extract_features_librosa(file_path)
        elif SCIPY_AVAILABLE:
            return self._extract_features_scipy(file_path)
        else:
            return None

    def _scan_samples(self) -> List[Path]:
        """
        Recursively scan the library for audio samples.

        Returns:
            List of audio file paths found.
        """
        samples = []
        if not self.library_path.exists():
            if self.verbose:
                print(f"[LibreriaAnalyzer] Librería no encontrada: {self.library_path}")
            return samples
        for ext in self.SUPPORTED_EXTENSIONS:
            samples.extend(self.library_path.rglob(f"*{ext}"))
        return samples

    def analyze_sample(self, file_path: str) -> Optional[Dict[str, Any]]:
        """
        Analyze a single sample and extract its features.

        Args:
            file_path: Path to the audio file.

        Returns:
            Dictionary with all features of the sample, or None on failure.
        """
        path = Path(file_path)
        if not path.exists():
            if self.verbose:
                print(f"[LibreriaAnalyzer] Archivo no encontrado: {file_path}")
            return None
        if path.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Formato no soportado: {path.suffix}")
            return None
        # Extract audio features
        audio_features = self._extract_features(path)
        if audio_features is None:
            return None
        # Build the full feature record
        abs_path = str(path.resolve())
        role = self._detect_role(path)
        pack = self._get_pack_name(path)
        features = {
            "name": path.name,
            "pack": pack,
            "role": role,
            **audio_features
        }
        # Store in the in-memory cache
        self.features[abs_path] = features
        return features

    def analyze_all(self, force_reanalyze: bool = False) -> Dict[str, Dict[str, Any]]:
        """
        Analyze every sample in the library.

        Args:
            force_reanalyze: If True, re-analyze even when cached data exists.

        Returns:
            Dictionary of all features indexed by absolute path.
        """
        # Reuse a valid in-memory cache unless forced
        if not force_reanalyze and self.features:
            if self.verbose:
                print(f"[LibreriaAnalyzer] Usando caché existente con {len(self.features)} samples")
            return self.features
        # Scan for samples
        samples = self._scan_samples()
        if not samples:
            if self.verbose:
                print(f"[LibreriaAnalyzer] No se encontraron samples en {self.library_path}")
            return {}
        if self.verbose:
            print(f"[LibreriaAnalyzer] Encontrados {len(samples)} samples para analizar")
        # Analyze each sample
        total = len(samples)
        analyzed = 0
        failed = 0
        for i, sample_path in enumerate(samples, 1):
            abs_path = str(sample_path.resolve())
            # Skip samples already cached (unless forced)
            if not force_reanalyze and abs_path in self.features:
                continue
            # Analyze the sample
            features = self.analyze_sample(abs_path)
            if features:
                analyzed += 1
            else:
                failed += 1
            # Report progress every 10 files
            if self.verbose and i % 10 == 0:
                pct = (i / total) * 100
                print(f"[LibreriaAnalyzer] Progreso: {i}/{total} ({pct:.1f}%) - OK: {analyzed}, Fallos: {failed}")
        if self.verbose:
            print(f"[LibreriaAnalyzer] Análisis completo: {analyzed} analizados, {failed} fallidos")
        # Persist the cache
        self._save_cache()
        return self.features

    def get_features(self, sample_path: str) -> Optional[Dict[str, Any]]:
        """
        Get the features for a specific sample, analyzing it on cache miss.

        Args:
            sample_path: Path to the audio file.

        Returns:
            Feature dictionary, or None if analysis is not possible.
        """
        abs_path = str(Path(sample_path).resolve())
        # Serve from cache when possible
        if abs_path in self.features:
            return self.features[abs_path]
        # Analyze on cache miss
        return self.analyze_sample(sample_path)

    def get_all_features(self) -> Dict[str, Dict[str, Any]]:
        """
        Return every loaded/analyzed feature record.

        Returns:
            Dictionary with all features.
        """
        return self.features

    def clear_cache(self) -> None:
        """Delete the cache file and clear the in-memory features."""
        self.features = {}
        if self.cache_path.exists():
            try:
                self.cache_path.unlink()
                if self.verbose:
                    print(f"[LibreriaAnalyzer] Caché eliminado: {self.cache_path}")
            except IOError as e:
                if self.verbose:
                    print(f"[LibreriaAnalyzer] Error eliminando caché: {e}")

    def get_stats(self) -> Dict[str, Any]:
        """
        Compute statistics over the analyzed library.

        Returns:
            Dictionary with total_samples, by_role counts, avg_duration
            and avg_rms.
        """
        if not self.features:
            return {
                "total_samples": 0,
                "by_role": {},
                "avg_duration": 0,
                "avg_rms": 0
            }
        # Aggregate per role plus running totals (renamed local to avoid
        # shadowing self.features entries).
        by_role = {}
        total_duration = 0
        total_rms = 0
        for path, info in self.features.items():
            role = info.get("role", "unknown")
            by_role[role] = by_role.get(role, 0) + 1
            total_duration += info.get("duration", 0)
            total_rms += info.get("rms", 0)
        total = len(self.features)
        return {
            "total_samples": total,
            "by_role": by_role,
            "avg_duration": round(total_duration / total, 3) if total > 0 else 0,
            "avg_rms": round(total_rms / total, 2) if total > 0 else 0
        }
# Función de conveniencia para uso directo
def analyze_library(library_path: str = None, verbose: bool = True) -> LibreriaAnalyzer:
    """
    Analyze the full sample library and return the configured analyzer.

    Args:
        library_path: Library root path (default: libreria/reggaeton/).
        verbose: Show analysis progress.

    Returns:
        A LibreriaAnalyzer instance with all features loaded.
    """
    configured = LibreriaAnalyzer(library_path=library_path, verbose=verbose)
    configured.analyze_all()
    return configured
if __name__ == "__main__":
    # Basic smoke test: construct the analyzer, run a full analysis pass
    # and print library statistics.
    print("[LibreriaAnalyzer] Test de inicialización...")
    try:
        analyzer = LibreriaAnalyzer(verbose=True)
        print(f"Librería: {analyzer.library_path}")
        print(f"Caché: {analyzer.cache_path}")
        print(f"Librosa disponible: {LIBROSA_AVAILABLE}")
        print(f"Scipy disponible: {SCIPY_AVAILABLE}")
        # Load from cache or analyze from scratch
        features = analyzer.analyze_all()
        print(f"\nTotal samples en caché: {len(features)}")
        # Show aggregate statistics
        stats = analyzer.get_stats()
        print(f"\nEstadísticas: {json.dumps(stats, indent=2)}")
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,619 @@
"""
SampleMetadataStore - SQLite database for audio sample metadata.
Stores analyzed audio features for the sample library to enable
fast similarity search and intelligent sample selection.
"""
import sqlite3
import logging
import json
from dataclasses import dataclass, asdict
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple
# Configure logging
logger = logging.getLogger(__name__)
@dataclass
class SampleFeatures:
    """Dataclass containing all audio features for a sample."""
    path: str
    bpm: Optional[float] = None
    key: Optional[str] = None
    duration: Optional[float] = None
    rms: Optional[float] = None
    spectral_centroid: Optional[float] = None
    spectral_rolloff: Optional[float] = None
    zero_crossing_rate: Optional[float] = None
    # MFCC coefficients 1-13
    mfcc_1: Optional[float] = None
    mfcc_2: Optional[float] = None
    mfcc_3: Optional[float] = None
    mfcc_4: Optional[float] = None
    mfcc_5: Optional[float] = None
    mfcc_6: Optional[float] = None
    mfcc_7: Optional[float] = None
    mfcc_8: Optional[float] = None
    mfcc_9: Optional[float] = None
    mfcc_10: Optional[float] = None
    mfcc_11: Optional[float] = None
    mfcc_12: Optional[float] = None
    mfcc_13: Optional[float] = None
    analyzed_at: Optional[str] = None
    categories: Optional[List[str]] = None

    def to_db_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary suitable for insertion into the samples table.

        Categories live in a separate many-to-many table, so they are
        stripped here. (The previous no-op loop that re-assigned None
        values to None has been removed.)
        """
        data = asdict(self)
        data.pop('categories', None)
        return data

    @classmethod
    def from_db_row(cls, row: sqlite3.Row, categories: Optional[List[str]] = None) -> 'SampleFeatures':
        """Create SampleFeatures from a database row.

        Args:
            row: A sqlite3.Row from the samples table.
            categories: Category labels fetched separately (defaults to []).

        Returns:
            A populated SampleFeatures instance.
        """
        features = cls(
            path=row['path'],
            bpm=row['bpm'],
            key=row['key'],
            duration=row['duration'],
            rms=row['rms'],
            spectral_centroid=row['spectral_centroid'],
            spectral_rolloff=row['spectral_rolloff'],
            zero_crossing_rate=row['zero_crossing_rate'],
            mfcc_1=row['mfcc_1'],
            mfcc_2=row['mfcc_2'],
            mfcc_3=row['mfcc_3'],
            mfcc_4=row['mfcc_4'],
            mfcc_5=row['mfcc_5'],
            mfcc_6=row['mfcc_6'],
            mfcc_7=row['mfcc_7'],
            mfcc_8=row['mfcc_8'],
            mfcc_9=row['mfcc_9'],
            mfcc_10=row['mfcc_10'],
            mfcc_11=row['mfcc_11'],
            mfcc_12=row['mfcc_12'],
            mfcc_13=row['mfcc_13'],
            analyzed_at=row['analyzed_at'],
            categories=categories or []
        )
        return features
class SampleMetadataStore:
"""
SQLite-based store for sample metadata and audio features.
Manages three tables:
- samples: Core audio features for each sample
- sample_categories: Many-to-many relationship for categories
- analysis_metadata: Store-wide statistics and versioning
"""
def __init__(self, db_path: str = "sample_metadata.db"):
"""
Initialize the metadata store.
Args:
db_path: Path to SQLite database file
"""
self.db_path = Path(db_path)
self._connection: Optional[sqlite3.Connection] = None
def _get_connection(self) -> sqlite3.Connection:
"""Get or create database connection."""
if self._connection is None:
self._connection = sqlite3.connect(str(self.db_path))
self._connection.row_factory = sqlite3.Row
self._connection.execute("PRAGMA foreign_keys = ON")
return self._connection
def close(self):
"""Close database connection."""
if self._connection:
self._connection.close()
self._connection = None
def init_database(self) -> bool:
"""
Initialize database schema. Creates tables if they don't exist.
Returns:
True if successful, False otherwise
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
# Main samples table
cursor.execute("""
CREATE TABLE IF NOT EXISTS samples (
path TEXT PRIMARY KEY,
bpm REAL,
key TEXT,
duration REAL,
rms REAL,
spectral_centroid REAL,
spectral_rolloff REAL,
zero_crossing_rate REAL,
mfcc_1 REAL,
mfcc_2 REAL,
mfcc_3 REAL,
mfcc_4 REAL,
mfcc_5 REAL,
mfcc_6 REAL,
mfcc_7 REAL,
mfcc_8 REAL,
mfcc_9 REAL,
mfcc_10 REAL,
mfcc_11 REAL,
mfcc_12 REAL,
mfcc_13 REAL,
analyzed_at TEXT
)
""")
# Index on key for fast key-based queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_samples_key ON samples(key)
""")
# Index on bpm for fast BPM-based queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_samples_bpm ON samples(bpm)
""")
# Sample categories table (many-to-many)
cursor.execute("""
CREATE TABLE IF NOT EXISTS sample_categories (
path TEXT NOT NULL,
category TEXT NOT NULL,
PRIMARY KEY (path, category),
FOREIGN KEY (path) REFERENCES samples(path) ON DELETE CASCADE
)
""")
# Index on category for fast category-based queries
cursor.execute("""
CREATE INDEX IF NOT EXISTS idx_categories_category ON sample_categories(category)
""")
# Analysis metadata table
cursor.execute("""
CREATE TABLE IF NOT EXISTS analysis_metadata (
id INTEGER PRIMARY KEY CHECK (id = 1),
version INTEGER DEFAULT 1,
total_samples INTEGER DEFAULT 0,
last_updated TEXT
)
""")
# Initialize metadata row if not exists
cursor.execute("""
INSERT OR IGNORE INTO analysis_metadata (id, version, total_samples, last_updated)
VALUES (1, 1, 0, ?)
""", (datetime.now().isoformat(),))
conn.commit()
logger.info(f"Database initialized at {self.db_path}")
return True
except sqlite3.Error as e:
logger.error(f"Failed to initialize database: {e}")
return False
def get_sample_features(self, sample_path: str) -> Optional[SampleFeatures]:
"""
Get features for a specific sample.
Args:
sample_path: Path to the sample file
Returns:
SampleFeatures object or None if not found
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
# Get sample features
cursor.execute(
"SELECT * FROM samples WHERE path = ?",
(sample_path,)
)
row = cursor.fetchone()
if row is None:
return None
# Get categories
cursor.execute(
"SELECT category FROM sample_categories WHERE path = ?",
(sample_path,)
)
categories = [r['category'] for r in cursor.fetchall()]
return SampleFeatures.from_db_row(row, categories)
except sqlite3.Error as e:
logger.error(f"Error retrieving features for {sample_path}: {e}")
return None
def save_sample_features(self, sample_path: str, features: SampleFeatures) -> bool:
"""
Save or update features for a sample.
Args:
sample_path: Path to the sample file
features: SampleFeatures object with all audio features
Returns:
True if successful, False otherwise
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
# Prepare data for samples table
data = features.to_db_dict()
data['path'] = sample_path
data['analyzed_at'] = datetime.now().isoformat()
# Insert or update sample
cursor.execute("""
INSERT INTO samples VALUES (
:path, :bpm, :key, :duration, :rms, :spectral_centroid,
:spectral_rolloff, :zero_crossing_rate,
:mfcc_1, :mfcc_2, :mfcc_3, :mfcc_4, :mfcc_5, :mfcc_6,
:mfcc_7, :mfcc_8, :mfcc_9, :mfcc_10, :mfcc_11, :mfcc_12, :mfcc_13,
:analyzed_at
)
ON CONFLICT(path) DO UPDATE SET
bpm = excluded.bpm,
key = excluded.key,
duration = excluded.duration,
rms = excluded.rms,
spectral_centroid = excluded.spectral_centroid,
spectral_rolloff = excluded.spectral_rolloff,
zero_crossing_rate = excluded.zero_crossing_rate,
mfcc_1 = excluded.mfcc_1,
mfcc_2 = excluded.mfcc_2,
mfcc_3 = excluded.mfcc_3,
mfcc_4 = excluded.mfcc_4,
mfcc_5 = excluded.mfcc_5,
mfcc_6 = excluded.mfcc_6,
mfcc_7 = excluded.mfcc_7,
mfcc_8 = excluded.mfcc_8,
mfcc_9 = excluded.mfcc_9,
mfcc_10 = excluded.mfcc_10,
mfcc_11 = excluded.mfcc_11,
mfcc_12 = excluded.mfcc_12,
mfcc_13 = excluded.mfcc_13,
analyzed_at = excluded.analyzed_at
""", data)
# Handle categories if present
if features.categories:
# Remove existing categories
cursor.execute(
"DELETE FROM sample_categories WHERE path = ?",
(sample_path,)
)
# Insert new categories
for category in features.categories:
cursor.execute(
"INSERT OR IGNORE INTO sample_categories (path, category) VALUES (?, ?)",
(sample_path, category)
)
# Update metadata stats
cursor.execute(
"UPDATE analysis_metadata SET total_samples = (SELECT COUNT(*) FROM samples), last_updated = ? WHERE id = 1",
(datetime.now().isoformat(),)
)
conn.commit()
logger.debug(f"Saved features for {sample_path}")
return True
except sqlite3.Error as e:
logger.error(f"Error saving features for {sample_path}: {e}")
return False
def get_samples_by_category(self, category: str) -> List[str]:
"""
Get all sample paths for a specific category.
Args:
category: Category name (e.g., 'kick', 'snare', 'bass')
Returns:
List of sample paths
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT path FROM sample_categories WHERE category = ?",
(category,)
)
return [row['path'] for row in cursor.fetchall()]
except sqlite3.Error as e:
logger.error(f"Error retrieving samples for category {category}: {e}")
return []
def get_all_samples(self, limit: Optional[int] = None) -> List[SampleFeatures]:
"""
Get all samples with their features.
Args:
limit: Optional limit on number of results
Returns:
List of SampleFeatures objects
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
query = "SELECT * FROM samples"
if limit:
query += f" LIMIT {limit}"
cursor.execute(query)
rows = cursor.fetchall()
# Get categories for all samples
result = []
for row in rows:
path = row['path']
cursor.execute(
"SELECT category FROM sample_categories WHERE path = ?",
(path,)
)
categories = [r['category'] for r in cursor.fetchall()]
result.append(SampleFeatures.from_db_row(row, categories))
return result
except sqlite3.Error as e:
logger.error(f"Error retrieving all samples: {e}")
return []
def sample_exists(self, sample_path: str) -> bool:
"""
Check if a sample has been analyzed and exists in database.
Args:
sample_path: Path to the sample file
Returns:
True if sample exists in database
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute(
"SELECT 1 FROM samples WHERE path = ?",
(sample_path,)
)
return cursor.fetchone() is not None
except sqlite3.Error as e:
logger.error(f"Error checking existence of {sample_path}: {e}")
return False
def get_stats(self) -> Dict[str, Any]:
"""
Get database statistics including count by category.
Returns:
Dictionary with stats: total_samples, version, last_updated, categories
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
# Get metadata
cursor.execute("SELECT * FROM analysis_metadata WHERE id = 1")
metadata_row = cursor.fetchone()
# Get count by category
cursor.execute("""
SELECT category, COUNT(*) as count
FROM sample_categories
GROUP BY category
""")
categories = {row['category']: row['count'] for row in cursor.fetchall()}
# Get total (more accurate than metadata)
cursor.execute("SELECT COUNT(*) as total FROM samples")
total = cursor.fetchone()['total']
if metadata_row:
return {
'total_samples': total,
'version': metadata_row['version'],
'last_updated': metadata_row['last_updated'],
'categories': categories
}
else:
return {
'total_samples': total,
'version': 1,
'last_updated': None,
'categories': categories
}
except sqlite3.Error as e:
logger.error(f"Error retrieving stats: {e}")
return {
'total_samples': 0,
'version': 1,
'last_updated': None,
'categories': {}
}
def delete_sample(self, sample_path: str) -> bool:
"""
Delete a sample and its categories from the database.
Args:
sample_path: Path to the sample file
Returns:
True if successful, False otherwise
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
cursor.execute("DELETE FROM samples WHERE path = ?", (sample_path,))
# Update metadata stats
cursor.execute(
"UPDATE analysis_metadata SET total_samples = (SELECT COUNT(*) FROM samples), last_updated = ? WHERE id = 1",
(datetime.now().isoformat(),)
)
conn.commit()
logger.debug(f"Deleted sample {sample_path}")
return True
except sqlite3.Error as e:
logger.error(f"Error deleting sample {sample_path}: {e}")
return False
def search_samples(
self,
category: Optional[str] = None,
key: Optional[str] = None,
bpm_min: Optional[float] = None,
bpm_max: Optional[float] = None,
limit: int = 50
) -> List[SampleFeatures]:
"""
Search samples with optional filters.
Args:
category: Filter by category
key: Filter by musical key
bpm_min: Minimum BPM
bpm_max: Maximum BPM
limit: Maximum results to return
Returns:
List of matching SampleFeatures
"""
try:
conn = self._get_connection()
cursor = conn.cursor()
conditions = []
params = []
if category:
# Join with categories table
base_query = """
SELECT s.* FROM samples s
INNER JOIN sample_categories sc ON s.path = sc.path
WHERE sc.category = ?
"""
params.append(category)
else:
base_query = "SELECT * FROM samples WHERE 1=1"
if key:
conditions.append("key = ?")
params.append(key)
if bpm_min is not None:
conditions.append("bpm >= ?")
params.append(bpm_min)
if bpm_max is not None:
conditions.append("bpm <= ?")
params.append(bpm_max)
if conditions:
base_query += " AND " + " AND ".join(conditions)
base_query += f" LIMIT {limit}"
cursor.execute(base_query, params)
rows = cursor.fetchall()
result = []
for row in rows:
path = row['path']
cursor.execute(
"SELECT category FROM sample_categories WHERE path = ?",
(path,)
)
categories = [r['category'] for r in cursor.fetchall()]
result.append(SampleFeatures.from_db_row(row, categories))
return result
except sqlite3.Error as e:
logger.error(f"Error searching samples: {e}")
return []
# Convenience helper: build and initialize a store in one call
def create_metadata_store(db_path: str = "sample_metadata.db") -> SampleMetadataStore:
    """
    Create and initialize a metadata store.

    Args:
        db_path: Path to the database file

    Returns:
        Initialized SampleMetadataStore instance
    """
    metadata_store = SampleMetadataStore(db_path)
    metadata_store.init_database()
    return metadata_store
if __name__ == "__main__":
    # Simple smoke test: exercises save/get/stats against a real SQLite file.
    # NOTE(review): this creates "test_metadata.db" in the working directory.
    logging.basicConfig(level=logging.INFO)
    # Create test store
    store = create_metadata_store("test_metadata.db")
    # Test saving
    features = SampleFeatures(
        path="/test/kick.wav",
        bpm=95.0,
        key="Am",
        duration=2.5,
        rms=-12.0,
        spectral_centroid=2500.0,
        categories=["kick", "drums"]
    )
    store.save_sample_features("/test/kick.wav", features)
    # Test retrieving the same record back
    retrieved = store.get_sample_features("/test/kick.wav")
    print(f"Retrieved: {retrieved}")
    # Test aggregate stats
    stats = store.get_stats()
    print(f"Stats: {stats}")
    store.close()
    print("Tests completed successfully")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
"""Small compatibility layer for legacy musical_intelligence imports."""
from typing import Any, Dict, List
class MusicalIntelligenceEngine:
    """Expose only the legacy methods still imported by server.py."""

    def __init__(self):
        # Stored section/progression configs and the key currently in effect.
        self._progressions: List[Dict[str, Any]] = []
        self._current_key = "Am"

    def set_multiple_progressions(self, progressions_config: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Remember the given progression configs and return a summary of them."""
        self._progressions = list(progressions_config or [])
        sections = []
        progressions = []
        chord_total = 0
        for entry in self._progressions:
            sections.append(entry.get("section", ""))
            prog = entry.get("progression", "")
            progressions.append(prog)
            # Chords are dash-separated, so the count is the number of segments.
            chord_total += len(str(prog).split("-"))
        return {
            "sections": sections,
            "progressions": progressions,
            "total_chords": chord_total,
        }

    def modulate_key(self, section_index: int, new_key: str) -> Dict[str, Any]:
        """Switch the engine to *new_key* and report the transition details."""
        previous_key, self._current_key = self._current_key, new_key
        return {
            "original_key": previous_key,
            "new_key": new_key,
            "modulation_type": "direct",
            "tracks_affected": [section_index],
        }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,832 @@
"""
PresetManager - Save/Load Coherent Sample Kits
Manages coherent sample kit presets with CRUD operations,
similarity matching, and usage tracking.
"""
import os
import json
import time
import hashlib
import shutil
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
@dataclass
class SampleEntry:
    """A sample slot in a kit: a base sample plus optional per-context variations."""
    base: str
    # Annotated Optional because the declared default is None (the previous
    # annotation `Dict[str, str] = None` contradicted itself); __post_init__
    # normalizes None to {} so instances never share a mutable default.
    variations: Optional[Dict[str, str]] = None

    def __post_init__(self):
        if self.variations is None:
            self.variations = {}

    def to_dict(self) -> Dict:
        """Serialize to a JSON-friendly dict."""
        return {
            "base": self.base,
            "variations": self.variations
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'SampleEntry':
        """Build an entry from a dict; missing keys fall back to empty values."""
        return cls(
            base=data.get("base", ""),
            variations=data.get("variations", {})
        )
@dataclass
class CoherenceProof:
    """Coherence verification data for a kit."""
    overall_score: float
    pair_scores: List[Dict[str, Any]]

    def to_dict(self) -> Dict:
        """Serialize to a JSON-friendly dict."""
        return {"overall_score": self.overall_score, "pair_scores": self.pair_scores}

    @classmethod
    def from_dict(cls, data: Dict) -> 'CoherenceProof':
        """Deserialize, defaulting to a zero score and no pair data."""
        score = data.get("overall_score", 0.0)
        pairs = data.get("pair_scores", [])
        return cls(overall_score=score, pair_scores=pairs)
@dataclass
class KitMetadata:
    """Metadata for a sample kit preset."""
    genre: str
    style: str
    tempo: int
    key: str
    coherence_score: float
    variation_level: str = "medium"
    # Annotated Optional because the declared default is None (the previous
    # annotation `List[str] = None` contradicted itself); __post_init__
    # normalizes None to [] so instances never share a mutable default.
    tags: Optional[List[str]] = None

    def __post_init__(self):
        if self.tags is None:
            self.tags = []

    def to_dict(self) -> Dict:
        """Serialize to a JSON-friendly dict."""
        return {
            "genre": self.genre,
            "style": self.style,
            "tempo": self.tempo,
            "key": self.key,
            "coherence_score": self.coherence_score,
            "variation_level": self.variation_level,
            "tags": self.tags
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'KitMetadata':
        """Deserialize, falling back to sensible defaults for missing keys."""
        return cls(
            genre=data.get("genre", "unknown"),
            style=data.get("style", "standard"),
            tempo=data.get("tempo", 95),
            key=data.get("key", "Am"),
            coherence_score=data.get("coherence_score", 0.0),
            variation_level=data.get("variation_level", "medium"),
            tags=data.get("tags", [])
        )
@dataclass
class Preset:
    """Complete preset structure for a coherent sample kit."""
    name: str
    description: str
    created_at: str
    metadata: KitMetadata
    kit: Dict[str, SampleEntry]
    coherence_proof: CoherenceProof
    usage_count: int = 0
    last_used: str = ""

    def to_dict(self) -> Dict:
        """Serialize the preset, recursing into the nested components."""
        serialized_kit = {role: entry.to_dict() for role, entry in self.kit.items()}
        return {
            "name": self.name,
            "description": self.description,
            "created_at": self.created_at,
            "metadata": self.metadata.to_dict(),
            "kit": serialized_kit,
            "coherence_proof": self.coherence_proof.to_dict(),
            "usage_count": self.usage_count,
            "last_used": self.last_used,
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'Preset':
        """Deserialize a preset dict, tolerating missing keys."""
        kit_data = data.get("kit", {})
        return cls(
            name=data.get("name", "Unnamed"),
            description=data.get("description", ""),
            created_at=data.get("created_at", ""),
            metadata=KitMetadata.from_dict(data.get("metadata", {})),
            kit={role: SampleEntry.from_dict(entry) for role, entry in kit_data.items()},
            coherence_proof=CoherenceProof.from_dict(data.get("coherence_proof", {})),
            usage_count=data.get("usage_count", 0),
            last_used=data.get("last_used", ""),
        )
class PresetManager:
    """
    Manages coherent sample kit presets with save/load/search capabilities.

    Features:
    - CRUD operations for presets
    - Search and filter by genre, style, coherence
    - Similarity matching between kits
    - Usage tracking
    - Duplicate detection
    - Import/export for sharing
    """

    def __init__(self, presets_dir: Optional[str] = None):
        """
        Initialize PresetManager.

        Args:
            presets_dir: Directory for preset storage. If None, uses the
                default (AbletonMCP_AI/presets relative to this file). The
                directory is created if it does not exist.
        """
        if presets_dir is None:
            # Default to AbletonMCP_AI/presets/
            base_dir = Path(__file__).parent.parent.parent
            self.presets_dir = base_dir / "presets"
        else:
            self.presets_dir = Path(presets_dir)
        # Ensure directory exists
        self.presets_dir.mkdir(parents=True, exist_ok=True)
        # Cache for loaded presets (name -> Preset)
        self._cache: Dict[str, Preset] = {}
        self._cache_timestamp: Optional[datetime] = None

    def _generate_filename(self, metadata: KitMetadata) -> str:
        """
        Generate filename from metadata.
        Format: {genre}_{style}_{coherence}_{timestamp}.json
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        coherence_str = f"{metadata.coherence_score:.2f}"
        safe_genre = metadata.genre.replace(" ", "_").lower()
        safe_style = metadata.style.replace(" ", "_").lower()
        return f"{safe_genre}_{safe_style}_{coherence_str}_{timestamp}.json"

    def _generate_name(self, metadata: KitMetadata, kit: Dict[str, SampleEntry]) -> str:
        """
        Auto-generate a meaningful preset name.
        Based on genre, style, and key elements in the kit.
        """
        # Base name from style
        base_name = metadata.style.replace("_", " ").title()
        # Add descriptors based on kit contents
        descriptors = []
        if "kick" in kit:
            kick_path = kit["kick"].base.lower()
            if "pesado" in kick_path or "heavy" in kick_path:
                descriptors.append("Pesado")
            elif "sutil" in kick_path or "soft" in kick_path:
                descriptors.append("Suave")
            elif "estampido" in kick_path:
                descriptors.append("Estampido")
        if "bass" in kit:
            descriptors.append("Con Bajo")
        # Add coherence quality
        if metadata.coherence_score >= 0.95:
            descriptors.append("Ultra")
        elif metadata.coherence_score >= 0.90:
            descriptors.append("Premium")
        # Combine
        if descriptors:
            descriptor_str = ", ".join(descriptors[:2])  # Max 2 descriptors
            name = f"{base_name} ({descriptor_str})"
        else:
            name = base_name
        # Add a uniqueness number if the name is already taken
        existing = self._get_existing_names()
        count = 1
        final_name = name
        while final_name in existing:
            count += 1
            final_name = f"{name} #{count}"
        return final_name

    def _generate_description(self, metadata: KitMetadata, kit: Dict[str, SampleEntry]) -> str:
        """Generate a human-readable description."""
        parts = [
            f"{metadata.tempo}bpm {metadata.key}",
        ]
        # Describe key elements
        elements = []
        if "kick" in kit:
            kick_file = os.path.basename(kit["kick"].base)
            elements.append(f"kick: {kick_file.replace('.wav', '').replace('_', ' ')}")
        if "snare" in kit:
            elements.append("snare incluido")
        if "bass" in kit:
            elements.append("bass presente")
        if elements:
            parts.append(", ".join(elements))
        # Add energy description
        if metadata.coherence_score >= 0.95:
            parts.append("coherencia excepcional")
        elif metadata.coherence_score >= 0.90:
            parts.append("alta coherencia")
        return " | ".join(parts)

    def _get_existing_names(self) -> set:
        """Get the set of existing preset names."""
        names = set()
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                names.add(data.get("name", ""))
            except Exception:
                # Unreadable/corrupt preset files are skipped
                # (was a bare `except:`, which also swallowed KeyboardInterrupt).
                pass
        return names

    def _compute_kit_hash(self, kit: Dict[str, SampleEntry]) -> str:
        """
        Compute a hash for a kit to detect duplicates.
        Uses base sample paths only (not variations).
        """
        # Extract base paths and sort for order-independence
        base_paths = []
        for role in sorted(kit.keys()):
            entry = kit[role]
            base_paths.append(f"{role}:{entry.base}")
        # MD5 is used purely as a fast fingerprint here, not for security.
        content = "|".join(base_paths)
        return hashlib.md5(content.encode()).hexdigest()[:16]

    def _check_duplicate(self, kit: Dict[str, SampleEntry]) -> Optional[str]:
        """
        Check if the kit already exists as a preset.
        Returns the preset name if a duplicate is found, None otherwise.
        """
        kit_hash = self._compute_kit_hash(kit)
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                existing_kit = data.get("kit", {})
                existing_hash = self._compute_kit_hash(
                    {k: SampleEntry.from_dict(v) for k, v in existing_kit.items()}
                )
                if existing_hash == kit_hash:
                    return data.get("name")
            except Exception:
                # Unreadable/corrupt preset files are skipped
                pass
        return None

    def save_preset(
        self,
        name: Optional[str],
        kit: Dict[str, Any],
        coherence_score: float,
        metadata: Dict[str, Any],
        coherence_proof: Optional[Dict] = None,
        allow_duplicates: bool = False
    ) -> Tuple[bool, str, Preset]:
        """
        Save a new preset.

        Args:
            name: Preset name (auto-generated if None)
            kit: Dictionary of role -> {base: path, variations: {context: path}}
            coherence_score: Overall coherence score (0.0-1.0)
            metadata: Dict with genre, style, tempo, key, etc.
            coherence_proof: Optional detailed coherence data
            allow_duplicates: If False, checks for existing identical kits

        Returns:
            Tuple of (success: bool, message: str, preset: Preset or None)
        """
        # Convert kit to SampleEntry objects
        kit_entries = {}
        for role, entry_data in kit.items():
            if isinstance(entry_data, dict):
                kit_entries[role] = SampleEntry.from_dict(entry_data)
            else:
                # Assume it's just a path string
                kit_entries[role] = SampleEntry(base=str(entry_data), variations={})
        # Create metadata object
        kit_metadata = KitMetadata.from_dict(metadata)
        kit_metadata.coherence_score = coherence_score
        # Check for duplicates
        if not allow_duplicates:
            duplicate_name = self._check_duplicate(kit_entries)
            if duplicate_name:
                return (False, f"Duplicate of existing preset: '{duplicate_name}'", None)
        # Generate name if not provided
        if not name:
            name = self._generate_name(kit_metadata, kit_entries)
        # Generate description
        description = self._generate_description(kit_metadata, kit_entries)
        # Create coherence proof
        if coherence_proof is None:
            coherence_proof = {
                "overall_score": coherence_score,
                "pair_scores": []
            }
        proof = CoherenceProof.from_dict(coherence_proof)
        # Create preset
        preset = Preset(
            name=name,
            description=description,
            created_at=datetime.now().isoformat(),
            metadata=kit_metadata,
            kit=kit_entries,
            coherence_proof=proof,
            usage_count=0,
            last_used=""
        )
        # Generate filename
        filename = self._generate_filename(kit_metadata)
        filepath = self.presets_dir / filename
        # Save to file
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(preset.to_dict(), f, indent=2, ensure_ascii=False)
            # Update cache
            self._cache[name] = preset
            # BUG FIX: message previously contained a "(unknown)" placeholder
            # instead of the actual destination path.
            return (True, f"Saved preset '{name}' to {filepath}", preset)
        except Exception as e:
            return (False, f"Failed to save preset: {str(e)}", None)

    def load_preset(self, name: str) -> Tuple[bool, str, Optional[Preset]]:
        """
        Load a preset by name.

        Args:
            name: Preset name to load

        Returns:
            Tuple of (success: bool, message: str, preset: Optional[Preset])
        """
        # Check cache first
        if name in self._cache:
            return (True, "Loaded from cache", self._cache[name])
        # Search files
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if data.get("name") == name:
                    preset = Preset.from_dict(data)
                    self._cache[name] = preset
                    return (True, f"Loaded from {filename.name}", preset)
            except Exception:
                continue
        return (False, f"Preset '{name}' not found", None)

    def list_presets(
        self,
        genre: Optional[str] = None,
        style: Optional[str] = None,
        min_coherence: float = 0.0,
        max_coherence: float = 1.0,
        tags: Optional[List[str]] = None,
        sort_by: str = "coherence",  # "coherence", "usage", "date", "name"
        limit: int = 100
    ) -> List[Preset]:
        """
        List presets with filtering and sorting.

        Args:
            genre: Filter by genre
            style: Filter by style
            min_coherence: Minimum coherence score
            max_coherence: Maximum coherence score
            tags: Filter by tags (all must match)
            sort_by: Sort field ("coherence", "usage", "date", "name")
            limit: Maximum results to return

        Returns:
            List of matching Preset objects
        """
        presets = []
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                preset = Preset.from_dict(data)
                # Apply filters
                if genre and preset.metadata.genre.lower() != genre.lower():
                    continue
                if style and preset.metadata.style.lower() != style.lower():
                    continue
                if preset.metadata.coherence_score < min_coherence:
                    continue
                if preset.metadata.coherence_score > max_coherence:
                    continue
                if tags:
                    preset_tags = set(t.lower() for t in preset.metadata.tags)
                    if not all(t.lower() in preset_tags for t in tags):
                        continue
                presets.append(preset)
            except Exception:
                # Unreadable/corrupt preset files are skipped
                pass
        # Sort
        if sort_by == "coherence":
            presets.sort(key=lambda p: p.metadata.coherence_score, reverse=True)
        elif sort_by == "usage":
            presets.sort(key=lambda p: p.usage_count, reverse=True)
        elif sort_by == "date":
            presets.sort(key=lambda p: p.created_at, reverse=True)
        elif sort_by == "name":
            presets.sort(key=lambda p: p.name.lower())
        return presets[:limit]

    def find_similar_presets(
        self,
        reference_kit: Dict[str, Any],
        count: int = 5,
        min_coherence: float = 0.85
    ) -> List[Tuple[Preset, float]]:
        """
        Find presets similar to a reference kit.

        Args:
            reference_kit: Dictionary of role -> sample paths
            count: Number of results to return
            min_coherence: Minimum coherence for candidates

        Returns:
            List of (preset, similarity_score) tuples, best first
        """
        # Get all presets above the minimum coherence
        candidates = self.list_presets(min_coherence=min_coherence)
        if not candidates:
            return []
        # Calculate similarity scores
        scored_presets = []
        for preset in candidates:
            score = self._calculate_similarity(reference_kit, preset)
            scored_presets.append((preset, score))
        # Sort by score
        scored_presets.sort(key=lambda x: x[1], reverse=True)
        return scored_presets[:count]

    def _calculate_similarity(
        self,
        reference_kit: Dict[str, Any],
        preset: Preset
    ) -> float:
        """
        Calculate similarity between a reference kit and a preset.

        Based on:
        - Role overlap (same roles present)
        - Sample filename word overlap for matching roles
        """
        scores = []
        # Role overlap (Jaccard index)
        ref_roles = set(reference_kit.keys())
        preset_roles = set(preset.kit.keys())
        if ref_roles and preset_roles:
            intersection = len(ref_roles & preset_roles)
            union = len(ref_roles | preset_roles)
            role_score = intersection / union if union > 0 else 0
            scores.append(role_score)
        # Sample name similarity for matching roles
        name_scores = []
        for role in ref_roles & preset_roles:
            ref_entry = reference_kit[role]
            if isinstance(ref_entry, dict):
                ref_path = ref_entry.get("base", "")
            else:
                ref_path = str(ref_entry)
            preset_path = preset.kit[role].base
            # Extract filenames
            ref_name = os.path.basename(ref_path).lower().replace(".wav", "")
            preset_name = os.path.basename(preset_path).lower().replace(".wav", "")
            # Check for common words (underscore-separated)
            ref_words = set(ref_name.split("_"))
            preset_words = set(preset_name.split("_"))
            if ref_words and preset_words:
                common = len(ref_words & preset_words)
                total = len(ref_words | preset_words)
                name_scores.append(common / total if total > 0 else 0)
        if name_scores:
            scores.append(sum(name_scores) / len(name_scores))
        # Combine scores
        return sum(scores) / len(scores) if scores else 0.0

    def delete_preset(self, name: str) -> Tuple[bool, str]:
        """
        Delete a preset by name.

        Args:
            name: Preset name to delete

        Returns:
            Tuple of (success: bool, message: str)
        """
        # Find file
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if data.get("name") == name:
                    # Delete file
                    filename.unlink()
                    # Remove from cache
                    if name in self._cache:
                        del self._cache[name]
                    return (True, f"Deleted preset '{name}'")
            except Exception:
                # Unreadable/corrupt preset files are skipped
                pass
        return (False, f"Preset '{name}' not found")

    def increment_usage(self, name: str) -> Tuple[bool, str]:
        """
        Increment the usage counter for a preset.

        Args:
            name: Preset name

        Returns:
            Tuple of (success: bool, message: str)
        """
        success, msg, preset = self.load_preset(name)
        if not success or preset is None:
            return (False, msg)
        # Update usage
        preset.usage_count += 1
        preset.last_used = datetime.now().isoformat()
        # Find and update file
        for filename in self.presets_dir.glob("*.json"):
            try:
                with open(filename, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if data.get("name") != name:
                    continue
                data["usage_count"] = preset.usage_count
                data["last_used"] = preset.last_used
                # BUG FIX: the read handle is now closed before the same file
                # is reopened for writing (previously the write was nested
                # inside the read context, shadowing the handle).
                with open(filename, 'w', encoding='utf-8') as f:
                    json.dump(data, f, indent=2, ensure_ascii=False)
                # Update cache
                self._cache[name] = preset
                return (True, f"Usage count: {preset.usage_count}")
            except Exception:
                pass
        return (False, "Failed to update usage count")

    def export_preset(self, name: str, path: str) -> Tuple[bool, str]:
        """
        Export a preset to an external location for sharing.

        Args:
            name: Preset name to export
            path: Destination path

        Returns:
            Tuple of (success: bool, message: str)
        """
        success, msg, preset = self.load_preset(name)
        if not success or preset is None:
            return (False, msg)
        try:
            dest_path = Path(path)
            # Create directory if needed
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            # Export as JSON
            with open(dest_path, 'w', encoding='utf-8') as f:
                json.dump(preset.to_dict(), f, indent=2, ensure_ascii=False)
            return (True, f"Exported to {dest_path}")
        except Exception as e:
            return (False, f"Export failed: {str(e)}")

    def import_preset(self, path: str, allow_overwrite: bool = False) -> Tuple[bool, str, Optional[Preset]]:
        """
        Import a preset from an external file.

        Args:
            path: Path to the external preset JSON
            allow_overwrite: If True, overwrites an existing preset with the same name

        Returns:
            Tuple of (success: bool, message: str, preset: Optional[Preset])
        """
        try:
            source_path = Path(path)
            if not source_path.exists():
                return (False, f"File not found: {path}", None)
            # Load preset data
            with open(source_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            preset = Preset.from_dict(data)
            # Check for existing
            existing = self.load_preset(preset.name)
            if existing[0] and not allow_overwrite:
                return (False, f"Preset '{preset.name}' already exists (use allow_overwrite=True)", None)
            # Generate new filename
            filename = self._generate_filename(preset.metadata)
            dest_path = self.presets_dir / filename
            # Copy file
            shutil.copy2(source_path, dest_path)
            # Update cache
            self._cache[preset.name] = preset
            return (True, f"Imported preset '{preset.name}'", preset)
        except Exception as e:
            return (False, f"Import failed: {str(e)}", None)

    def get_preset_stats(self) -> Dict[str, Any]:
        """
        Get statistics about stored presets.

        Returns:
            Dictionary with totals, coherence range, genre/style counts,
            and the most-used preset (None if nothing has been used yet).
        """
        presets = self.list_presets(limit=10000)
        if not presets:
            return {
                "total_presets": 0,
                "avg_coherence": 0.0,
                "genres": {},
                "styles": {},
                "most_used": None
            }
        # Calculate stats
        coherence_scores = [p.metadata.coherence_score for p in presets]
        genres = {}
        styles = {}
        for p in presets:
            genres[p.metadata.genre] = genres.get(p.metadata.genre, 0) + 1
            styles[p.metadata.style] = styles.get(p.metadata.style, 0) + 1
        most_used = max(presets, key=lambda p: p.usage_count)
        return {
            "total_presets": len(presets),
            "avg_coherence": sum(coherence_scores) / len(coherence_scores),
            "min_coherence": min(coherence_scores),
            "max_coherence": max(coherence_scores),
            "genres": genres,
            "styles": styles,
            "most_used": {
                "name": most_used.name,
                "usage_count": most_used.usage_count
            } if most_used.usage_count > 0 else None
        }

    def clear_cache(self):
        """Clear the preset cache."""
        self._cache.clear()
        self._cache_timestamp = None
# Convenience functions for direct usage
def get_preset_manager() -> PresetManager:
    """Get default PresetManager instance.

    Note: constructing the manager creates the default presets directory
    on disk if it does not already exist.
    """
    return PresetManager()
# Example usage
if __name__ == "__main__":
    # NOTE(review): running this demo writes a preset JSON file into the
    # default presets directory on disk.
    # Create manager
    manager = PresetManager()
    # Example kit: role -> base sample plus optional per-section variations
    example_kit = {
        "kick": {
            "base": "/path/to/Kick_Pesado_01.wav",
            "variations": {
                "intro": "/path/to/Kick_Sutil_12.wav",
                "verse": "/path/to/Kick_Estampido_07.wav",
                "chorus": "/path/to/Kick_Agresivo_03.wav"
            }
        },
        "snare": {
            "base": "/path/to/Snare_Corte_01.wav",
            "variations": {}
        },
        "bass": {
            "base": "/path/to/Bass_Profundo_02.wav",
            "variations": {}
        }
    }
    # Example metadata
    metadata = {
        "genre": "reggaeton",
        "style": "perreo_intenso",
        "tempo": 95,
        "key": "Am",
        "variation_level": "high",
        "tags": ["heavy", "energetic"]
    }
    # Save preset
    success, msg, preset = manager.save_preset(
        name=None,  # Auto-generate
        kit=example_kit,
        coherence_score=0.91,
        metadata=metadata
    )
    print(f"Save: {success} - {msg}")
    # List presets, best coherence first
    presets = manager.list_presets(sort_by="coherence")
    print(f"\nFound {len(presets)} presets:")
    for p in presets:
        print(f"  - {p.name} ({p.metadata.coherence_score:.2f})")
    # Stats
    stats = manager.get_preset_stats()
    print(f"\nStats: {stats}")

View File

@@ -0,0 +1,636 @@
"""
Preset System - Sistema de Presets y Templates para AbletonMCP_AI (T061-T065)
Gestión completa de presets para reggaeton: predefinidos, personalizados,
importación/exportación, y aplicación a proyectos.
"""
import json
import logging
import os
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
logger = logging.getLogger("PresetSystem")
PRESETS_DIR = Path(r"C:\ProgramData\Ableton\Live 12 Suite\Resources\MIDI Remote Scripts\AbletonMCP_AI\presets")
# =============================================================================
# DATACLASSES
# =============================================================================
@dataclass
class TrackPreset:
    """Per-track configuration inside a song preset."""
    name: str
    track_type: str  # either "midi" or "audio"
    role: str
    sample_criteria: Dict[str, Any] = field(default_factory=dict)
    device_chain: List[Dict[str, Any]] = field(default_factory=list)
    volume: float = 0.8
    pan: float = 0.0
    mute: bool = False
    solo: bool = False
    color: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all fields via dataclasses.asdict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TrackPreset":
        """Rebuild a TrackPreset from a dict produced by to_dict()."""
        return cls(**data)
@dataclass
class MixingConfig:
    """Mixing configuration attached to a preset."""
    eq_low_gain: float = 0.0
    eq_mid_gain: float = 0.0
    eq_high_gain: float = 0.0
    compressor_threshold: float = -6.0
    compressor_ratio: float = 3.0
    compressor_makeup: float = 3.0
    send_reverb: float = 0.3
    send_delay: float = 0.2
    master_volume: float = 0.85

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all fields via dataclasses.asdict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MixingConfig":
        """Rebuild a MixingConfig; missing keys keep their defaults."""
        return cls(**data)
@dataclass
class SampleSelectionCriteria:
    """Sample-selection criteria used when a preset is applied."""
    preferred_packs: List[str] = field(default_factory=list)
    excluded_packs: List[str] = field(default_factory=list)
    min_bpm: float = 0.0
    max_bpm: float = 0.0
    preferred_key: str = ""
    use_similarity_selection: bool = False
    similarity_reference: str = ""
    priority_roles: List[str] = field(default_factory=lambda: ["kick", "snare", "bass", "hat_closed"])

    def to_dict(self) -> Dict[str, Any]:
        """Serialize all fields via dataclasses.asdict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SampleSelectionCriteria":
        """Rebuild criteria; missing keys keep their defaults."""
        return cls(**data)
@dataclass
class Preset:
    """Complete song-configuration preset."""
    name: str
    description: str
    version: str = "1.0"
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    updated_at: str = field(default_factory=lambda: datetime.now().isoformat())
    bpm: float = 95.0
    key: str = "Am"
    style: str = "dembow"
    structure: str = "standard"
    tracks_config: List[TrackPreset] = field(default_factory=list)
    mixing_config: MixingConfig = field(default_factory=MixingConfig)
    sample_selection: SampleSelectionCriteria = field(default_factory=SampleSelectionCriteria)
    tags: List[str] = field(default_factory=list)
    author: str = ""
    is_builtin: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize, recursing into the track/mixing/sample sub-configs."""
        payload = {
            "name": self.name,
            "description": self.description,
            "version": self.version,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "bpm": self.bpm,
            "key": self.key,
            "style": self.style,
            "structure": self.structure,
            "tracks_config": [track.to_dict() for track in self.tracks_config],
            "mixing_config": self.mixing_config.to_dict(),
            "sample_selection": self.sample_selection.to_dict(),
            "tags": self.tags,
            "author": self.author,
            "is_builtin": self.is_builtin,
        }
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Preset":
        """Deserialize a preset dict; "name" is required, the rest defaults."""
        return cls(
            name=data["name"],
            description=data.get("description", ""),
            version=data.get("version", "1.0"),
            created_at=data.get("created_at", datetime.now().isoformat()),
            updated_at=data.get("updated_at", datetime.now().isoformat()),
            bpm=data.get("bpm", 95.0),
            key=data.get("key", "Am"),
            style=data.get("style", "dembow"),
            structure=data.get("structure", "standard"),
            tracks_config=[TrackPreset.from_dict(t) for t in data.get("tracks_config", [])],
            mixing_config=MixingConfig.from_dict(data.get("mixing_config", {})),
            sample_selection=SampleSelectionCriteria.from_dict(data.get("sample_selection", {})),
            tags=data.get("tags", []),
            author=data.get("author", ""),
            is_builtin=data.get("is_builtin", False),
        )
# =============================================================================
# PRESETS PREDEFINIDOS
# =============================================================================
def create_builtin_presets() -> Dict[str, Preset]:
    """Create the dictionary of builtin system presets.

    Returns:
        Mapping of preset name -> Preset for the five shipped styles
        (classic reggaeton, perreo, romantico, moombahton, trapeton).
    """
    # 1. Classic reggaeton, 95 BPM
    reggaeton_classic = Preset(
        name="reggaeton_classic_95bpm",
        description="Reggaeton clásico con dembow puro. Ideal para pistas de club.",
        bpm=95.0, key="Am", style="dembow", structure="standard",
        tags=["classic", "club", "dembow", "standard"], is_builtin=True,
        tracks_config=[
            TrackPreset(name="Kick", track_type="midi", role="kick", volume=0.9, sample_criteria={"role": "kick", "pack_preference": "classic"}),
            TrackPreset(name="Snare", track_type="midi", role="snare", volume=0.75, sample_criteria={"role": "snare"}),
            TrackPreset(name="Hi-Hats", track_type="midi", role="hat_closed", volume=0.65, sample_criteria={"role": "hat_closed"}),
            TrackPreset(name="Bass", track_type="midi", role="bass", volume=0.85, sample_criteria={"role": "bass", "pack_preference": "classic"}),
            TrackPreset(name="Synth Lead", track_type="midi", role="synth_lead", volume=0.7, sample_criteria={"role": "synth"}),
        ],
        mixing_config=MixingConfig(eq_low_gain=2.0, compressor_threshold=-4.0, compressor_ratio=2.5, send_reverb=0.25, master_volume=0.88),
    )
    # 2. Intense perreo, 100 BPM
    perreo_intenso = Preset(
        name="perreo_intenso_100bpm",
        description="Perreo intenso con kick heavy y bajo prominente. Alto impacto.",
        bpm=100.0, key="Em", style="perreo", structure="standard",
        tags=["perreo", "heavy", "club", "energetic"], is_builtin=True,
        tracks_config=[
            TrackPreset(name="Kick Heavy", track_type="midi", role="kick", volume=0.95, sample_criteria={"role": "kick", "character": "heavy"}),
            TrackPreset(name="Snare", track_type="midi", role="snare", volume=0.8),
            TrackPreset(name="Clap", track_type="midi", role="clap", volume=0.7),
            TrackPreset(name="Hi-Hats", track_type="midi", role="hat_closed", volume=0.7),
            TrackPreset(name="Bass Deep", track_type="midi", role="bass", volume=0.9, sample_criteria={"role": "bass", "character": "deep"}),
            TrackPreset(name="Lead", track_type="midi", role="synth_lead", volume=0.75),
        ],
        mixing_config=MixingConfig(eq_low_gain=4.0, compressor_threshold=-6.0, compressor_ratio=3.5, send_reverb=0.2, master_volume=0.9),
    )
    # 3. Romantic reggaeton, 90 BPM
    reggaeton_romantico = Preset(
        name="reggaeton_romantico_90bpm",
        description="Reggaeton romántico con reverb abundante y mezcla balanceada.",
        bpm=90.0, key="Gm", style="romantico", structure="extended",
        tags=["romantico", "smooth", "reverb", "extended"], is_builtin=True,
        tracks_config=[
            TrackPreset(name="Kick Soft", track_type="midi", role="kick", volume=0.75, sample_criteria={"role": "kick", "character": "soft"}),
            TrackPreset(name="Snare", track_type="midi", role="snare", volume=0.65),
            TrackPreset(name="Hi-Hats", track_type="midi", role="hat_closed", volume=0.55),
            TrackPreset(name="Bass Smooth", track_type="midi", role="bass", volume=0.7, sample_criteria={"role": "bass", "character": "smooth"}),
            TrackPreset(name="Pad", track_type="midi", role="synth_pad", volume=0.6),
            TrackPreset(name="Lead Melodic", track_type="midi", role="synth_lead", volume=0.65),
        ],
        mixing_config=MixingConfig(eq_low_gain=0.0, compressor_threshold=-8.0, compressor_ratio=2.0, send_reverb=0.5, send_delay=0.35, master_volume=0.82),
    )
    # 4. Moombahton, 108 BPM
    moombahton = Preset(
        name="moombahton_108bpm",
        description="Moombahton con variación de dembow y estructura minimal.",
        bpm=108.0, key="Dm", style="moombahton", structure="minimal",
        tags=["moombahton", "dembow", "minimal", "electronic"], is_builtin=True,
        tracks_config=[
            TrackPreset(name="Kick Moombah", track_type="midi", role="kick", volume=0.9, sample_criteria={"role": "kick", "style": "moombahton"}),
            TrackPreset(name="Snare", track_type="midi", role="snare", volume=0.75),
            TrackPreset(name="Tom", track_type="midi", role="perc", volume=0.6, sample_criteria={"role": "perc"}),
            TrackPreset(name="Hi-Hats", track_type="midi", role="hat_closed", volume=0.65),
            TrackPreset(name="Bass", track_type="midi", role="bass", volume=0.8),
            TrackPreset(name="Stabs", track_type="midi", role="synth_lead", volume=0.7, sample_criteria={"role": "synth", "character": "stab"}),
        ],
        mixing_config=MixingConfig(eq_low_gain=3.0, compressor_threshold=-5.0, compressor_ratio=3.0, send_reverb=0.3, master_volume=0.87),
    )
    # 5. Trapeton, 140 BPM
    trapeton = Preset(
        name="trapeton_140bpm",
        description="Trapeton con 808s pesados y hi-hat rolls. Fusión trap-reggaeton.",
        bpm=140.0, key="Cm", style="trapeton", structure="standard",
        tags=["trapeton", "trap", "808", "hihat_rolls", "hard"], is_builtin=True,
        tracks_config=[
            TrackPreset(name="808 Kick", track_type="midi", role="kick", volume=0.95, sample_criteria={"role": "kick", "character": "808"}),
            TrackPreset(name="Snare", track_type="midi", role="snare", volume=0.8, sample_criteria={"role": "snare", "character": "trap"}),
            TrackPreset(name="Hi-Hats", track_type="midi", role="hat_closed", volume=0.75, sample_criteria={"role": "hat_closed", "style": "trap"}),
            TrackPreset(name="Hi-Hat Rolls", track_type="midi", role="hat_open", volume=0.65, sample_criteria={"role": "hat_open", "style": "trap_rolls"}),
            TrackPreset(name="808 Bass", track_type="midi", role="bass", volume=0.9, sample_criteria={"role": "bass", "character": "808"}),
            TrackPreset(name="Lead Hard", track_type="midi", role="synth_lead", volume=0.75, sample_criteria={"role": "synth", "character": "aggressive"}),
        ],
        mixing_config=MixingConfig(eq_low_gain=5.0, eq_high_gain=2.0, compressor_threshold=-8.0, compressor_ratio=4.0, compressor_makeup=4.0, send_reverb=0.15, send_delay=0.25, master_volume=0.92),
    )
    # Keyed by preset name so PresetManager can look them up directly.
    return {
        reggaeton_classic.name: reggaeton_classic,
        perreo_intenso.name: perreo_intenso,
        reggaeton_romantico.name: reggaeton_romantico,
        moombahton.name: moombahton,
        trapeton.name: trapeton,
    }
# =============================================================================
# PRESET MANAGER
# =============================================================================
class PresetManager:
    """Preset manager for AbletonMCP_AI.

    Maintains two preset pools: built-in presets (constructed in code by
    ``create_builtin_presets``) and custom presets persisted as JSON files
    under ``self._presets_dir``. Built-ins take priority on lookup and can
    never be deleted.
    """
    def __init__(self, presets_dir: Optional[str] = None) -> None:
        # Fall back to the module-level default directory when none is given.
        self._presets_dir = Path(presets_dir) if presets_dir else PRESETS_DIR
        self._builtin_presets: Dict[str, Preset] = create_builtin_presets()
        self._custom_presets: Dict[str, Preset] = {}
        self._ensure_presets_dir()
        self._load_custom_presets()
    def _ensure_presets_dir(self) -> None:
        """Create the presets directory if missing (best-effort, failures logged)."""
        if not self._presets_dir.exists():
            try:
                self._presets_dir.mkdir(parents=True, exist_ok=True)
                logger.info("Created presets directory: %s", self._presets_dir)
            except Exception as e:
                logger.error("Failed to create presets directory: %s", e)
    def _get_preset_path(self, preset_name: str) -> Path:
        """Map a preset name to its JSON file path (spaces -> underscores, lowercased)."""
        safe_name = preset_name.replace(" ", "_").lower()
        return self._presets_dir / f"{safe_name}.json"
    def _load_custom_presets(self) -> None:
        """Load every ``*.json`` preset from disk; skip builtin-flagged and unreadable files."""
        if not self._presets_dir.exists():
            return
        for preset_file in self._presets_dir.glob("*.json"):
            try:
                with open(preset_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                preset = Preset.from_dict(data)
                # Files flagged as builtin are ignored: builtins live in code.
                if not preset.is_builtin:
                    self._custom_presets[preset.name] = preset
            except Exception as e:
                logger.warning("Failed to load preset %s: %s", preset_file, e)
        logger.info("Loaded %d custom presets", len(self._custom_presets))
    def load_preset(self, preset_name: str) -> Optional[Preset]:
        """Load a preset by name: builtins first, then custom.

        Falls back to a case-insensitive match across both pools; returns
        ``None`` when nothing matches.
        """
        if preset_name in self._builtin_presets:
            logger.info("Loaded builtin preset: %s", preset_name)
            return self._builtin_presets[preset_name]
        if preset_name in self._custom_presets:
            logger.info("Loaded custom preset: %s", preset_name)
            return self._custom_presets[preset_name]
        preset_name_lower = preset_name.lower()
        for name, preset in {**self._builtin_presets, **self._custom_presets}.items():
            if name.lower() == preset_name_lower:
                return preset
        logger.warning("Preset not found: %s", preset_name)
        return None
    def save_as_preset(self, config: Dict[str, Any], preset_name: str) -> bool:
        """Persist a configuration dict as a custom preset JSON file.

        Returns True on success, False on any failure (logged).
        """
        try:
            preset = self._config_to_preset(config, preset_name)
            preset.is_builtin = False
            preset.updated_at = datetime.now().isoformat()
            preset_path = self._get_preset_path(preset_name)
            with open(preset_path, "w", encoding="utf-8") as f:
                json.dump(preset.to_dict(), f, indent=2, ensure_ascii=False)
            self._custom_presets[preset_name] = preset
            logger.info("Saved preset: %s", preset_name)
            return True
        except Exception as e:
            logger.error("Failed to save preset %s: %s", preset_name, e)
            return False
    def _config_to_preset(self, config: Dict[str, Any], name: str) -> Preset:
        """Convert a raw configuration dict into a Preset instance.

        Missing keys fall back to defaults (95 BPM, key Am, dembow style).
        """
        tracks_config = []
        for track_data in config.get("tracks", []):
            tracks_config.append(TrackPreset(
                name=track_data.get("name", "Track"), track_type=track_data.get("track_type", "midi"),
                role=track_data.get("instrument_role", "synth"), volume=track_data.get("volume", 0.8),
                pan=track_data.get("pan", 0.0), device_chain=track_data.get("device_chain", []),
            ))
        mixing_data = config.get("mixing_config", {})
        mixing_config = MixingConfig(
            eq_low_gain=mixing_data.get("eq_low_gain", 0.0), eq_mid_gain=mixing_data.get("eq_mid_gain", 0.0),
            eq_high_gain=mixing_data.get("eq_high_gain", 0.0), compressor_threshold=mixing_data.get("compressor_threshold", -6.0),
            compressor_ratio=mixing_data.get("compressor_ratio", 3.0), send_reverb=mixing_data.get("send_reverb", 0.3),
            send_delay=mixing_data.get("send_delay", 0.2), master_volume=mixing_data.get("master_volume", 0.85),
        )
        return Preset(
            name=name, description=config.get("description", f"Custom preset: {name}"),
            bpm=config.get("bpm", 95.0), key=config.get("key", "Am"), style=config.get("style", "dembow"),
            structure=config.get("structure", "standard"), tracks_config=tracks_config,
            mixing_config=mixing_config, tags=config.get("tags", ["custom"]),
        )
    def list_presets(self, include_builtin: bool = True, filter_tags: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """List all available presets as summary dicts.

        Args:
            include_builtin: Include built-in presets alongside custom ones.
            filter_tags: Keep only presets carrying at least one of these tags.

        Returns:
            Summaries sorted with built-ins first, then alphabetically by name.
        """
        all_presets: Dict[str, Preset] = {}
        if include_builtin:
            all_presets.update(self._builtin_presets)
        all_presets.update(self._custom_presets)
        if filter_tags:
            all_presets = {n: p for n, p in all_presets.items() if any(t in p.tags for t in filter_tags)}
        result = [
            {"name": n, "description": p.description, "bpm": p.bpm, "key": p.key, "style": p.style,
             "structure": p.structure, "tags": p.tags, "is_builtin": p.is_builtin, "track_count": len(p.tracks_config)}
            for n, p in all_presets.items()
        ]
        result.sort(key=lambda x: (not x["is_builtin"], x["name"]))
        return result
    def create_custom_preset(self, current_config: Dict[str, Any], name: str, description: str = "", tags: Optional[List[str]] = None) -> Optional[Preset]:
        """Create a new custom preset from a configuration.

        Returns the created Preset, or ``None`` when persisting failed.
        """
        try:
            preset = self._config_to_preset(current_config, name)
            preset.description = description or f"Custom preset: {name}"
            preset.tags = tags or ["custom"]
            preset.is_builtin = False
            preset.author = current_config.get("author", "")
            if self.save_as_preset(current_config, name):
                return preset
            return None
        except Exception as e:
            logger.error("Failed to create custom preset: %s", e)
            return None
    def delete_preset(self, preset_name: str) -> bool:
        """Delete a custom preset. Built-in presets cannot be deleted."""
        if preset_name in self._builtin_presets:
            logger.warning("Cannot delete builtin preset: %s", preset_name)
            return False
        if preset_name not in self._custom_presets:
            logger.warning("Preset not found for deletion: %s", preset_name)
            return False
        try:
            preset_path = self._get_preset_path(preset_name)
            if preset_path.exists():
                preset_path.unlink()
            del self._custom_presets[preset_name]
            logger.info("Deleted preset: %s", preset_name)
            return True
        except Exception as e:
            logger.error("Failed to delete preset %s: %s", preset_name, e)
            return False
    def export_preset(self, preset_name: str, export_path: str) -> bool:
        """Export a preset (builtin or custom) to an external JSON file."""
        preset = self.load_preset(preset_name)
        if not preset:
            logger.warning("Cannot export non-existent preset: %s", preset_name)
            return False
        try:
            export_path = Path(export_path)
            # Force a .json extension so the file can be re-imported later.
            if not export_path.suffix == ".json":
                export_path = export_path.with_suffix(".json")
            with open(export_path, "w", encoding="utf-8") as f:
                json.dump(preset.to_dict(), f, indent=2, ensure_ascii=False)
            logger.info("Exported preset %s to %s", preset_name, export_path)
            return True
        except Exception as e:
            logger.error("Failed to export preset %s: %s", preset_name, e)
            return False
    def import_preset(self, import_path: str, preset_name: Optional[str] = None) -> Optional[Preset]:
        """Import a preset from an external JSON file.

        Args:
            import_path: Source JSON file to read.
            preset_name: Optional new name overriding the one in the file.

        Returns:
            The imported Preset (always marked non-builtin), or ``None`` on error.
        """
        try:
            import_path = Path(import_path)
            if not import_path.exists():
                logger.error("Import file not found: %s", import_path)
                return None
            with open(import_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            preset = Preset.from_dict(data)
            preset.is_builtin = False
            if preset_name:
                preset.name = preset_name
            # Copy the preset into the managed directory so it persists.
            preset_path = self._get_preset_path(preset.name)
            with open(preset_path, "w", encoding="utf-8") as f:
                json.dump(preset.to_dict(), f, indent=2, ensure_ascii=False)
            self._custom_presets[preset.name] = preset
            logger.info("Imported preset: %s", preset.name)
            return preset
        except Exception as e:
            logger.error("Failed to import preset from %s: %s", import_path, e)
            return None
    def get_preset_details(self, preset_name: str) -> Optional[Dict[str, Any]]:
        """Return the full details of a preset as a plain dict, or ``None`` if absent."""
        preset = self.load_preset(preset_name)
        if not preset:
            return None
        return {
            "name": preset.name, "description": preset.description, "version": preset.version,
            "created_at": preset.created_at, "updated_at": preset.updated_at,
            "bpm": preset.bpm, "key": preset.key, "style": preset.style, "structure": preset.structure,
            "tracks": [{"name": t.name, "type": t.track_type, "role": t.role, "volume": t.volume, "pan": t.pan} for t in preset.tracks_config],
            "mixing": preset.mixing_config.to_dict(),
            "sample_selection": preset.sample_selection.to_dict(),
            "tags": preset.tags, "author": preset.author, "is_builtin": preset.is_builtin,
        }
    def duplicate_preset(self, source_name: str, new_name: str) -> bool:
        """Duplicate an existing preset under a new name (copy is non-builtin)."""
        source = self.load_preset(source_name)
        if not source:
            return False
        try:
            # Round-trip through to_dict/from_dict to get a deep, independent copy.
            new_preset = Preset.from_dict(source.to_dict())
            new_preset.name = new_name
            new_preset.is_builtin = False
            new_preset.description = f"Copy of {source_name}: {source.description}"
            new_preset.created_at = datetime.now().isoformat()
            new_preset.updated_at = datetime.now().isoformat()
            preset_path = self._get_preset_path(new_name)
            with open(preset_path, "w", encoding="utf-8") as f:
                json.dump(new_preset.to_dict(), f, indent=2, ensure_ascii=False)
            self._custom_presets[new_name] = new_preset
            logger.info("Duplicated preset %s to %s", source_name, new_name)
            return True
        except Exception as e:
            logger.error("Failed to duplicate preset: %s", e)
            return False
# =============================================================================
# FUNCIONES DE CONVENIENCIA
# =============================================================================
# Lazily-created module-level singleton; always access via get_preset_manager().
_manager: Optional[PresetManager] = None
def get_preset_manager() -> PresetManager:
    """Return the process-wide PresetManager singleton, creating it on first use."""
    global _manager
    if _manager is None:
        _manager = PresetManager()
    return _manager
def apply_preset_to_project(preset_name: str) -> Dict[str, Any]:
    """Resolve a preset by name and expand it into a project configuration.

    Returns a dict carrying a success flag, the preset name, the expanded
    config payload and a human-readable message; on a miss, an error dict.
    """
    preset = get_preset_manager().load_preset(preset_name)
    if not preset:
        return {"success": False, "error": f"Preset not found: {preset_name}"}
    track_dicts = []
    for track in preset.tracks_config:
        track_dicts.append({
            "name": track.name,
            "track_type": track.track_type,
            "instrument_role": track.role,
            "volume": track.volume,
            "pan": track.pan,
            "device_chain": track.device_chain,
        })
    config = {
        "bpm": preset.bpm,
        "key": preset.key,
        "style": preset.style,
        "structure": preset.structure,
        "tracks": track_dicts,
        "mixing_config": preset.mixing_config.to_dict(),
        "sample_criteria": preset.sample_selection.to_dict(),
    }
    return {
        "success": True,
        "preset_name": preset_name,
        "config": config,
        "message": f"Preset '{preset_name}' loaded and ready to apply",
    }
def get_default_preset() -> str:
    """Return the name of the preset used when the caller specifies none."""
    default_name = "reggaeton_classic_95bpm"
    return default_name
def list_available_presets(style_filter: Optional[str] = None) -> List[Dict[str, Any]]:
    """List every available preset, optionally restricted to one style."""
    entries = get_preset_manager().list_presets()
    if not style_filter:
        return entries
    return [entry for entry in entries if entry.get("style") == style_filter]
def quick_apply_preset(preset_name: Optional[str] = None) -> Dict[str, Any]:
    """Apply the named preset, falling back to the default when omitted."""
    chosen = preset_name if preset_name is not None else get_default_preset()
    return apply_preset_to_project(chosen)
# =============================================================================
# HANDLERS MCP
# =============================================================================
def _cmd_load_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: load a preset by name and return its serialized form."""
    name = params.get("preset_name", "")
    if not name:
        return {"success": False, "error": "Missing preset_name parameter"}
    loaded = get_preset_manager().load_preset(name)
    if not loaded:
        return {"success": False, "error": f"Preset not found: {name}"}
    return {"success": True, "preset": loaded.to_dict()}
def _cmd_save_as_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: persist the given configuration as a named preset."""
    name = params.get("preset_name", "")
    if not name:
        return {"success": False, "error": "Missing preset_name parameter"}
    saved = get_preset_manager().save_as_preset(params.get("config", {}), name)
    if saved:
        message = f"Preset '{name}' saved"
    else:
        message = "Failed to save"
    return {"success": saved, "preset_name": name, "message": message}
def _cmd_list_presets(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: enumerate the available presets."""
    entries = get_preset_manager().list_presets(
        include_builtin=params.get("include_builtin", True),
        filter_tags=params.get("filter_tags"),
    )
    return {"success": True, "count": len(entries), "presets": entries}
def _cmd_create_custom_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: create a custom preset from a configuration dict."""
    name = params.get("name", "")
    if not name:
        return {"success": False, "error": "Missing name parameter"}
    created = get_preset_manager().create_custom_preset(
        params.get("current_config", {}),
        name,
        params.get("description", ""),
        params.get("tags"),
    )
    return {
        "success": created is not None,
        "preset_name": name,
        "preset": created.to_dict() if created else None,
    }
def _cmd_delete_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: delete a custom preset by name."""
    name = params.get("preset_name", "")
    if not name:
        return {"success": False, "error": "Missing preset_name parameter"}
    removed = get_preset_manager().delete_preset(name)
    message = f"Preset '{name}' deleted" if removed else f"Failed to delete '{name}'"
    return {"success": removed, "message": message}
def _cmd_export_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: export a preset to an external JSON file."""
    name = params.get("preset_name", "")
    path = params.get("export_path", "")
    if not name or not path:
        return {"success": False, "error": "Missing preset_name or export_path"}
    exported = get_preset_manager().export_preset(name, path)
    message = f"Exported to {path}" if exported else "Export failed"
    return {"success": exported, "message": message}
def _cmd_import_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: import a preset from an external JSON file."""
    path = params.get("import_path", "")
    if not path:
        return {"success": False, "error": "Missing import_path parameter"}
    imported = get_preset_manager().import_preset(path, params.get("preset_name"))
    if imported is None:
        return {"success": False, "preset_name": None, "preset": None}
    return {"success": True, "preset_name": imported.name, "preset": imported.to_dict()}
def _cmd_get_preset_details(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: fetch the full details of a preset."""
    name = params.get("preset_name", "")
    if not name:
        return {"success": False, "error": "Missing preset_name parameter"}
    details = get_preset_manager().get_preset_details(name)
    # `error` is always present; None when the lookup succeeded.
    error = None if details else f"Preset not found: {name}"
    return {"success": details is not None, "preset": details, "error": error}
def _cmd_duplicate_preset(params: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler: copy an existing preset under a new name."""
    source = params.get("source_name", "")
    target = params.get("new_name", "")
    if not source or not target:
        return {"success": False, "error": "Missing source_name or new_name"}
    duplicated = get_preset_manager().duplicate_preset(source, target)
    message = f"Duplicated: {source} -> {target}" if duplicated else "Duplication failed"
    return {"success": duplicated, "message": message}
# Map of MCP command names to their handler callables (consumed by the MCP server).
MCP_HANDLERS = {
    "load_preset": _cmd_load_preset,
    "save_as_preset": _cmd_save_as_preset,
    "list_presets": _cmd_list_presets,
    "create_custom_preset": _cmd_create_custom_preset,
    "delete_preset": _cmd_delete_preset,
    "export_preset": _cmd_export_preset,
    "import_preset": _cmd_import_preset,
    "get_preset_details": _cmd_get_preset_details,
    "duplicate_preset": _cmd_duplicate_preset,
    # apply_preset bypasses the _cmd_* wrappers and calls the helper directly.
    "apply_preset": lambda p: apply_preset_to_project(p.get("preset_name", "")),
}
# =============================================================================
# MAIN / TEST
# =============================================================================
if __name__ == "__main__":
    # Manual smoke test: exercises the preset manager end-to-end and prints results.
    logging.basicConfig(level=logging.INFO)
    print("=" * 70)
    print("PRESET SYSTEM - AbletonMCP_AI")
    print("=" * 70)
    print("\n1. Inicializando PresetManager...")
    manager = get_preset_manager()
    print(f" OK - Directorio: {manager._presets_dir}")
    print("\n2. Presets predefinidos:")
    for name, preset in manager._builtin_presets.items():
        print(f" - {name}: {preset.description[:45]}...")
    print("\n3. Listando todos los presets...")
    all_presets = manager.list_presets()
    print(f" Total: {len(all_presets)} presets")
    # Show at most the first five summaries.
    for p in all_presets[:5]:
        print(f" - {p['name']} ({p['style']}, {p['bpm']} BPM, {p['track_count']} tracks)")
    print("\n4. Cargando 'reggaeton_classic_95bpm'...")
    classic = manager.load_preset("reggaeton_classic_95bpm")
    if classic:
        print(f" BPM: {classic.bpm}, Key: {classic.key}, Tracks: {len(classic.tracks_config)}")
    print("\n5. Detalles de 'perreo_intenso_100bpm'...")
    details = manager.get_preset_details("perreo_intenso_100bpm")
    if details:
        print(f" EQ Low: {details['mixing']['eq_low_gain']} dB, Comp: {details['mixing']['compressor_threshold']} dB")
    print("\n6. Aplicando preset default...")
    result = quick_apply_preset()
    print(f" Success: {result['success']}, Preset: {result.get('preset_name')}")
    print("\n" + "=" * 70)
    print("Tests completados!")
    print("=" * 70)

View File

@@ -0,0 +1,65 @@
"""Compatibility wrapper for legacy production_workflow imports."""
from typing import Any, Dict, List, Optional
from .workflow_engine import get_workflow
class ProductionWorkflow:
    """Expose the legacy API expected by server.py.

    Thin compatibility facade over the shared workflow engine: the legacy
    methods below translate old call signatures into the engine's
    ``generate_complete_reggaeton`` / ``generate_from_reference``, and any
    other attribute access falls through to the wrapped engine.
    """
    def __init__(self):
        self._workflow = get_workflow()
    def __getattr__(self, name):
        # Delegate anything not defined on this wrapper to the real engine.
        return getattr(self._workflow, name)
    def generate_song(self, genre: str = "reggaeton", bpm: float = 95.0, key: str = "Am",
                      style: str = "classic", structure: str = "standard") -> Dict[str, Any]:
        # `genre` is kept for signature compatibility; the engine does not take it.
        return self._workflow.generate_complete_reggaeton(
            bpm=bpm, key=key, style=style, structure=structure
        )
    def generate_from_samples(self, samples: Optional[List[Dict[str, Any]]] = None,
                              bpm: float = 95.0, key: str = "Am",
                              style: str = "matched") -> Dict[str, Any]:
        outcome = self._workflow.generate_complete_reggaeton(
            bpm=bpm, key=key, style=style, structure="standard", use_samples=bool(samples)
        )
        if isinstance(outcome, dict):
            outcome.setdefault("input_samples", samples if samples else [])
        return outcome
    def produce_reggaeton(self, bpm: float = 95.0, key: str = "Am",
                          style: str = "classic", structure: str = "verse-chorus") -> Dict[str, Any]:
        return self._workflow.generate_complete_reggaeton(
            bpm=bpm, key=key, style=style, structure=structure
        )
    def produce_from_reference(self, reference_path: str, bpm: Optional[float] = None,
                               key: Optional[str] = None) -> Dict[str, Any]:
        outcome = self._workflow.generate_from_reference(reference_path)
        if isinstance(outcome, dict):
            # Record what the caller asked for without clobbering engine output.
            for field_name, field_value in (("requested_bpm", bpm), ("requested_key", key)):
                if field_value is not None:
                    outcome.setdefault(field_name, field_value)
        return outcome
    def produce_arrangement(self, bpm: float = 95.0, key: str = "Am",
                            style: str = "classic") -> Dict[str, Any]:
        outcome = self._workflow.generate_complete_reggaeton(
            bpm=bpm, key=key, style=style, structure="extended"
        )
        if isinstance(outcome, dict):
            outcome.setdefault("view", "Arrangement")
        return outcome
    def complete_production(self, bpm: float = 95.0, key: str = "Am",
                            style: str = "classic") -> Dict[str, Any]:
        outcome = self._workflow.generate_complete_reggaeton(
            bpm=bpm, key=key, style=style, structure="extended"
        )
        if isinstance(outcome, dict):
            outcome.setdefault("production_complete", True)
        return outcome

View File

@@ -0,0 +1,820 @@
"""
RationaleLogger - Tracks all AI decisions for auditability and analysis.
This module provides comprehensive logging of all AI-driven decisions in the
production pipeline, including sample selection, kit assembly, variations, and
mixing choices. All entries are stored in SQLite for queryable analysis.
"""
import sqlite3
import json
import os
import uuid
from datetime import datetime
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from pathlib import Path
@dataclass
class SampleSelectionRationale:
    """Rationale for a sample selection decision."""
    decision: str                        # human-readable summary of the choice
    reasoning: List[str]                 # ordered reasoning points behind the choice
    rejected: List[Dict[str, str]]       # rejected options with their reasons
    confidence: float                    # confidence score (0.0-1.0)
    role: str                            # sample role (kick, snare, hihat, ...)
    selected_sample: str                 # path or name of the chosen sample
    similarity_scores: Dict[str, float]  # similarity metrics keyed by metric name
    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form (asdict recurses into nested dataclasses)."""
        return asdict(self)
@dataclass
class KitAssemblyRationale:
    """Rationale for a drum kit assembly decision."""
    kit_samples: Dict[str, str]  # role -> sample path
    coherence_score: float       # overall kit coherence (0.0-1.0)
    weak_links: List[Dict[str, Any]]  # weak coherence points with details
    reasoning: List[str]         # ordered reasoning points behind the assembly
    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form (asdict recurses into nested dataclasses)."""
        return asdict(self)
@dataclass
class SectionVariationRationale:
    """Rationale for a section variation decision."""
    section_name: str            # section label (verse, chorus, bridge, ...)
    base_kit: Dict[str, str]     # original kit: role -> sample path
    evolved_kit: Dict[str, str]  # modified kit: role -> sample path
    coherence_with_base: float   # how well the variation matches the base kit
    changes: List[str]           # specific changes made relative to the base
    reasoning: List[str]         # ordered reasoning points behind the variation
    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form (asdict recurses into nested dataclasses)."""
        return asdict(self)
@dataclass
class MixDecisionRationale:
    """Rationale for a mixing decision."""
    track_index: int             # index of the affected track
    track_name: str              # name of the affected track
    effect: str                  # effect/processor applied
    parameters: Dict[str, Any]   # effect parameters applied
    reasoning: List[str]         # ordered reasoning points behind the decision
    before_state: Optional[Dict[str, Any]]  # track state before the change, if captured
    after_state: Optional[Dict[str, Any]]   # track state after the change, if captured
    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form (asdict recurses into nested dataclasses)."""
        return asdict(self)
class RationaleLogger:
"""
Logs and queries AI decisions for auditability.
Provides a complete audit trail of all AI-driven decisions including:
- Sample selection with similarity scores and alternatives
- Kit assembly with coherence analysis
- Section variations with change tracking
- Mix decisions with before/after states
All data is stored in SQLite for efficient querying and analysis.
"""
def __init__(self, db_path: Optional[str] = None):
"""
Initialize the RationaleLogger.
Args:
db_path: Path to SQLite database. If None, uses default location.
"""
if db_path is None:
# Store in the same directory as the engine files
base_dir = Path(__file__).parent.parent
db_path = str(base_dir / "data" / "rationale.db")
self.db_path = db_path
self._ensure_data_dir()
self._init_database()
self._current_session_id: Optional[str] = None
def _ensure_data_dir(self) -> None:
"""Create data directory if it doesn't exist."""
data_dir = Path(self.db_path).parent
data_dir.mkdir(parents=True, exist_ok=True)
    def _init_database(self) -> None:
        """Initialize the SQLite database with required tables and indexes.

        Idempotent: every statement uses IF NOT EXISTS, so running this
        against an already-initialized database is a no-op.
        """
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            # Create rationale_entries table
            # (structured payload columns hold JSON serialized as TEXT).
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS rationale_entries (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    session_id TEXT,
                    track_name TEXT,
                    decision_type TEXT,
                    decision_description TEXT,
                    inputs TEXT,
                    outputs TEXT,
                    scores TEXT,
                    rationale TEXT,
                    alternatives_considered TEXT
                )
            """)
            # Create index for efficient queries
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_session
                ON rationale_entries(session_id)
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_decision_type
                ON rationale_entries(decision_type)
            """)
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_timestamp
                ON rationale_entries(timestamp)
            """)
            # Create stats tracking table
            # (one row per decision_type, maintained by _update_stats's upsert).
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS decision_stats (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    decision_type TEXT UNIQUE,
                    count INTEGER DEFAULT 0,
                    avg_confidence REAL DEFAULT 0.0,
                    last_updated DATETIME DEFAULT CURRENT_TIMESTAMP
                )
            """)
            conn.commit()
def start_session(self, track_name: Optional[str] = None) -> str:
"""
Start a new logging session.
Args:
track_name: Name of the track/project being worked on
Returns:
The generated session ID
"""
self._current_session_id = str(uuid.uuid4())[:8]
self._current_track_name = track_name or "untitled"
return self._current_session_id
def get_session_id(self) -> str:
"""Get current session ID, creating one if needed."""
if self._current_session_id is None:
self.start_session()
return self._current_session_id
    def _insert_entry(
        self,
        decision_type: str,
        description: str,
        inputs: Dict[str, Any],
        outputs: Dict[str, Any],
        scores: Dict[str, Any],
        rationale: Dict[str, Any],
        alternatives: List[Dict[str, Any]]
    ) -> int:
        """Insert a rationale entry into the database.

        Structured payloads are serialized with ``json.dumps(..., default=str)``
        so values that are not JSON-native degrade to strings instead of raising.

        Returns:
            The rowid of the inserted entry.
        """
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT INTO rationale_entries (
                    session_id, track_name, decision_type, decision_description,
                    inputs, outputs, scores, rationale, alternatives_considered
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                self.get_session_id(),
                # Track name is set by start_session(); fall back when never started.
                getattr(self, '_current_track_name', 'untitled'),
                decision_type,
                description,
                json.dumps(inputs, default=str),
                json.dumps(outputs, default=str),
                json.dumps(scores, default=str),
                json.dumps(rationale, default=str),
                json.dumps(alternatives, default=str)
            ))
            entry_id = cursor.lastrowid
            # Update stats
            # (same transaction as the insert, committed together below).
            self._update_stats(conn, cursor, decision_type, rationale.get('confidence', 0.5))
            conn.commit()
            return entry_id
    def _update_stats(
        self,
        conn: sqlite3.Connection,
        cursor: sqlite3.Cursor,
        decision_type: str,
        confidence: float
    ) -> None:
        """Update per-decision-type statistics via an SQLite upsert.

        NOTE: inside the DO UPDATE clause, unqualified column names
        (``count``, ``avg_confidence``) refer to the row's existing values,
        so the running average is computed with the pre-increment count.
        """
        cursor.execute("""
            INSERT INTO decision_stats (decision_type, count, avg_confidence)
            VALUES (?, 1, ?)
            ON CONFLICT(decision_type) DO UPDATE SET
                count = count + 1,
                avg_confidence = (avg_confidence * count + ?) / (count + 1),
                last_updated = CURRENT_TIMESTAMP
        """, (decision_type, confidence, confidence))
    def log_sample_selection(
        self,
        role: str,
        selected_sample: str,
        alternatives: List[str],
        similarity_scores: Dict[str, float],
        rationale: str,
        reasoning: Optional[List[str]] = None,
        rejected_details: Optional[List[Dict[str, str]]] = None,
        confidence: float = 0.0
    ) -> int:
        """
        Log a sample selection decision.

        Args:
            role: Sample role (kick, snare, hihat, etc.)
            selected_sample: Path or name of selected sample
            alternatives: List of alternative samples considered
            similarity_scores: Dict of similarity metrics
            rationale: Human-readable explanation
            reasoning: List of detailed reasoning points
            rejected_details: List of rejected options with reasons
            confidence: Confidence score (0.0-1.0)

        Returns:
            Entry ID
        """
        # NOTE(review): similarity_scores is typed Dict[str, float] but a string
        # 'criteria' key is also read below — confirm the intended schema.
        inputs = {
            'role': role,
            'candidates': alternatives + [selected_sample],
            'criteria': similarity_scores.get('criteria', 'similarity')
        }
        outputs = {
            'selected': selected_sample,
            'alternatives_count': len(alternatives)
        }
        # Missing metrics default to 0.0 so the scores payload is always complete.
        scores = {
            'confidence': confidence,
            'similarity_to_reference': similarity_scores.get('reference_similarity', 0.0),
            'genre_match': similarity_scores.get('genre_match', 0.0),
            'energy_match': similarity_scores.get('energy_match', 0.0)
        }
        rationale_dict = {
            'decision': f"Selected {os.path.basename(selected_sample)} as {role}",
            'reasoning': reasoning or [rationale],
            'rejected': rejected_details or [],
            'confidence': confidence
        }
        # Every non-selected candidate is recorded with a generic reason,
        # followed by any caller-provided rejection details.
        alternatives_list = [
            {'sample': alt, 'reason': 'Lower similarity score'}
            for alt in alternatives
        ]
        if rejected_details:
            alternatives_list.extend(rejected_details)
        return self._insert_entry(
            decision_type='sample_selection',
            description=f"{role}: {os.path.basename(selected_sample)}",
            inputs=inputs,
            outputs=outputs,
            scores=scores,
            rationale=rationale_dict,
            alternatives=alternatives_list
        )
def log_kit_assembly(
self,
kit_samples: Dict[str, str],
coherence_score: float,
weak_links: List[Dict[str, Any]],
reasoning: Optional[List[str]] = None
) -> int:
"""
Log a drum kit assembly decision.
Args:
kit_samples: Dict mapping roles to sample paths
coherence_score: Overall kit coherence (0.0-1.0)
weak_links: List of weak coherence points with details
reasoning: List of reasoning points
Returns:
Entry ID
"""
inputs = {
'available_samples': len(kit_samples),
'target_coherence': 0.8
}
outputs = {
'kit_configuration': {role: os.path.basename(path) for role, path in kit_samples.items()},
'size': len(kit_samples)
}
scores = {
'coherence': coherence_score,
'weak_link_count': len(weak_links),
'confidence': coherence_score # Use coherence as confidence
}
rationale_dict = {
'decision': f"Assembled {len(kit_samples)}-piece drum kit",
'reasoning': reasoning or [f"Kit coherence: {coherence_score:.2f}"],
'rejected': weak_links,
'confidence': coherence_score
}
return self._insert_entry(
decision_type='kit_assembly',
description=f"Drum kit with {len(kit_samples)} samples",
inputs=inputs,
outputs=outputs,
scores=scores,
rationale=rationale_dict,
alternatives=weak_links
)
    def log_section_variation(
        self,
        section_name: str,
        base_kit: Dict[str, str],
        evolved_kit: Dict[str, str],
        coherence_with_base: float,
        changes: Optional[List[str]] = None,
        reasoning: Optional[List[str]] = None
    ) -> int:
        """
        Log a section variation decision.

        Args:
            section_name: Name of section (verse, chorus, bridge, etc.)
            base_kit: Original kit configuration
            evolved_kit: Modified kit configuration
            coherence_with_base: How well variation matches base
            changes: List of specific changes made
            reasoning: List of reasoning points

        Returns:
            Entry ID
        """
        # NOTE(review): the `changes` parameter is accepted but never used —
        # differences are recomputed below from the two kits. Confirm whether
        # the caller-declared change list should also be persisted.
        # Calculate differences
        # (roles whose sample differs, or that exist in only one of the kits).
        changed_samples = []
        for role in set(base_kit.keys()) | set(evolved_kit.keys()):
            if base_kit.get(role) != evolved_kit.get(role):
                changed_samples.append(role)
        inputs = {
            'section': section_name,
            'base_kit': {k: os.path.basename(v) for k, v in base_kit.items()}
        }
        outputs = {
            'evolved_kit': {k: os.path.basename(v) for k, v in evolved_kit.items()},
            'changed_roles': changed_samples,
            'unchanged_roles': list(set(base_kit.keys()) - set(changed_samples))
        }
        scores = {
            'coherence_with_base': coherence_with_base,
            # max(..., 1) guards against division by zero for an empty base kit.
            'change_ratio': len(changed_samples) / max(len(base_kit), 1),
            'confidence': coherence_with_base
        }
        rationale_dict = {
            'decision': f"Created {section_name} variation from base kit",
            'reasoning': reasoning or [f"Coherence with base: {coherence_with_base:.2f}"],
            'rejected': [],
            'confidence': coherence_with_base
        }
        return self._insert_entry(
            decision_type='variation',
            description=f"{section_name} kit variation",
            inputs=inputs,
            outputs=outputs,
            scores=scores,
            rationale=rationale_dict,
            alternatives=[]
        )
    def log_mix_decision(
        self,
        track_index: int,
        effect: str,
        parameters: Dict[str, Any],
        rationale: str,
        track_name: Optional[str] = None,
        reasoning: Optional[List[str]] = None,
        before_state: Optional[Dict[str, Any]] = None,
        after_state: Optional[Dict[str, Any]] = None,
        alternatives: Optional[List[Dict[str, Any]]] = None
    ) -> int:
        """
        Log a mixing decision.

        Args:
            track_index: Index of affected track
            effect: Effect/processor name
            parameters: Effect parameters applied
            rationale: Human-readable explanation
            track_name: Name of track
            reasoning: List of detailed reasoning points
            before_state: State before the change
            after_state: State after the change
            alternatives: Alternative approaches considered

        Returns:
            Entry ID
        """
        inputs = {
            'track_index': track_index,
            # Fall back to a synthetic label when no track name is provided.
            'track_name': track_name or f"Track {track_index}",
            'before_state': before_state or {}
        }
        outputs = {
            'effect': effect,
            'parameters': parameters,
            'after_state': after_state or {}
        }
        scores = {
            # Impact defaults to 0.5 unless the caller embeds it in parameters.
            'impact_score': parameters.get('impact', 0.5),
            'confidence': 0.8  # Mix decisions typically have good confidence
        }
        rationale_dict = {
            'decision': f"Applied {effect} to {track_name or f'track {track_index}'}",
            'reasoning': reasoning or [rationale],
            'rejected': alternatives or [],
            'confidence': 0.8
        }
        return self._insert_entry(
            decision_type='mix',
            description=f"{effect} on {track_name or f'track {track_index}'}",
            inputs=inputs,
            outputs=outputs,
            scores=scores,
            rationale=rationale_dict,
            alternatives=alternatives or []
        )
def get_session_rationale(self, session_id: str) -> List[Dict[str, Any]]:
"""
Retrieve all decisions for a session.
Args:
session_id: Session ID to query
Returns:
List of rationale entries
"""
with sqlite3.connect(self.db_path) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM rationale_entries
WHERE session_id = ?
ORDER BY timestamp
""", (session_id,))
rows = cursor.fetchall()
return [dict(row) for row in rows]
def get_decision_stats(self) -> Dict[str, Any]:
"""
Get analytics on all decisions.
Returns:
Dict with statistics including counts, averages, trends
"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
# Get per-type stats
cursor.execute("""
SELECT decision_type, count, avg_confidence, last_updated
FROM decision_stats
ORDER BY count DESC
""")
type_stats = {}
for row in cursor.fetchall():
type_stats[row[0]] = {
'count': row[1],
'avg_confidence': row[2],
'last_updated': row[3]
}
# Get overall stats
cursor.execute("""
SELECT
COUNT(*) as total_decisions,
COUNT(DISTINCT session_id) as total_sessions,
AVG(
CASE
WHEN json_extract(scores, '$.confidence') IS NOT NULL
THEN json_extract(scores, '$.confidence')
ELSE 0.5
END
) as overall_confidence
FROM rationale_entries
""")
row = cursor.fetchone()
overall = {
'total_decisions': row[0] or 0,
'total_sessions': row[1] or 0,
'overall_confidence': row[2] or 0.0
}
# Get recent activity (last 24 hours)
cursor.execute("""
SELECT COUNT(*)
FROM rationale_entries
WHERE timestamp > datetime('now', '-1 day')
""")
recent_count = cursor.fetchone()[0]
return {
'by_type': type_stats,
'overall': overall,
'recent_24h': recent_count
}
def find_similar_decisions(
self,
decision_type: str,
min_confidence: float = 0.7,
limit: int = 10
) -> List[Dict[str, Any]]:
"""
Find similar past decisions with high confidence.
Args:
decision_type: Type of decision to query
min_confidence: Minimum confidence threshold
limit: Maximum results to return
Returns:
List of similar decisions
"""
with sqlite3.connect(self.db_path) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM rationale_entries
WHERE decision_type = ?
AND json_extract(scores, '$.confidence') >= ?
ORDER BY json_extract(scores, '$.confidence') DESC, timestamp DESC
LIMIT ?
""", (decision_type, min_confidence, limit))
rows = cursor.fetchall()
return [dict(row) for row in rows]
def get_most_used_samples(self, role: Optional[str] = None, limit: int = 20) -> List[Dict[str, Any]]:
"""
Track which samples are used most frequently.
Args:
role: Filter by specific role (optional)
limit: Maximum results to return
Returns:
List of samples with usage counts
"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
if role:
cursor.execute("""
SELECT
json_extract(outputs, '$.selected') as sample,
json_extract(inputs, '$.role') as sample_role,
COUNT(*) as usage_count,
AVG(json_extract(scores, '$.confidence')) as avg_confidence
FROM rationale_entries
WHERE decision_type = 'sample_selection'
AND json_extract(inputs, '$.role') = ?
GROUP BY json_extract(outputs, '$.selected')
ORDER BY usage_count DESC
LIMIT ?
""", (role, limit))
else:
cursor.execute("""
SELECT
json_extract(outputs, '$.selected') as sample,
json_extract(inputs, '$.role') as sample_role,
COUNT(*) as usage_count,
AVG(json_extract(scores, '$.confidence')) as avg_confidence
FROM rationale_entries
WHERE decision_type = 'sample_selection'
GROUP BY json_extract(outputs, '$.selected')
ORDER BY usage_count DESC
LIMIT ?
""", (limit,))
results = []
for row in cursor.fetchall():
results.append({
'sample': row[0],
'role': row[1],
'usage_count': row[2],
'avg_confidence': row[3]
})
return results
def analyze_coherence_trends(self) -> Dict[str, Any]:
"""
Analyze coherence trends over time.
Returns:
Dict with trend analysis
"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
# Get coherence scores over time by decision type
cursor.execute("""
SELECT
decision_type,
date(timestamp) as date,
AVG(json_extract(scores, '$.coherence')) as avg_coherence,
COUNT(*) as count
FROM rationale_entries
WHERE json_extract(scores, '$.coherence') IS NOT NULL
GROUP BY decision_type, date(timestamp)
ORDER BY date
""")
trends = {}
for row in cursor.fetchall():
dec_type = row[0]
if dec_type not in trends:
trends[dec_type] = []
trends[dec_type].append({
'date': row[1],
'avg_coherence': row[2],
'count': row[3]
})
# Calculate overall trend
cursor.execute("""
SELECT
AVG(json_extract(scores, '$.coherence')) as overall_avg,
MIN(json_extract(scores, '$.coherence')) as min_coherence,
MAX(json_extract(scores, '$.coherence')) as max_coherence
FROM rationale_entries
WHERE json_extract(scores, '$.coherence') IS NOT NULL
""")
row = cursor.fetchone()
return {
'trends_by_type': trends,
'overall': {
'average': row[0] or 0.0,
'minimum': row[1] or 0.0,
'maximum': row[2] or 0.0
}
}
def export_session_report(self, session_id: str, output_path: Optional[str] = None) -> str:
"""
Export a detailed session report.
Args:
session_id: Session to export
output_path: Output file path (optional)
Returns:
Path to exported report
"""
entries = self.get_session_rationale(session_id)
if not entries:
return ""
# Generate report
report = {
'session_id': session_id,
'generated_at': datetime.now().isoformat(),
'total_decisions': len(entries),
'decisions': []
}
for entry in entries:
report['decisions'].append({
'timestamp': entry['timestamp'],
'type': entry['decision_type'],
'description': entry['decision_description'],
'rationale': json.loads(entry['rationale']),
'scores': json.loads(entry['scores'])
})
# Determine output path
if output_path is None:
base_dir = Path(self.db_path).parent
output_path = str(base_dir / f"session_report_{session_id}.json")
with open(output_path, 'w') as f:
json.dump(report, f, indent=2)
return output_path
def clear_session(self, session_id: str) -> int:
"""
Clear all entries for a session.
Args:
session_id: Session to clear
Returns:
Number of entries deleted
"""
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
DELETE FROM rationale_entries
WHERE session_id = ?
""", (session_id,))
deleted = cursor.rowcount
conn.commit()
return deleted
def get_decision_by_id(self, entry_id: int) -> Optional[Dict[str, Any]]:
"""
Retrieve a specific decision by ID.
Args:
entry_id: Entry ID to retrieve
Returns:
Decision entry or None
"""
with sqlite3.connect(self.db_path) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("""
SELECT * FROM rationale_entries
WHERE id = ?
""", (entry_id,))
row = cursor.fetchone()
return dict(row) if row else None
# Singleton instance for module-level access
_default_logger: Optional[RationaleLogger] = None
def get_logger(db_path: Optional[str] = None) -> RationaleLogger:
    """Return the process-wide RationaleLogger, creating it on first use.

    Args:
        db_path: Path to the database (optional; only honored on the
            first call, when the singleton is constructed)

    Returns:
        The shared RationaleLogger instance
    """
    global _default_logger
    if _default_logger is None:
        _default_logger = RationaleLogger(db_path)
    return _default_logger
def reset_logger() -> None:
    """Discard the singleton logger so the next get_logger() call builds
    a fresh one (useful for testing)."""
    global _default_logger
    _default_logger = None

View File

@@ -0,0 +1,922 @@
"""
Reference Matcher - Analyzes reference tracks and creates user sound profiles.
Este módulo analiza archivos de referencia (como reggaeton_ejemplo.mp3),
extrae sus características espectrales y genera un perfil de sonido
personalizado para el usuario basado en samples similares de la librería.
"""
import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field, asdict
import numpy as np
from collections import Counter
logger = logging.getLogger("ReferenceMatcher")
# Paths
LIBRERIA_DIR = Path(r"C:\ProgramData\Ableton\Live 12 Suite\Resources\MIDI Remote Scripts\libreria")
REGGAETON_DIR = LIBRERIA_DIR / "reggaeton"
REFERENCE_FILE = LIBRERIA_DIR / "reggaeton_ejemplo.mp3"
PROFILE_FILE = REGGAETON_DIR / ".user_sound_profile.json"
# Roles de samples soportados
SAMPLE_ROLES = ["kick", "snare", "clap", "hat_closed", "hat_open",
"bass", "synth", "fx", "perc", "drum_loop"]
@dataclass
class SpectralFingerprint:
    """Full spectral fingerprint extracted from one audio file."""
    bpm: float = 0.0                    # detected tempo
    key: str = ""                       # detected key, e.g. "Am"
    energy_curve: List[float] = field(default_factory=list)  # RMS per segment
    mfccs_mean: List[float] = field(default_factory=list)    # timbre summary
    spectral_centroid_mean: float = 0.0  # brightness
    onset_strength_mean: float = 0.0     # percussiveness
    duration: float = 0.0                # seconds
    sample_rate: int = 0                 # Hz
    def to_dict(self) -> Dict[str, Any]:
        """Serialize the fingerprint to a plain dict (values are shared,
        not copied)."""
        return {name: getattr(self, name) for name in (
            "bpm", "key", "energy_curve", "mfccs_mean",
            "spectral_centroid_mean", "onset_strength_mean",
            "duration", "sample_rate")}
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SpectralFingerprint":
        """Rebuild a fingerprint from a dict, tolerating missing keys."""
        fallback: Dict[str, Any] = {
            "bpm": 0.0,
            "key": "",
            "energy_curve": [],
            "mfccs_mean": [],
            "spectral_centroid_mean": 0.0,
            "onset_strength_mean": 0.0,
            "duration": 0.0,
            "sample_rate": 0,
        }
        return cls(**{name: data.get(name, default)
                      for name, default in fallback.items()})
@dataclass
class SampleMatch:
    """Outcome of comparing one library sample against the reference."""
    path: str                           # absolute path of the candidate sample
    name: str                           # basename, for display
    role: str                           # inferred role (kick, snare, ...)
    similarity_score: float             # 0.0 .. 1.0, higher is closer
    fingerprint: SpectralFingerprint    # full spectral analysis of the sample
@dataclass
class UserSoundProfile:
    """Personalized sound profile derived from the user's reference track."""
    # Similarity-weighted average characteristics
    preferred_bpm: float = 0.0
    preferred_key: str = ""
    preferred_timbre: List[float] = field(default_factory=list)
    characteristic_energy_curve: List[float] = field(default_factory=list)
    # Most used roles, ordered by frequency
    preferred_roles: List[str] = field(default_factory=list)
    # Metadata
    created_from_reference: str = ""
    total_matches_analyzed: int = 0
    genre: str = "reggaeton"
    # Closest matches grouped by role
    top_matches_by_role: Dict[str, List[Dict]] = field(default_factory=dict)
    def to_dict(self) -> Dict[str, Any]:
        """Serialize the profile to a JSON-friendly dict."""
        return {name: getattr(self, name) for name in (
            "preferred_bpm", "preferred_key", "preferred_timbre",
            "characteristic_energy_curve", "preferred_roles",
            "created_from_reference", "total_matches_analyzed",
            "genre", "top_matches_by_role")}
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "UserSoundProfile":
        """Rebuild a profile from a dict, tolerating missing keys."""
        fallback: Dict[str, Any] = {
            "preferred_bpm": 0.0,
            "preferred_key": "",
            "preferred_timbre": [],
            "characteristic_energy_curve": [],
            "preferred_roles": [],
            "created_from_reference": "",
            "total_matches_analyzed": 0,
            "genre": "reggaeton",
            "top_matches_by_role": {},
        }
        return cls(**{name: data.get(name, default)
                      for name, default in fallback.items()})
class AudioAnalyzer:
    """Analyzes audio files and extracts spectral fingerprints.

    Uses librosa when it is importable; otherwise every analysis falls
    back to a deterministic mock fingerprint so the rest of the pipeline
    keeps working without the dependency.
    """
    def __init__(self):
        # Probe once at construction; analyze_file() branches on this flag.
        self._librosa_available = self._check_librosa()
    def _check_librosa(self) -> bool:
        """Return True when librosa (and librosa.display) can be imported."""
        try:
            import librosa
            import librosa.display
            return True
        except ImportError:
            logger.warning("librosa no disponible. Usando modo simulado.")
            return False
    def analyze_file(self, file_path: str) -> Optional[SpectralFingerprint]:
        """
        Analyze an audio file and extract its spectral fingerprint.

        Args:
            file_path: Path to the audio file

        Returns:
            SpectralFingerprint with all extracted features, or None when
            the file does not exist.
        """
        if not os.path.exists(file_path):
            logger.error("Archivo no encontrado: %s", file_path)
            return None
        if self._librosa_available:
            return self._analyze_with_librosa(file_path)
        else:
            return self._generate_mock_fingerprint(file_path)
    def _analyze_with_librosa(self, file_path: str) -> Optional[SpectralFingerprint]:
        """Real analysis via librosa; falls back to the mock on any error."""
        try:
            import librosa
            import librosa.display
            # Load at native sample rate (sr=None keeps the file's rate).
            y, sr = librosa.load(file_path, sr=None)
            duration = librosa.get_duration(y=y, sr=sr)
            # 1. Detect BPM.
            tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
            bpm = float(tempo) if isinstance(tempo, (int, float, np.number)) else 95.0
            # 2. Detect key (simplified: strongest average chroma bin).
            chroma = librosa.feature.chroma_stft(y=y, sr=sr)
            chroma_mean = np.mean(chroma, axis=1)
            key_idx = np.argmax(chroma_mean)
            keys = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
            key = keys[key_idx] + "m"  # Assume minor mode for reggaeton
            # 3. Energy curve: RMS over ~1-second frames.
            hop_length = 512
            frame_length = sr  # 1 second
            rms = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0]
            energy_curve = rms.tolist() if len(rms) > 0 else [0.5]
            # Normalize to at most 16 segments.
            if len(energy_curve) > 16:
                # Group into 16 segments by averaging chunks
                segment_size = len(energy_curve) // 16
                energy_curve = [
                    np.mean(energy_curve[i:i+segment_size])
                    for i in range(0, len(energy_curve), segment_size)
                ][:16]
            # 4. MFCCs (timbre), averaged over time.
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            mfccs_mean = np.mean(mfccs, axis=1).tolist()
            # 5. Spectral centroid (brightness).
            spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
            spectral_centroid_mean = float(np.mean(spectral_centroids))
            # 6. Onset strength (rhythm / percussiveness).
            onset_env = librosa.onset.onset_strength(y=y, sr=sr)
            onset_strength_mean = float(np.mean(onset_env))
            logger.info("Análisis completado: %s (BPM: %.1f, Key: %s)",
                        file_path, bpm, key)
            return SpectralFingerprint(
                bpm=bpm,
                key=key,
                energy_curve=energy_curve,
                mfccs_mean=mfccs_mean,
                spectral_centroid_mean=spectral_centroid_mean,
                onset_strength_mean=onset_strength_mean,
                duration=duration,
                sample_rate=sr
            )
        except Exception as e:
            logger.error("Error analizando %s: %s", file_path, e)
            return self._generate_mock_fingerprint(file_path)
    def _generate_mock_fingerprint(self, file_path: str) -> SpectralFingerprint:
        """Generate a simulated fingerprint for testing without librosa.

        Values are derived deterministically from an MD5 of the path, so
        the same file always yields the same fingerprint."""
        import hashlib
        # Deterministic values based on the file name
        name_hash = hashlib.md5(file_path.encode()).hexdigest()
        # BPM in 85-105 (typical reggaeton range)
        bpm = 85 + (int(name_hash[:4], 16) % 20)
        # Key chosen from the hash
        keys = ['Am', 'Dm', 'Gm', 'Cm', 'Em', 'Bm', 'Fm']
        key = keys[int(name_hash[4:6], 16) % len(keys)]
        # Simulated energy curve (16 segments)
        np.random.seed(int(name_hash[:8], 16))
        energy_curve = np.random.uniform(0.3, 0.9, 16).tolist()
        # Simulated MFCCs
        mfccs_mean = np.random.uniform(-50, 50, 13).tolist()
        return SpectralFingerprint(
            bpm=float(bpm),
            key=key,
            energy_curve=energy_curve,
            mfccs_mean=mfccs_mean,
            spectral_centroid_mean=float(2000 + int(name_hash[6:10], 16) % 2000),
            onset_strength_mean=float(0.3 + (int(name_hash[10:12], 16) % 70) / 100),
            duration=30.0,
            sample_rate=44100
        )
class SimilarityEngine:
    """Scores how similar two spectral fingerprints are."""
    def find_similar(self,
                     reference: SpectralFingerprint,
                     candidates: List[Tuple[str, SpectralFingerprint]],
                     top_k: int = 20) -> List[SampleMatch]:
        """
        Rank the candidate samples by similarity to the reference.

        Args:
            reference: Reference fingerprint
            candidates: (path, fingerprint) pairs to compare
            top_k: Number of results to return

        Returns:
            The top_k SampleMatch objects, most similar first
        """
        ranked = sorted(
            (
                SampleMatch(
                    path=sample_path,
                    name=os.path.basename(sample_path),
                    role=self._guess_role_from_path(sample_path),
                    similarity_score=self._calculate_similarity(reference, fp),
                    fingerprint=fp
                )
                for sample_path, fp in candidates
            ),
            key=lambda match: match.similarity_score,
            reverse=True
        )
        return ranked[:top_k]
    def _calculate_similarity(self,
                              ref: SpectralFingerprint,
                              cand: SpectralFingerprint) -> float:
        """
        Compute a weighted similarity score between two fingerprints.

        Each feature contributes only when both sides provide it; the
        result is the weighted mean of the available partial scores.
        Returns a value in [0.0, 1.0] (0.5 when nothing is comparable).
        """
        partial: List[float] = []
        wts: List[float] = []
        # 1. BPM closeness (weight 0.25, 30 BPM tolerance)
        if ref.bpm > 0 and cand.bpm > 0:
            partial.append(max(0, 1 - (abs(ref.bpm - cand.bpm) / 30)))
            wts.append(0.25)
        # 2. Key match (weight 0.15; same root letter earns half credit)
        if ref.key and cand.key:
            if ref.key == cand.key:
                key_sim = 1.0
            elif ref.key[0] == cand.key[0]:
                key_sim = 0.5
            else:
                key_sim = 0.0
            partial.append(key_sim)
            wts.append(0.15)
        # 3. Energy-curve shape via Pearson correlation (weight 0.25)
        if ref.energy_curve and cand.energy_curve:
            shared_len = min(len(ref.energy_curve), len(cand.energy_curve))
            ref_curve = np.array(ref.energy_curve[:shared_len])
            cand_curve = np.array(cand.energy_curve[:shared_len])
            if len(ref_curve) > 1:
                corr = np.corrcoef(ref_curve, cand_curve)[0, 1]
                if not np.isnan(corr):
                    partial.append((corr + 1) / 2)  # map [-1, 1] -> [0, 1]
                    wts.append(0.25)
        # 4. Timbre distance over MFCCs (weight 0.20)
        if ref.mfccs_mean and cand.mfccs_mean:
            ref_mfccs = np.array(ref.mfccs_mean)
            cand_mfccs = np.array(cand.mfccs_mean)
            distance = np.linalg.norm(ref_mfccs - cand_mfccs)
            max_dist = np.linalg.norm(np.abs(ref_mfccs) + 100)  # rough upper bound
            partial.append(max(0, 1 - (distance / max_dist)))
            wts.append(0.20)
        # 5. Spectral-centroid closeness (weight 0.10)
        if ref.spectral_centroid_mean > 0 and cand.spectral_centroid_mean > 0:
            sc_diff = abs(ref.spectral_centroid_mean - cand.spectral_centroid_mean)
            sc_max = max(ref.spectral_centroid_mean, cand.spectral_centroid_mean)
            partial.append(max(0, 1 - (sc_diff / sc_max)) if sc_max > 0 else 0.5)
            wts.append(0.10)
        # 6. Onset-strength closeness (weight 0.05)
        if ref.onset_strength_mean > 0 and cand.onset_strength_mean > 0:
            os_diff = abs(ref.onset_strength_mean - cand.onset_strength_mean)
            os_max = max(ref.onset_strength_mean, cand.onset_strength_mean)
            partial.append(max(0, 1 - (os_diff / os_max)) if os_max > 0 else 0.5)
            wts.append(0.05)
        if not partial:
            return 0.5  # nothing comparable: neutral score
        weighted = sum(s * w for s, w in zip(partial, wts)) / sum(wts)
        return float(weighted)
    def _guess_role_from_path(self, path: str) -> str:
        """Infer the sample's role from keywords in its (lowercased) path."""
        lower = path.lower()
        # Checked in order; the first matching keyword group wins.
        keyword_roles = (
            (("kick",), "kick"),
            (("snare",), "snare"),
            (("clap",), "clap"),
            (("hi-hat", "hihat"), "hat_closed"),
            (("bass",), "bass"),
            (("fx",), "fx"),
            (("perc",), "perc"),
            (("drumloop", "drum_loop"), "drum_loop"),
            (("oneshot", "synth"), "synth"),
        )
        for needles, role in keyword_roles:
            if any(needle in lower for needle in needles):
                return role
        return "synth"  # Default
class ReferenceMatcher:
    """
    Main matcher: analyzes reference tracks and builds user sound profiles.

    Orchestrates AudioAnalyzer (feature extraction) and SimilarityEngine
    (scoring) over the sample library, and persists the resulting
    UserSoundProfile as JSON at self.profile_path.
    """
    def __init__(self,
                 reference_path: Optional[str] = None,
                 library_path: Optional[str] = None,
                 profile_path: Optional[str] = None):
        # Fall back to the module-level default locations when not given.
        self.reference_path = reference_path or str(REFERENCE_FILE)
        self.library_path = library_path or str(REGGAETON_DIR)
        self.profile_path = profile_path or str(PROFILE_FILE)
        self.analyzer = AudioAnalyzer()
        self.similarity = SimilarityEngine()
        # Lazily populated caches.
        self._reference_fingerprint: Optional[SpectralFingerprint] = None
        self._library_index: List[Tuple[str, SpectralFingerprint]] = []
        self._profile: Optional[UserSoundProfile] = None
    def analyze_reference(self) -> Optional[SpectralFingerprint]:
        """
        Analyze the reference file and cache/return its fingerprint.

        Returns:
            SpectralFingerprint of the reference file (None on failure).
        """
        logger.info("Analizando referencia: %s", self.reference_path)
        self._reference_fingerprint = self.analyzer.analyze_file(self.reference_path)
        if self._reference_fingerprint:
            logger.info("Referencia analizada - BPM: %.1f, Key: %s",
                        self._reference_fingerprint.bpm,
                        self._reference_fingerprint.key)
        return self._reference_fingerprint
    def index_library(self, force_reindex: bool = False) -> List[Tuple[str, SpectralFingerprint]]:
        """
        Index the whole library and extract fingerprints.

        Args:
            force_reindex: When True, re-index even if an index exists

        Returns:
            List of (path, fingerprint) for every audio sample found.
        """
        if self._library_index and not force_reindex:
            return self._library_index
        logger.info("Indexando librería: %s", self.library_path)
        self._library_index = []
        library = Path(self.library_path)
        if not library.is_dir():
            logger.error("Librería no encontrada: %s", self.library_path)
            return []
        audio_extensions = ('.wav', '.aif', '.aiff', '.mp3', '.flac', '.ogg')
        for root, _dirs, files in os.walk(library):
            for filename in files:
                if filename.lower().endswith(audio_extensions):
                    filepath = os.path.join(root, filename)
                    # Analyze each sample individually.
                    fingerprint = self.analyzer.analyze_file(filepath)
                    if fingerprint:
                        self._library_index.append((filepath, fingerprint))
                        logger.debug("Indexado: %s", filename)
        logger.info("Librería indexada: %d samples", len(self._library_index))
        return self._library_index
    def find_similar_samples(self,
                             top_k: int = 50,
                             role_filter: Optional[str] = None) -> List[SampleMatch]:
        """
        Find the library samples most similar to the reference.

        Args:
            top_k: Number of samples to return
            role_filter: When given, only consider samples with this role

        Returns:
            SampleMatch list ordered by similarity (best first).
        """
        # Lazily ensure both the reference fingerprint and library index.
        if not self._reference_fingerprint:
            self.analyze_reference()
        if not self._library_index:
            self.index_library()
        if not self._reference_fingerprint or not self._library_index:
            logger.error("No se puede buscar similares: falta referencia o librería")
            return []
        # Optionally narrow the candidate set by role.
        candidates = self._library_index
        if role_filter:
            candidates = [
                (path, fp) for path, fp in candidates
                if self.similarity._guess_role_from_path(path) == role_filter
            ]
        logger.info("Buscando %d samples similares (filtro: %s)...",
                    top_k, role_filter or "ninguno")
        matches = self.similarity.find_similar(
            self._reference_fingerprint,
            candidates,
            top_k=top_k
        )
        return matches
    def generate_user_profile(self,
                              top_matches_count: int = 100,
                              save: bool = True) -> UserSoundProfile:
        """
        Build the user's sound profile from the most similar matches.

        Args:
            top_matches_count: How many matches feed into the profile
            save: When True, persist the profile to disk

        Returns:
            The generated UserSoundProfile (empty when no matches found).
        """
        logger.info("Generando perfil de usuario...")
        # Collect the similarity-ranked matches that seed the profile.
        matches = self.find_similar_samples(top_k=top_matches_count)
        if not matches:
            logger.warning("No hay matches para generar perfil")
            return UserSoundProfile()
        # Preferred BPM: similarity-weighted average over matched samples.
        total_weight = sum(m.similarity_score for m in matches)
        weighted_bpm = sum(m.fingerprint.bpm * m.similarity_score
                           for m in matches if m.fingerprint.bpm > 0)
        preferred_bpm = weighted_bpm / total_weight if total_weight > 0 else 95.0
        # Preferred key: the mode of the matched keys.
        keys = [m.fingerprint.key for m in matches if m.fingerprint.key]
        preferred_key = Counter(keys).most_common(1)[0][0] if keys else "Am"
        # Average timbre: similarity-weighted mean of the MFCC vectors.
        mfccs_list = []
        weights = []
        for m in matches:
            if m.fingerprint.mfccs_mean:
                mfccs_list.append(np.array(m.fingerprint.mfccs_mean))
                weights.append(m.similarity_score)
        if mfccs_list and weights:
            weighted_mfccs = np.average(mfccs_list, axis=0, weights=weights)
            preferred_timbre = weighted_mfccs.tolist()
        else:
            preferred_timbre = []
        # Characteristic energy curve: mean over matches, padded to 16 bins.
        energy_curves = []
        for m in matches:
            if m.fingerprint.energy_curve:
                energy_curves.append(np.array(m.fingerprint.energy_curve))
        if energy_curves:
            # Bring every curve to exactly 16 segments
            interpolated = []
            for ec in energy_curves:
                if len(ec) < 16:
                    # Repeat entries to reach 16
                    repeated = np.repeat(ec, 16 // len(ec) + 1)[:16]
                    interpolated.append(repeated)
                else:
                    interpolated.append(ec[:16])
            char_energy_curve = np.mean(interpolated, axis=0).tolist()
        else:
            char_energy_curve = [0.5] * 16
        # Most used roles, most frequent first.
        role_counts = Counter(m.role for m in matches)
        preferred_roles = [role for role, _ in role_counts.most_common()]
        # Best matches grouped by role (top 10 each).
        top_by_role: Dict[str, List[Dict]] = {}
        for role in SAMPLE_ROLES:
            role_matches = [m for m in matches if m.role == role][:10]
            if role_matches:
                top_by_role[role] = [
                    {
                        "path": m.path,
                        "name": m.name,
                        "similarity_score": m.similarity_score,
                        "bpm": m.fingerprint.bpm,
                        "key": m.fingerprint.key
                    }
                    for m in role_matches
                ]
        # Assemble and cache the profile.
        profile = UserSoundProfile(
            preferred_bpm=preferred_bpm,
            preferred_key=preferred_key,
            preferred_timbre=preferred_timbre,
            characteristic_energy_curve=char_energy_curve,
            preferred_roles=preferred_roles,
            created_from_reference=self.reference_path,
            total_matches_analyzed=len(matches),
            genre="reggaeton",
            top_matches_by_role=top_by_role
        )
        self._profile = profile
        if save:
            self._save_profile(profile)
        logger.info("Perfil generado - BPM: %.1f, Key: %s, Roles: %s",
                    preferred_bpm, preferred_key, preferred_roles[:5])
        return profile
    def _save_profile(self, profile: UserSoundProfile) -> bool:
        """Persist the profile to disk as JSON; returns True on success."""
        try:
            profile_data = profile.to_dict()
            with open(self.profile_path, 'w', encoding='utf-8') as f:
                json.dump(profile_data, f, indent=2, ensure_ascii=False)
            logger.info("Perfil guardado en: %s", self.profile_path)
            return True
        except Exception as e:
            logger.error("Error guardando perfil: %s", e)
            return False
    def load_profile(self) -> Optional[UserSoundProfile]:
        """
        Load the profile from disk.

        Returns:
            UserSoundProfile, or None when no saved profile exists or
            the file cannot be parsed.
        """
        if not os.path.exists(self.profile_path):
            logger.info("No existe perfil guardado en: %s", self.profile_path)
            return None
        try:
            with open(self.profile_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._profile = UserSoundProfile.from_dict(data)
            logger.info("Perfil cargado desde: %s", self.profile_path)
            return self._profile
        except Exception as e:
            logger.error("Error cargando perfil: %s", e)
            return None
    def get_user_profile(self) -> UserSoundProfile:
        """
        Return the user's profile, loading it from disk or generating it.

        Returns:
            UserSoundProfile for the user.
        """
        # Prefer the persisted profile when one can be loaded.
        profile = self.load_profile()
        if profile:
            self._profile = profile
            return profile
        # Otherwise build one from scratch.
        logger.info("Generando nuevo perfil de usuario...")
        return self.generate_user_profile()
    def get_recommended_samples(self,
                                role: str,
                                count: int = 5,
                                bpm_tolerance: float = 5.0) -> List[Dict[str, Any]]:
        """
        Return recommended samples for a role based on the user profile.

        Args:
            role: Desired sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            bpm_tolerance: BPM window around the preferred BPM

        Returns:
            List of dicts describing the recommended samples, each with a
            human-readable "reason" explaining why it was chosen.
        """
        # Make sure a profile exists (loads or generates one on demand).
        if not self._profile:
            self.get_user_profile()
        profile = self._profile
        if not profile:
            logger.warning("No se pudo obtener perfil, usando recomendaciones genéricas")
            # Fallback: rank by direct similarity to the reference track.
            matches = self.find_similar_samples(top_k=count * 3, role_filter=role)
            return [
                {
                    "path": m.path,
                    "name": m.name,
                    "role": m.role,
                    "similarity_score": m.similarity_score,
                    "bpm": m.fingerprint.bpm,
                    "key": m.fingerprint.key,
                    "reason": "Similitud directa con referencia"
                }
                for m in matches[:count]
            ]
        # First try the per-role matches stored in the profile.
        if role in profile.top_matches_by_role:
            matches = profile.top_matches_by_role[role]
            # Keep only matches whose BPM is close to the preferred BPM.
            filtered = [
                m for m in matches
                if abs(m.get("bpm", 0) - profile.preferred_bpm) <= bpm_tolerance
            ]
            # Not enough close-BPM matches: fall back to all of them.
            if len(filtered) < count:
                filtered = matches
            recommendations = filtered[:count]
            return [
                {
                    "path": r["path"],
                    "name": r["name"],
                    "role": role,
                    "similarity_score": r["similarity_score"],
                    "bpm": r.get("bpm", 0),
                    "key": r.get("key", ""),
                    "reason": f"Match con perfil (Key: {profile.preferred_key}, BPM: {profile.preferred_bpm:.1f})"
                }
                for r in recommendations
            ]
        # No profile matches for this role: search the library live.
        logger.info("No hay matches en perfil para '%s', buscando en librería...", role)
        matches = self.find_similar_samples(top_k=count * 2, role_filter=role)
        return [
            {
                "path": m.path,
                "name": m.name,
                "role": m.role,
                "similarity_score": m.similarity_score,
                "bpm": m.fingerprint.bpm,
                "key": m.fingerprint.key,
                "reason": "Búsqueda en tiempo real"
            }
            for m in matches[:count]
        ]
    def get_profile_summary(self) -> Dict[str, Any]:
        """
        Return a compact profile summary for debugging/visualization.

        Returns:
            Dict summarizing the current profile (or an error marker
            when no profile could be produced).
        """
        if not self._profile:
            self.get_user_profile()
        if not self._profile:
            return {"error": "No se pudo generar perfil"}
        p = self._profile
        return {
            "preferred_bpm": round(p.preferred_bpm, 1),
            "preferred_key": p.preferred_key,
            "characteristic_energy_curve": [round(x, 3) for x in p.characteristic_energy_curve[:8]],
            "preferred_roles": p.preferred_roles[:5],
            "top_matches_by_role_count": {
                role: len(matches)
                for role, matches in p.top_matches_by_role.items()
            },
            "total_matches_analyzed": p.total_matches_analyzed,
            "created_from": p.created_from_reference,
            "genre": p.genre
        }
# Module-level convenience API backed by a lazily created singleton.
_matcher: Optional[ReferenceMatcher] = None
def get_matcher(reference_path: Optional[str] = None,
                library_path: Optional[str] = None) -> ReferenceMatcher:
    """Return the global ReferenceMatcher, building it on the first call.

    Note: the path arguments are only honored when the singleton is
    first constructed."""
    global _matcher
    if _matcher is None:
        _matcher = ReferenceMatcher(reference_path, library_path)
    return _matcher
def get_user_profile(reference_path: Optional[str] = None,
                     library_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Main entry point: load or generate the user's sound profile.

    Args:
        reference_path: Path to the reference audio file (optional)
        library_path: Path to the sample library (optional)

    Returns:
        The user's profile serialized as a dict
    """
    return get_matcher(reference_path, library_path).get_user_profile().to_dict()
def get_recommended_samples(role: str,
                            count: int = 5,
                            reference_path: Optional[str] = None,
                            library_path: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Return recommended samples for a specific role.

    Args:
        role: Sample role (kick, snare, bass, synth, etc.)
        count: Number of samples to return
        reference_path: Path to the reference file (optional)
        library_path: Path to the library (optional)

    Returns:
        List of recommended sample dicts
    """
    return get_matcher(reference_path, library_path).get_recommended_samples(role, count)
def analyze_reference(file_path: str) -> Optional[Dict[str, Any]]:
    """
    Analyze a reference audio file and return its fingerprint.

    Args:
        file_path: Path to the audio file

    Returns:
        Fingerprint serialized as a dict, or None when analysis fails
    """
    fingerprint = AudioAnalyzer().analyze_file(file_path)
    return fingerprint.to_dict() if fingerprint else None
def refresh_profile() -> Dict[str, Any]:
    """
    Force regeneration of the user profile from scratch.

    Returns:
        The newly generated profile as a dict
    """
    global _matcher
    _matcher = None  # Drop the singleton so a fresh matcher is built
    return get_matcher().generate_user_profile(save=True).to_dict()
if __name__ == "__main__":
    # Manual smoke test: exercises the full pipeline end to end against
    # the module's default reference/library paths.
    logging.basicConfig(level=logging.INFO)
    print("=" * 60)
    print("Reference Matcher - Test")
    print("=" * 60)
    # Test 1: analyze the reference file
    print("\n1. Analizando referencia...")
    matcher = ReferenceMatcher()
    ref_fp = matcher.analyze_reference()
    if ref_fp:
        print(f" BPM: {ref_fp.bpm}")
        print(f" Key: {ref_fp.key}")
        print(f" Duration: {ref_fp.duration:.2f}s")
    # Test 2: index the sample library
    print("\n2. Indexando librería...")
    library = matcher.index_library()
    print(f" Samples indexados: {len(library)}")
    # Test 3: generate the user profile
    print("\n3. Generando perfil de usuario...")
    profile = matcher.generate_user_profile(top_matches_count=30)
    print(f" Preferred BPM: {profile.preferred_bpm:.1f}")
    print(f" Preferred Key: {profile.preferred_key}")
    print(f" Preferred Roles: {profile.preferred_roles[:3]}")
    # Test 4: per-role recommendations
    print("\n4. Obteniendo recomendaciones...")
    for role in ["kick", "snare", "bass"]:
        recs = matcher.get_recommended_samples(role, count=2)
        print(f" {role}: {[r['name'] for r in recs]}")
    print("\n" + "=" * 60)
    print("Test completado!")
    print("=" * 60)

View File

@@ -0,0 +1,699 @@
"""
Sample Selector - Intelligent sample selection with metadata store integration.
Indexes libreria/reggaeton and returns sample packs by genre with support for:
- Database-first queries with SQLite caching
- Graceful degradation when numpy is unavailable
- Hybrid analysis with automatic caching
Usage:
from engines.sample_selector import SampleSelector, get_selector
# With metadata store
selector = SampleSelector(metadata_store=store)
samples = selector.select_for_genre("reggaeton")
# Without numpy (database-only mode)
samples = selector.get_samples_without_numpy("kick", count=10)
"""
import json
import logging
import os
import random
from pathlib import Path
from typing import Optional, Dict, List, Any, Union
from dataclasses import dataclass, field
logger = logging.getLogger("SampleSelector")
# Senior Architecture: probe for optional analysis dependencies. The flags
# drive graceful degradation: database-only mode when numpy is missing,
# limited mode when librosa is missing.
try:
    import numpy as np
except ImportError:
    NUMPY_AVAILABLE = False
else:
    NUMPY_AVAILABLE = True

try:
    import librosa
except ImportError:
    LIBROSA_AVAILABLE = False
else:
    LIBROSA_AVAILABLE = True
# Import new metadata store and abstract analyzer
from .metadata_store import SampleMetadataStore, SampleFeatures, create_metadata_store
from .abstract_analyzer import (
HybridExtractor,
DatabaseExtractor,
create_extractor
)
# Default sample library root. NOTE(review): hard-coded Windows install path
# for the Live 12 MIDI Remote Scripts tree — override per-machine via
# SampleSelector(library_path=...).
REGGAETON_DIR = Path(
    r"C:\ProgramData\Ableton\Live 12 Suite\Resources\MIDI Remote Scripts\libreria\reggaeton"
)
# Maps a logical instrument role to the library sub-directory (pack) names
# that may hold matching samples; used as a pack filter in _get_samples and
# as category names for database queries in get_samples_without_numpy.
_ROLE_MAP = {
    "kick": ["kick"],
    "snare": ["snare"],
    "clap": ["snare", "clap"],
    "hat_closed": ["hi-hat"],
    "hat_open": ["hi-hat"],
    "bass": ["bass"],
    "synth": ["oneshots", "reggaeton 3"],
    "fx": ["fx"],
    "perc": ["perc loop", "hi-hat"],
}
@dataclass
class SampleInfo:
    """Lightweight descriptor for a single audio sample on disk."""
    name: str
    path: str
    role: str
    pack: str = ""
    key: str = ""
    bpm: float = 0.0

    @classmethod
    def from_sample_features(cls, features: SampleFeatures, role: str = "") -> "SampleInfo":
        """Build a SampleInfo from a stored SampleFeatures record.

        When no explicit role is given, the first stored category is used,
        falling back to "unknown" if the record has none.
        """
        location = Path(features.path)
        if role:
            resolved_role = role
        elif features.categories:
            resolved_role = features.categories[0]
        else:
            resolved_role = "unknown"
        return cls(
            name=location.name,
            path=features.path,
            role=resolved_role,
            pack=location.parent.name,
            key=features.key or "",
            bpm=features.bpm or 0.0,
        )
@dataclass
class DrumKit:
    """A curated set of drum one-shots for one kit/genre."""
    # Human-readable kit name, e.g. "Reggaeton Kit".
    name: str
    # Each slot holds a SampleInfo when a match was found, otherwise None.
    kick: Optional[SampleInfo] = None
    snare: Optional[SampleInfo] = None
    clap: Optional[SampleInfo] = None
    hat_closed: Optional[SampleInfo] = None
    hat_open: Optional[SampleInfo] = None
@dataclass
class InstrumentGroup:
    """Complete sample selection for a track: drums plus melodic/FX layers."""
    genre: str
    # Musical key label (e.g. "Am") and tempo in BPM for the selection.
    key: str
    bpm: float
    drums: Optional[DrumKit] = None
    bass: List[SampleInfo] = field(default_factory=list)
    synths: List[SampleInfo] = field(default_factory=list)
    fx: List[SampleInfo] = field(default_factory=list)

    def __post_init__(self):
        # Guarantee a (possibly empty) kit so callers can use group.drums
        # without a None check.
        if self.drums is None:
            self.drums = DrumKit(name="%s Kit" % self.genre.title())
class SampleSelector:
    """
    Intelligent sample selector with metadata store integration.

    Operating modes (exposed via ``extraction_mode``):
      - ``database_first``: a metadata store is available and SQLite is
        queried before any audio analysis.
      - ``full_analysis``: numpy + librosa are available but no store was
        resolved; samples are analyzed directly.
      - ``limited``: neither a store nor the analysis stack is available;
        only the filesystem index is used.
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        metadata_store: Optional[SampleMetadataStore] = None,
        embedding_engine=None,
        reference_matcher=None,
        verbose: bool = False
    ):
        """
        Initialize sample selector.

        Args:
            library_path: Path to sample library (default: libreria/reggaeton)
            metadata_store: Optional metadata store instance
            embedding_engine: Optional embedding engine for similarity search
            reference_matcher: Optional reference matcher for style matching
            verbose: Enable verbose logging
        """
        self._library = Path(library_path) if library_path else REGGAETON_DIR
        self._index: List[SampleInfo] = []   # lazily built filesystem index
        self._indexed = False                # guards _build_index()
        self.verbose = verbose
        self.embedding_engine = embedding_engine
        self.reference_matcher = reference_matcher

        # Senior Architecture: Metadata store integration.
        if metadata_store is None and NUMPY_AVAILABLE:
            # Only auto-create a metadata store if we can populate it
            # (analysis is possible with numpy present).
            db_path = str(self._library.parent / "sample_metadata.db")
            self.metadata_store = create_metadata_store(db_path)
            if self.verbose:
                logger.info(f"[SampleSelector] Created metadata store at {db_path}")
        elif metadata_store is not None:
            self.metadata_store = metadata_store
            if self.verbose:
                logger.info("[SampleSelector] Using provided metadata store")
        else:
            self.metadata_store = None
            logger.warning("[SampleSelector] No metadata store available")

        # Hybrid or database-only extractor, chosen from store availability.
        self.extractor = create_extractor(self.metadata_store, verbose=verbose)

        # Track extraction mode.
        # BUGFIX: the mode used to be derived from the *parameter*
        # `metadata_store`, so a store auto-created above misreported
        # "full_analysis" instead of "database_first". Derive it from the
        # resolved attribute instead, and assign both names once.
        if self.metadata_store is not None:
            mode = "database_first"
        elif NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            mode = "full_analysis"
        else:
            mode = "limited"
        # Both the private and public attribute are kept for backward compat.
        self._extraction_mode = mode
        self.extraction_mode = mode

        if verbose:
            logger.info(f"[SampleSelector] Mode: {self.extraction_mode}")
            if not NUMPY_AVAILABLE:
                logger.warning("[SampleSelector] Running in DATABASE-ONLY mode (numpy unavailable)")
            elif not LIBROSA_AVAILABLE:
                logger.warning("[SampleSelector] Running in LIMITED mode (librosa unavailable)")
            else:
                logger.info("[SampleSelector] Running in FULL mode (numpy + librosa available)")

    def _build_index(self):
        """Scan the library tree once and cache a SampleInfo per audio file."""
        if self._indexed:
            return
        self._index = []
        if not self._library.is_dir():
            logger.warning("Library not found: %s", self._library)
            return
        for root, _dirs, files in os.walk(self._library):
            for f in files:
                if f.lower().endswith((".wav", ".aif", ".aiff", ".mp3", ".flac")):
                    fpath = os.path.join(root, f)
                    rel = os.path.relpath(root, str(self._library))
                    # Top-level folder under the library is treated as the pack.
                    pack = rel.split(os.sep)[0] if rel else "unknown"
                    role = self._guess_role(f, rel)
                    self._index.append(SampleInfo(
                        name=f, path=fpath, role=role, pack=pack
                    ))
        self._indexed = True
        logger.info("Indexed %d samples from %s", len(self._index), self._library)

    def _guess_role(self, filename: str, relpath: str) -> str:
        """Guess sample role from filename and relative path (keyword match).

        First match wins; anything unrecognized defaults to "synth".
        """
        lower = filename.lower()
        rel = relpath.lower()
        if "kick" in lower or "kick" in rel:
            return "kick"
        if "snare" in lower or "snare" in rel:
            return "snare"
        if "clap" in lower:
            return "clap"
        if "hi-hat" in rel or "hihat" in lower:
            return "hat_closed"
        if "bass" in lower or "bass" in rel:
            return "bass"
        if "fx" in lower or "fx" in rel:
            return "fx"
        if "perc" in lower or "perc" in rel:
            return "perc"
        if "drumloop" in rel:
            return "drum_loop"
        return "synth"

    def _get_samples(self, role: str, limit: int = 10) -> List[SampleInfo]:
        """Get up to `limit` samples by role from the filesystem index.

        Matches either the guessed role or a pack listed for the role in
        _ROLE_MAP.
        """
        self._build_index()
        dirs = _ROLE_MAP.get(role, [])
        results = [s for s in self._index if s.role == role or s.pack in dirs]
        return results[:limit]

    def select_samples_db_only(self, role, count=10, bpm_range=None, key=None):
        """Select samples using only the database (no numpy/librosa).

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            bpm_range: Optional (min, max) BPM range
            key: Optional musical key

        Returns:
            List of SampleInfo objects from database (empty list if no store).
        """
        if not self.metadata_store:
            logger.error("Metadata store not available")
            return []
        # Query database for samples matching the role/category.
        features_list = self.metadata_store.get_samples_by_category(role)
        # Filter by BPM range if specified.
        if bpm_range and len(bpm_range) == 2:
            min_bpm, max_bpm = bpm_range
            # BUGFIX: records without a stored BPM used to raise TypeError on
            # the comparison; they are now excluded from BPM-filtered queries.
            features_list = [
                f for f in features_list
                if f.bpm is not None and min_bpm <= f.bpm <= max_bpm
            ]
        # Filter by key if specified.
        if key:
            features_list = [
                f for f in features_list
                if f.key == key
            ]
        # Convert database records to SampleInfo.
        results = []
        for features in features_list[:count]:
            info = SampleInfo(
                path=features.path,
                name=os.path.basename(features.path),
                role=role,
                pack=os.path.basename(os.path.dirname(features.path)),
                key=features.key or "",
                bpm=features.bpm or 0.0
            )
            results.append(info)
        return results

    def _get_samples_librosa(self, role: str, count: int = 10, **kwargs) -> List[SampleInfo]:
        """Get samples using librosa audio analysis.

        Requires numpy and librosa. Used as a fallback when the database has
        no cached samples. Samples failing analysis are kept with basic
        filesystem info rather than dropped (best-effort behavior).

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            **kwargs: Optional filters (target_bpm within +/-10, exact target_key)

        Returns:
            List of SampleInfo objects from audio analysis.
        """
        if not NUMPY_AVAILABLE or not LIBROSA_AVAILABLE:
            logger.error("Librosa analysis requested but numpy/librosa not available")
            return []
        # Over-fetch from the filesystem so filters still leave enough results.
        fs_samples = self._get_samples(role, count * 2)
        results = []
        target_bpm = kwargs.get('target_bpm')
        target_key = kwargs.get('target_key')
        for sample in fs_samples:
            try:
                features = self.extractor.extract(sample.path)
                if features:
                    # Filter by BPM if specified (tolerance of 10 BPM).
                    if target_bpm and features.bpm:
                        if abs(features.bpm - target_bpm) > 10:
                            continue
                    # Filter by key if specified (exact match).
                    if target_key and features.key:
                        if features.key != target_key:
                            continue
                    results.append(SampleInfo.from_sample_features(features, role=role))
                else:
                    # Analysis failed: use filesystem sample with basic info.
                    results.append(sample)
            except Exception as e:
                logger.warning(f"[SampleSelector] Librosa analysis failed for {sample.path}: {e}")
                results.append(sample)
            if len(results) >= count:
                break
        return results[:count]

    def get_samples_without_numpy(self, role: str, count: int = 10) -> List[SampleInfo]:
        """
        Get samples using only the SQLite database, no audio analysis.

        Works entirely without numpy/librosa by querying the pre-populated
        metadata database; falls back to the filesystem index when the store
        is missing or returns nothing.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return

        Returns:
            List of SampleInfo objects.
        """
        logger.info(f"[SampleSelector] Database-only query for role: {role}")
        # BUGFIX: this method used to dereference self.metadata_store without
        # a None check and crashed with AttributeError in "limited" mode.
        if not self.metadata_store:
            logger.warning(f"[SampleSelector] No metadata store for {role}, using filesystem fallback")
            return self._get_samples(role, count)
        # Map role to database categories and query each until enough found.
        categories = _ROLE_MAP.get(role, [role])
        results = []
        for category in categories:
            db_results = self.metadata_store.search_samples(
                category=category,
                limit=count
            )
            for features in db_results:
                results.append(SampleInfo.from_sample_features(features, role=role))
            if len(results) >= count:
                break
        # If the database had nothing, fall back to the filesystem index.
        if not results:
            logger.warning(f"[SampleSelector] No database results for {role}, using filesystem fallback")
            return self._get_samples(role, count)
        logger.info(f"[SampleSelector] Found {len(results[:count])} samples for {role} (database-only)")
        return results[:count]

    def select_by_similarity(self, reference_path: str, top_n: int = 10) -> InstrumentGroup:
        """Select samples similar to a reference audio file.

        Uses the embedding engine for similarity search; any failure or an
        empty result falls back to select_for_genre("reggaeton").
        """
        try:
            # Import here to avoid circular dependencies.
            from . import embedding_engine as ee
            # Over-fetch so per-role slicing below still has candidates.
            similar = ee.find_similar(reference_path, top_n=top_n * 3)
            if not similar:
                logger.warning("No similar samples found for %s, falling back to random", reference_path)
                return self.select_for_genre("reggaeton")
            self._build_index()
            # Reference features: database-first, then analysis.
            ref_features = self.extractor.get_features(reference_path)
            ref_bpm = ref_features.get("bpm", 95.0) if ref_features else 95.0
            ref_key = ref_features.get("key", "Am") if ref_features else "Am"
            group = InstrumentGroup(genre="similar_to_reference", key=ref_key, bpm=ref_bpm)
            # Partition similar samples by role.
            kick_samples = [s for s in similar if s.role == "kick"][:3]
            snare_samples = [s for s in similar if s.role in ("snare", "clap")][:3]
            hat_samples = [s for s in similar if s.role in ("hat_closed", "hat_open")][:3]
            bass_samples = [s for s in similar if s.role == "bass"][:5]
            synth_samples = [s for s in similar if s.role in ("synth", "oneshot")][:5]
            fx_samples = [s for s in similar if s.role == "fx"][:3]
            # Build drum kit from the best matches.
            group.drums = DrumKit(
                name="Similar Kit",
                kick=kick_samples[0] if kick_samples else None,
                snare=snare_samples[0] if snare_samples else None,
                clap=snare_samples[1] if len(snare_samples) > 1 else None,
                hat_closed=hat_samples[0] if hat_samples else None,
                hat_open=hat_samples[1] if len(hat_samples) > 1 else None,
            )
            # Fill melodic and FX layers.
            group.bass = bass_samples
            group.synths = synth_samples
            group.fx = fx_samples
            logger.info("Selected %d similar samples for reference: %s",
                        len([x for x in [group.drums.kick, group.drums.snare] + group.bass + group.synths + group.fx if x]),
                        reference_path)
            return group
        except Exception as e:
            logger.error("Error in select_by_similarity: %s", str(e))
            return self.select_for_genre("reggaeton")

    def select_for_genre(
        self,
        genre: str,
        key: Optional[str] = None,
        bpm: Optional[float] = None
    ) -> InstrumentGroup:
        """
        Select a complete sample pack for the given genre.

        Uses a database-first approach: queries SQLite for cached samples,
        only analyzing new samples when numpy is available.

        Args:
            genre: Genre to select samples for
            key: Musical key (default: Am)
            bpm: Tempo in BPM (default: 95.0)

        Returns:
            InstrumentGroup with selected samples

        Raises:
            ValueError: if the library contains no samples at all.
        """
        self._build_index()
        if not self._index:
            raise ValueError("No samples found in %s" % self._library)
        group = InstrumentGroup(genre=genre, key=key or "Am", bpm=bpm or 95.0)
        # Choose selection strategy: database-only vs hybrid.
        if isinstance(self.extractor, DatabaseExtractor) or not NUMPY_AVAILABLE:
            logger.info("[SampleSelector] Using database-only selection")
            kick = self.get_samples_without_numpy("kick", 3)
            snare = self.get_samples_without_numpy("snare", 3)
            clap = self.get_samples_without_numpy("clap", 2)
            hats = self.get_samples_without_numpy("hat_closed", 4)
            bass = self.get_samples_without_numpy("bass", 5)
            synths = self.get_samples_without_numpy("synth", 5)
            fx = self.get_samples_without_numpy("fx", 3)
        else:
            # Hybrid mode: database first, analyze uncached samples.
            logger.info("[SampleSelector] Using hybrid selection (database + analysis)")
            kick = self._get_samples_hybrid("kick", 3)
            snare = self._get_samples_hybrid("snare", 3)
            clap = self._get_samples_hybrid("clap", 2)
            hats = self._get_samples_hybrid("hat_closed", 4)
            bass = self._get_samples_hybrid("bass", 5)
            synths = self._get_samples_hybrid("synth", 5)
            fx = self._get_samples_hybrid("fx", 3)
        # Build drum kit; a second snare substitutes for a missing clap.
        group.drums = DrumKit(
            name="%s Kit" % genre.title(),
            kick=kick[0] if kick else None,
            snare=snare[0] if snare else None,
            clap=clap[0] if clap else (snare[1] if len(snare) > 1 else None),
            hat_closed=hats[0] if hats else None,
            hat_open=hats[1] if len(hats) > 1 else None,
        )
        # Fill melodic and FX layers.
        group.bass = bass
        group.synths = synths
        group.fx = fx
        return group

    def _get_samples_hybrid(self, role: str, count: int) -> List[SampleInfo]:
        """
        Get samples using hybrid approach: database first, analyze if needed.

        Cache hits come straight from the store; misses are analyzed (and
        cached by the extractor) when numpy/librosa are present, otherwise
        the plain filesystem entry is used.

        Args:
            role: Sample role
            count: Number of samples needed

        Returns:
            List of SampleInfo objects
        """
        results = []
        # Over-fetch from the filesystem so analysis failures still leave
        # enough candidates.
        fs_samples = self._get_samples(role, count * 2)
        for sample in fs_samples:
            # Try database first (defensive None guard on the store).
            db_features = (
                self.metadata_store.get_sample_features(sample.path)
                if self.metadata_store else None
            )
            if db_features:
                # Cache hit - use database result.
                results.append(SampleInfo.from_sample_features(db_features, role=role))
            elif NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
                # Cache miss - analyze (extractor caches the result).
                try:
                    features = self.extractor.extract(sample.path)
                    if features:
                        results.append(SampleInfo.from_sample_features(features, role=role))
                    else:
                        # Analysis failed, use filesystem sample.
                        results.append(sample)
                except Exception as e:
                    logger.warning(f"[SampleSelector] Analysis failed for {sample.path}: {e}")
                    results.append(sample)
            else:
                # No numpy available, use filesystem sample as-is.
                results.append(sample)
            if len(results) >= count:
                break
        return results[:count]

    def get_recommended_samples(self, role, count=10, **kwargs):
        """Get recommended samples with a database-first approach.

        Accepts target_bpm (expanded to a +/-5 BPM range) and target_key via
        kwargs. Falls back to librosa analysis, then to an empty list.
        """
        # Try database first.
        if self.metadata_store:
            target_bpm = kwargs.get('target_bpm')
            target_key = kwargs.get('target_key')
            bpm_range = None
            if target_bpm:
                bpm_range = (target_bpm - 5, target_bpm + 5)
            db_results = self.select_samples_db_only(role, count, bpm_range=bpm_range, key=target_key)
            if db_results:
                logger.info(f"Retrieved {len(db_results)} samples from database")
                return db_results
        # Fall back to audio analysis if numpy/librosa are available.
        if NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            logger.info("Using librosa analysis for samples")
            return self._get_samples_librosa(role, count, **kwargs)
        # Limited mode: nothing we can do.
        logger.warning("No metadata store and no numpy - cannot select samples")
        return []
# Process-wide singleton; populated lazily by get_selector().
_selector: Optional[SampleSelector] = None


def get_selector(
    library_path: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> SampleSelector:
    """
    Get the global SampleSelector instance, creating it on first use.

    Note: the arguments only take effect on the call that creates the
    singleton; later calls return the existing instance unchanged.

    Args:
        library_path: Optional library path
        metadata_store: Optional metadata store

    Returns:
        SampleSelector singleton
    """
    global _selector
    if _selector is not None:
        return _selector
    _selector = SampleSelector(library_path, metadata_store)
    return _selector
def select_samples_for_track(
    genre: str,
    key: str = "",
    bpm: float = 0,
    metadata_store: Optional[SampleMetadataStore] = None
) -> InstrumentGroup:
    """
    Convenience function: select a full sample pack for a genre.

    Empty key / non-positive bpm are treated as "unset" and passed through
    as None so select_for_genre applies its own defaults.

    Args:
        genre: Genre to select
        key: Musical key
        bpm: Tempo in BPM
        metadata_store: Optional metadata store

    Returns:
        InstrumentGroup with selected samples
    """
    selector = get_selector(metadata_store=metadata_store)
    resolved_key = key or None
    resolved_bpm = bpm if bpm > 0 else None
    return selector.select_for_genre(genre, resolved_key, resolved_bpm)
def get_drum_kit(
    genre: str = "reggaeton",
    variation: str = "standard",
    metadata_store: Optional[SampleMetadataStore] = None
) -> DrumKit:
    """
    Get a drum kit for the genre.

    Args:
        genre: Genre for drum kit
        variation: Kit variation style (currently unused; kept for API compat)
        metadata_store: Optional metadata store

    Returns:
        DrumKit with selected samples
    """
    selector = get_selector(metadata_store=metadata_store)
    return selector.select_for_genre(genre).drums
def get_recommended_samples(
    role: str,
    count: int = 5,
    target_bpm: Optional[float] = None,
    target_key: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> List[SampleInfo]:
    """
    Get recommended samples for a role via the global selector.

    Args:
        role: Sample role/category
        count: Number of samples
        target_bpm: Optional BPM target
        target_key: Optional key target
        metadata_store: Optional metadata store

    Returns:
        List of recommended SampleInfo objects
    """
    selector = get_selector(metadata_store=metadata_store)
    return selector.get_recommended_samples(
        role=role,
        count=count,
        target_bpm=target_bpm,
        target_key=target_key
    )
def reset_cross_generation_memory():
    """Reset selection memory.

    Compatibility stub: the current selector keeps no cross-generation
    state, so this intentionally does nothing.
    """
def get_extraction_mode() -> str:
    """
    Get the current extraction mode of the global selector (for debugging).

    Returns:
        Mode string such as "database_first", "full_analysis" or "limited".
    """
    return get_selector().extraction_mode
def is_numpy_available() -> bool:
    """Report whether numpy could be imported for audio analysis."""
    return bool(NUMPY_AVAILABLE)
def is_librosa_available() -> bool:
    """Report whether librosa could be imported for audio analysis."""
    return bool(LIBROSA_AVAILABLE)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff