Files
ableton-mcp-ai/mcp_server/engines/sample_selector.py
OpenCode Agent 5ce8187c65 feat: Implement senior audio injection with 5 fallback methods
- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain
- Method 1: track.insert_arrangement_clip() [Live 12+]
- Method 2: track.create_audio_clip() [Live 11+]
- Method 3: arrangement_clips.add_new_clip() [Live 12+]
- Method 4: Session->duplicate_clip_to_arrangement [Legacy]
- Method 5: Session->Recording [Universal]

- Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow
- Update skills documentation
- Verified: 3 clips created at positions [0, 4, 8] in Arrangement View

Closes: Audio injection in Arrangement View
2026-04-12 14:02:32 -03:00

700 lines
24 KiB
Python

"""
Sample Selector - Intelligent sample selection with metadata store integration.
Indexes libreria/reggaeton and returns sample packs by genre with support for:
- Database-first queries with SQLite caching
- Graceful degradation when numpy is unavailable
- Hybrid analysis with automatic caching
Usage:
    from engines.sample_selector import SampleSelector, get_selector

    # With metadata store
    selector = SampleSelector(metadata_store=store)
    samples = selector.select_for_genre("reggaeton")

    # Without numpy (database-only mode)
    samples = selector.get_samples_without_numpy("kick", count=10)
"""
import json
import logging
import os
import random
from pathlib import Path
from typing import Optional, Dict, List, Any, Union
from dataclasses import dataclass, field
logger = logging.getLogger("SampleSelector")

# Senior Architecture: probe the optional numeric/audio dependencies once at
# import time. Each flag records whether the corresponding import succeeded,
# so the rest of the module can degrade gracefully instead of failing.
try:
    import numpy as np
except ImportError:
    NUMPY_AVAILABLE = False
else:
    NUMPY_AVAILABLE = True

try:
    import librosa
except ImportError:
    LIBROSA_AVAILABLE = False
else:
    LIBROSA_AVAILABLE = True
# Import new metadata store and abstract analyzer
from .metadata_store import SampleMetadataStore, SampleFeatures, create_metadata_store
from .abstract_analyzer import (
HybridExtractor,
DatabaseExtractor,
create_extractor
)
# Default sample library location, used when no library_path is supplied.
REGGAETON_DIR = Path(r"C:\ProgramData\Ableton\Live 12 Suite\Resources\MIDI Remote Scripts\libreria\reggaeton")

# Maps a sample role to the pack-folder / database-category names that are
# searched for that role.
_ROLE_MAP = {
    "kick": [
        "kick",
    ],
    "snare": [
        "snare",
    ],
    "clap": [
        "snare",
        "clap",
    ],
    "hat_closed": [
        "hi-hat",
    ],
    "hat_open": [
        "hi-hat",
    ],
    "bass": [
        "bass",
    ],
    "synth": [
        "oneshots",
        "reggaeton 3",
    ],
    "fx": [
        "fx",
    ],
    "perc": [
        "perc loop",
        "hi-hat",
    ],
}
@dataclass
class SampleInfo:
    """Lightweight description of one audio sample."""

    name: str         # file name of the sample
    path: str         # path to the sample file
    role: str         # role label, e.g. "kick" or "bass"
    pack: str = ""    # name of the pack/folder the sample belongs to
    key: str = ""     # musical key, empty when not known
    bpm: float = 0.0  # tempo, 0.0 when not known

    @classmethod
    def from_sample_features(cls, features: SampleFeatures, role: str = "") -> "SampleInfo":
        """Build a SampleInfo from a SampleFeatures record.

        Args:
            features: Record providing ``path``, ``categories``, ``key`` and
                ``bpm``.
            role: Role to assign. When empty, the first entry of
                ``features.categories`` is used, or "unknown" if there is none.

        Returns:
            A SampleInfo whose name and pack are the file name and the parent
            folder name of ``features.path``.
        """
        sample_path = Path(features.path)

        if role:
            resolved_role = role
        elif features.categories:
            resolved_role = features.categories[0]
        else:
            resolved_role = "unknown"

        return cls(
            name=sample_path.name,
            path=features.path,
            role=resolved_role,
            pack=sample_path.parent.name,
            key=features.key or "",
            bpm=features.bpm or 0.0,
        )
@dataclass
class DrumKit:
    """A named set of drum samples; any slot may be left empty (None)."""

    name: str

    # One optional sample per drum slot.
    kick: Optional[SampleInfo] = None
    snare: Optional[SampleInfo] = None
    clap: Optional[SampleInfo] = None
    hat_closed: Optional[SampleInfo] = None
    hat_open: Optional[SampleInfo] = None
@dataclass
class InstrumentGroup:
    """A selection of samples for one track: a drum kit plus bass, synth and FX lists."""

    genre: str
    key: str
    bpm: float
    drums: Optional[DrumKit] = None
    bass: List[SampleInfo] = field(default_factory=list)
    synths: List[SampleInfo] = field(default_factory=list)
    fx: List[SampleInfo] = field(default_factory=list)

    def __post_init__(self):
        """Give the group an empty, genre-named drum kit when none was supplied."""
        if self.drums is not None:
            return
        kit_name = "%s Kit" % self.genre.title()
        self.drums = DrumKit(name=kit_name)
class SampleSelector:
    """
    Intelligent sample selector with metadata store integration.

    Supports two modes:
    - Full mode (numpy available): Database + audio analysis with caching
    - Database-only mode: SQLite queries without audio analysis

    Samples are discovered by walking the library directory and their role is
    guessed from file and folder names. Selection is positional: the first
    matching samples are returned.
    """

    # File extensions (compared lowercase) that count as audio samples.
    _AUDIO_EXTENSIONS = (".wav", ".aif", ".aiff", ".mp3", ".flac")

    # Ordered role-guessing rules:
    #   (role, substrings looked up in the file name, substrings looked up in
    #    the folder path relative to the library)
    # The first rule with any hit wins; anything unmatched is a "synth".
    _ROLE_RULES = (
        ("kick", ("kick",), ("kick",)),
        ("snare", ("snare",), ("snare",)),
        ("clap", ("clap",), ()),
        ("hat_closed", ("hihat",), ("hi-hat",)),
        ("bass", ("bass",), ("bass",)),
        ("fx", ("fx",), ("fx",)),
        ("perc", ("perc",), ("perc",)),
        ("drum_loop", (), ("drumloop",)),
    )

    def __init__(
        self,
        library_path: Optional[str] = None,
        metadata_store: Optional[SampleMetadataStore] = None,
        embedding_engine=None,
        reference_matcher=None,
        verbose: bool = False
    ):
        """
        Initialize sample selector.

        Args:
            library_path: Path to sample library (default: REGGAETON_DIR)
            metadata_store: Optional metadata store instance. When omitted and
                numpy is importable, a store is created next to the library
                folder as "sample_metadata.db"; otherwise no store is used.
            embedding_engine: Optional embedding engine for similarity search
                (stored on the instance)
            reference_matcher: Optional reference matcher for style matching
                (stored on the instance)
            verbose: Enable verbose logging
        """
        self._library = Path(library_path) if library_path else REGGAETON_DIR
        self._index: List[SampleInfo] = []
        self._indexed = False
        self.verbose = verbose
        self.embedding_engine = embedding_engine
        self.reference_matcher = reference_matcher

        # Senior Architecture: Metadata store integration
        if metadata_store is not None:
            self.metadata_store = metadata_store
            if self.verbose:
                logger.info("[SampleSelector] Using provided metadata store")
        elif NUMPY_AVAILABLE:
            # Only create a metadata store if we can populate it
            db_path = str(self._library.parent / "sample_metadata.db")
            self.metadata_store = create_metadata_store(db_path)
            if self.verbose:
                logger.info(f"[SampleSelector] Created metadata store at {db_path}")
        else:
            self.metadata_store = None
            logger.warning("[SampleSelector] No metadata store available")

        # Initialize extractor (Hybrid or Database-only based on numpy availability)
        self.extractor = create_extractor(self.metadata_store, verbose=verbose)

        # Track extraction mode. It is "database_first" only when the caller
        # supplied a store; identity check so a store object that happens to
        # be falsy is still recognised.
        if metadata_store is not None:
            mode = "database_first"
        elif NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            mode = "full_analysis"
        else:
            mode = "limited"
        self._extraction_mode = mode
        self.extraction_mode = mode

        if verbose:
            logger.info(f"[SampleSelector] Mode: {self.extraction_mode}")
            if not NUMPY_AVAILABLE:
                logger.warning("[SampleSelector] Running in DATABASE-ONLY mode (numpy unavailable)")
            elif not LIBROSA_AVAILABLE:
                logger.warning("[SampleSelector] Running in LIMITED mode (librosa unavailable)")
            else:
                logger.info("[SampleSelector] Running in FULL mode (numpy + librosa available)")

    @staticmethod
    def _feature_value(features, name, default):
        """Read ``name`` from a features mapping or object, falling back to ``default``.

        Accepts either a dict-like result or an attribute-style record, and
        treats a missing or ``None`` value as absent.
        """
        if not features:
            return default
        if isinstance(features, dict):
            value = features.get(name)
        else:
            value = getattr(features, name, None)
        return default if value is None else value

    def _build_index(self):
        """Build the in-memory sample index from the filesystem (once).

        Walks the library folder and records every file with a known audio
        extension. If the library folder does not exist a warning is logged
        and the index stays empty; the scan is retried on the next call.
        """
        if self._indexed:
            return
        self._index = []
        if not self._library.is_dir():
            logger.warning("Library not found: %s", self._library)
            return

        library_root = str(self._library)
        for root, _dirs, files in os.walk(self._library):
            rel = os.path.relpath(root, library_root)
            # relpath() yields "." for the library root itself, so files
            # stored directly in the root have no pack folder.
            in_root = rel in ("", os.curdir)
            pack = "unknown" if in_root else rel.split(os.sep)[0]
            for f in files:
                if not f.lower().endswith(self._AUDIO_EXTENSIONS):
                    continue
                self._index.append(SampleInfo(
                    name=f,
                    path=os.path.join(root, f),
                    role=self._guess_role(f, rel),
                    pack=pack,
                ))

        self._indexed = True
        logger.info("Indexed %d samples from %s", len(self._index), self._library)

    def _guess_role(self, filename: str, relpath: str) -> str:
        """Guess sample role from filename and path.

        Args:
            filename: File name of the sample.
            relpath: Folder of the sample relative to the library root.

        Returns:
            The role of the first matching rule in ``_ROLE_RULES`` (matching
            is case-insensitive), or "synth" when nothing matches.
        """
        lower = filename.lower()
        rel = relpath.lower()
        for role, name_keys, path_keys in self._ROLE_RULES:
            if any(k in lower for k in name_keys) or any(k in rel for k in path_keys):
                return role
        return "synth"

    def _get_samples(self, role: str, limit: int = 10) -> List[SampleInfo]:
        """Get samples by role from filesystem index.

        A sample matches when its guessed role equals ``role`` or when its
        pack folder is one of the folders listed for ``role`` in ``_ROLE_MAP``.
        Pack names are compared case-insensitively because the map is
        lowercase while folder names on disk may not be.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            limit: Maximum number of samples to return

        Returns:
            Up to ``limit`` SampleInfo objects in index order.
        """
        self._build_index()
        pack_names = {name.lower() for name in _ROLE_MAP.get(role, [])}
        matches = [
            s for s in self._index
            if s.role == role or s.pack.lower() in pack_names
        ]
        return matches[:limit]

    def select_samples_db_only(self, role, count=10, bpm_range=None, key=None):
        """Select samples using only database (no numpy/librosa).

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            bpm_range: Optional (min, max) BPM range; samples without a BPM
                value are excluded when a range is given
            key: Optional musical key (exact match)

        Returns:
            List of SampleInfo objects from database; empty when no metadata
            store is available.
        """
        if self.metadata_store is None:
            logger.error("Metadata store not available")
            return []

        # Query database for samples matching criteria
        features_list = self.metadata_store.get_samples_by_category(role) or []

        # Filter by BPM range if specified
        if bpm_range and len(bpm_range) == 2:
            min_bpm, max_bpm = bpm_range
            features_list = [
                f for f in features_list
                # A record may carry no BPM; comparing None would raise.
                if f.bpm is not None and min_bpm <= f.bpm <= max_bpm
            ]

        # Filter by key if specified
        if key:
            features_list = [f for f in features_list if f.key == key]

        # Convert to SampleInfo
        return [
            SampleInfo.from_sample_features(features, role=role)
            for features in features_list[:count]
        ]

    def _get_samples_librosa(self, role: str, count: int = 10, **kwargs) -> List[SampleInfo]:
        """Get samples using librosa audio analysis.

        This method requires numpy and librosa for audio feature extraction.
        Used as fallback when database has no cached samples.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            **kwargs: Optional filters. ``target_bpm`` drops analysed samples
                whose BPM differs by more than 10; ``target_key`` drops
                analysed samples with a different key.

        Returns:
            List of SampleInfo objects. Samples whose analysis fails or
            returns nothing are included with their filesystem info only.
        """
        if not NUMPY_AVAILABLE or not LIBROSA_AVAILABLE:
            logger.error("Librosa analysis requested but numpy/librosa not available")
            return []

        target_bpm = kwargs.get('target_bpm')
        target_key = kwargs.get('target_key')
        results: List[SampleInfo] = []

        # Scan twice as many candidates as needed to leave room for filtering.
        for sample in self._get_samples(role, count * 2):
            try:
                features = self.extractor.extract(sample.path)
            except Exception as e:
                # Best effort: an analysis error must not lose the sample.
                logger.warning(f"[SampleSelector] Librosa analysis failed for {sample.path}: {e}")
                results.append(sample)
            else:
                if not features:
                    # Analysis failed, use filesystem sample with basic info
                    results.append(sample)
                else:
                    bpm_mismatch = bool(
                        target_bpm and features.bpm
                        and abs(features.bpm - target_bpm) > 10
                    )
                    key_mismatch = bool(
                        target_key and features.key
                        and features.key != target_key
                    )
                    if bpm_mismatch or key_mismatch:
                        continue
                    results.append(SampleInfo.from_sample_features(features, role=role))
            if len(results) >= count:
                break
        return results[:count]

    def get_samples_without_numpy(self, role: str, count: int = 10) -> List[SampleInfo]:
        """
        Get samples using only SQLite database, no audio analysis.

        This method works entirely without numpy/librosa by querying
        the pre-populated metadata database. When there is no metadata store,
        or the store has nothing for the role, the filesystem index is used
        instead.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return

        Returns:
            List of SampleInfo objects
        """
        logger.info(f"[SampleSelector] Database-only query for role: {role}")

        # Without numpy and without a caller-supplied store there is no
        # database at all; go straight to the filesystem.
        if self.metadata_store is None:
            logger.warning(f"[SampleSelector] No database results for {role}, using filesystem fallback")
            return self._get_samples(role, count)

        # Map role to database category
        categories = _ROLE_MAP.get(role, [role])
        results: List[SampleInfo] = []

        # Search database for each category until enough samples are found
        for category in categories:
            if len(results) >= count:
                break
            db_results = self.metadata_store.search_samples(
                category=category,
                limit=count
            )
            for features in db_results or ():
                results.append(SampleInfo.from_sample_features(features, role=role))
                if len(results) >= count:
                    break

        # If no database results, fall back to filesystem
        if not results:
            logger.warning(f"[SampleSelector] No database results for {role}, using filesystem fallback")
            return self._get_samples(role, count)

        logger.info(f"[SampleSelector] Found {len(results[:count])} samples for {role} (database-only)")
        return results[:count]

    def select_by_similarity(self, reference_path: str, top_n: int = 10) -> InstrumentGroup:
        """Select samples similar to a reference audio file.

        Asks the sibling ``embedding_engine`` module for ``top_n * 3`` similar
        samples and distributes them over a drum kit, bass, synth and FX
        lists by their ``role``. Key and BPM of the group come from the
        reference's features (defaults: "Am", 95.0).

        If nothing similar is found, or any step raises, the result of
        ``select_for_genre("reggaeton")`` is returned instead (which itself
        raises ValueError when the library has no samples).

        Args:
            reference_path: Path of the reference audio file
            top_n: Base number of similar samples to request

        Returns:
            InstrumentGroup built from the similar samples
        """
        try:
            # Import here to avoid circular dependencies
            from . import embedding_engine as ee

            # Find similar samples using embeddings
            similar = ee.find_similar(reference_path, top_n=top_n * 3)
            if not similar:
                logger.warning("No similar samples found for %s, falling back to random", reference_path)
                return self.select_for_genre("reggaeton")

            # Build index if not already done
            self._build_index()

            # Get reference features using extractor (database-first, then analysis)
            ref_features = self.extractor.get_features(reference_path)
            ref_bpm = self._feature_value(ref_features, "bpm", 95.0)
            ref_key = self._feature_value(ref_features, "key", "Am")
            group = InstrumentGroup(genre="similar_to_reference", key=ref_key, bpm=ref_bpm)

            def by_role(roles, limit):
                """Return the first ``limit`` similar samples with one of ``roles``."""
                return [s for s in similar if s.role in roles][:limit]

            kick_samples = by_role(("kick",), 3)
            snare_samples = by_role(("snare", "clap"), 3)
            hat_samples = by_role(("hat_closed", "hat_open"), 3)

            # Build drum kit; second-best snare/hat fill the clap/open-hat slots
            group.drums = DrumKit(
                name="Similar Kit",
                kick=kick_samples[0] if kick_samples else None,
                snare=snare_samples[0] if snare_samples else None,
                clap=snare_samples[1] if len(snare_samples) > 1 else None,
                hat_closed=hat_samples[0] if hat_samples else None,
                hat_open=hat_samples[1] if len(hat_samples) > 1 else None,
            )

            # Fill other instruments
            group.bass = by_role(("bass",), 5)
            group.synths = by_role(("synth", "oneshot"), 5)
            group.fx = by_role(("fx",), 3)

            selected = [group.drums.kick, group.drums.snare] + group.bass + group.synths + group.fx
            logger.info("Selected %d similar samples for reference: %s",
                        len([x for x in selected if x]),
                        reference_path)
            return group
        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to the
            # plain genre selection rather than propagating.
            logger.error("Error in select_by_similarity: %s", str(e))
            return self.select_for_genre("reggaeton")

    def select_for_genre(
        self,
        genre: str,
        key: Optional[str] = None,
        bpm: Optional[float] = None
    ) -> InstrumentGroup:
        """
        Select a complete sample pack for the given genre.

        Uses database-first approach: queries SQLite for cached samples,
        only analyzing new samples if numpy is available.

        Args:
            genre: Genre to select samples for (used for naming only)
            key: Musical key (default: Am)
            bpm: Tempo in BPM (default: 95.0)

        Returns:
            InstrumentGroup with selected samples

        Raises:
            ValueError: If the library contains no indexed samples
        """
        self._build_index()
        if not self._index:
            raise ValueError("No samples found in %s" % self._library)

        group = InstrumentGroup(genre=genre, key=key or "Am", bpm=bpm or 95.0)

        # Try database-first for each role, fallback to filesystem
        if isinstance(self.extractor, DatabaseExtractor) or not NUMPY_AVAILABLE:
            # Database-only mode
            logger.info("[SampleSelector] Using database-only selection")
            pick = self.get_samples_without_numpy
        else:
            # Hybrid mode: database first, then analyze uncached samples
            logger.info("[SampleSelector] Using hybrid selection (database + analysis)")
            pick = self._get_samples_hybrid

        kick = pick("kick", 3)
        snare = pick("snare", 3)
        clap = pick("clap", 2)
        hats = pick("hat_closed", 4)
        bass = pick("bass", 5)
        synths = pick("synth", 5)
        fx = pick("fx", 3)

        # Build drum kit; the second snare stands in when no clap was found
        group.drums = DrumKit(
            name="%s Kit" % genre.title(),
            kick=kick[0] if kick else None,
            snare=snare[0] if snare else None,
            clap=clap[0] if clap else (snare[1] if len(snare) > 1 else None),
            hat_closed=hats[0] if hats else None,
            hat_open=hats[1] if len(hats) > 1 else None,
        )

        # Fill other instruments
        group.bass = bass
        group.synths = synths
        group.fx = fx
        return group

    def _get_samples_hybrid(self, role: str, count: int) -> List[SampleInfo]:
        """
        Get samples using hybrid approach: database first, analyze if needed.

        Args:
            role: Sample role
            count: Number of samples needed

        Returns:
            List of SampleInfo objects. Samples that are neither cached nor
            analysable are returned with their filesystem info only.
        """
        results: List[SampleInfo] = []
        store = self.metadata_store
        can_analyze = NUMPY_AVAILABLE and LIBROSA_AVAILABLE

        # Get filesystem samples for this role
        for sample in self._get_samples(role, count * 2):
            # Try database first (skipped when there is no store)
            db_features = store.get_sample_features(sample.path) if store is not None else None
            if db_features:
                # Cache hit - use database result
                results.append(SampleInfo.from_sample_features(db_features, role=role))
            elif can_analyze:
                # Cache miss - analyze and cache
                try:
                    features = self.extractor.extract(sample.path)
                except Exception as e:
                    logger.warning(f"[SampleSelector] Analysis failed for {sample.path}: {e}")
                    results.append(sample)
                else:
                    if features:
                        results.append(SampleInfo.from_sample_features(features, role=role))
                    else:
                        # Analysis failed, use filesystem sample
                        results.append(sample)
            else:
                # No numpy available, use filesystem sample
                results.append(sample)
            if len(results) >= count:
                break
        return results[:count]

    def get_recommended_samples(self, role, count=10, **kwargs):
        """Get recommended samples with database-first approach.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            **kwargs: Optional ``target_bpm`` (database lookup uses a +/-5 BPM
                window) and ``target_key``; both are also forwarded to the
                librosa fallback.

        Returns:
            Samples from the database when it yields any; otherwise samples
            from librosa analysis when numpy and librosa are available;
            otherwise an empty list.
        """
        # Try database first
        if self.metadata_store is not None:
            target_bpm = kwargs.get('target_bpm')
            target_key = kwargs.get('target_key')
            bpm_range = (target_bpm - 5, target_bpm + 5) if target_bpm else None
            db_results = self.select_samples_db_only(role, count, bpm_range=bpm_range, key=target_key)
            if db_results:
                logger.info(f"Retrieved {len(db_results)} samples from database")
                return db_results

        # Fall back to legacy analysis if numpy available
        if NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            logger.info("Using librosa analysis for samples")
            return self._get_samples_librosa(role, count, **kwargs)

        # Limited mode: return empty with warning
        logger.warning("No metadata store and no numpy - cannot select samples")
        return []
# Process-wide SampleSelector, created lazily by get_selector().
_selector: Optional[SampleSelector] = None


def get_selector(
    library_path: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> SampleSelector:
    """
    Return the shared SampleSelector, creating it on first use.

    The arguments are only used when the shared instance is created; once it
    exists, later calls return it unchanged and ignore both arguments.

    Args:
        library_path: Optional library path for the first construction
        metadata_store: Optional metadata store for the first construction

    Returns:
        The shared SampleSelector instance
    """
    global _selector
    if _selector is not None:
        return _selector
    _selector = SampleSelector(library_path, metadata_store)
    return _selector
def select_samples_for_track(
    genre: str,
    key: str = "",
    bpm: float = 0,
    metadata_store: Optional[SampleMetadataStore] = None
) -> InstrumentGroup:
    """
    Pick a full sample selection for a genre via the shared selector.

    Args:
        genre: Genre to select
        key: Musical key; an empty value lets the selector use its default
        bpm: Tempo in BPM; a value that is not positive lets the selector
            use its default
        metadata_store: Optional metadata store passed to get_selector()

    Returns:
        InstrumentGroup with selected samples
    """
    selector = get_selector(metadata_store=metadata_store)
    chosen_key = key or None
    chosen_bpm = bpm if bpm > 0 else None
    return selector.select_for_genre(genre, chosen_key, chosen_bpm)
def get_drum_kit(
    genre: str = "reggaeton",
    variation: str = "standard",
    metadata_store: Optional[SampleMetadataStore] = None
) -> DrumKit:
    """
    Return the drum kit of the shared selector's selection for a genre.

    Args:
        genre: Genre for drum kit
        variation: Kit variation style (accepted but currently not used)
        metadata_store: Optional metadata store passed to get_selector()

    Returns:
        DrumKit with selected samples
    """
    selector = get_selector(metadata_store=metadata_store)
    return selector.select_for_genre(genre).drums
def get_recommended_samples(
    role: str,
    count: int = 5,
    target_bpm: Optional[float] = None,
    target_key: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> List[SampleInfo]:
    """
    Ask the shared selector for recommended samples of one role.

    Args:
        role: Sample role/category
        count: Number of samples
        target_bpm: Optional BPM target
        target_key: Optional key target
        metadata_store: Optional metadata store passed to get_selector()

    Returns:
        List of recommended SampleInfo objects
    """
    selector = get_selector(metadata_store=metadata_store)
    filters = {"target_bpm": target_bpm, "target_key": target_key}
    return selector.get_recommended_samples(role=role, count=count, **filters)
def reset_cross_generation_memory():
    """Compatibility stub for resetting selection memory; does nothing."""
    return None
def get_extraction_mode() -> str:
    """
    Report the extraction mode of the shared selector (for debugging).

    Returns:
        The selector's ``extraction_mode`` string; SampleSelector sets it to
        one of "database_first", "full_analysis" or "limited".
    """
    return get_selector().extraction_mode
def is_numpy_available() -> bool:
    """Report whether numpy could be imported when this module was loaded."""
    available = NUMPY_AVAILABLE
    return available
def is_librosa_available() -> bool:
    """Report whether librosa could be imported when this module was loaded."""
    available = LIBROSA_AVAILABLE
    return available