"""
Sample Selector - Intelligent sample selection with metadata store integration.

Indexes libreria/reggaeton and returns sample packs by genre with support for:
- Database-first queries with SQLite caching
- Graceful degradation when numpy is unavailable
- Hybrid analysis with automatic caching

Usage:
    from engines.sample_selector import SampleSelector, get_selector

    # With metadata store
    selector = SampleSelector(metadata_store=store)
    samples = selector.select_for_genre("reggaeton")

    # Without numpy (database-only mode)
    samples = selector.get_samples_without_numpy("kick", count=10)
"""

import json
import logging
import os
import random
from pathlib import Path
from typing import Optional, Dict, List, Any, Union
from dataclasses import dataclass, field

logger = logging.getLogger("SampleSelector")

# Senior Architecture: Check numpy availability
NUMPY_AVAILABLE = False
try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    pass

LIBROSA_AVAILABLE = False
try:
    import librosa
    LIBROSA_AVAILABLE = True
except ImportError:
    pass

# Import new metadata store and abstract analyzer
from .metadata_store import SampleMetadataStore, SampleFeatures, create_metadata_store
from .abstract_analyzer import (
    HybridExtractor,
    DatabaseExtractor,
    create_extractor
)

REGGAETON_DIR = Path(
    r"C:\ProgramData\Ableton\Live 12 Suite\Resources\MIDI Remote Scripts\libreria\reggaeton"
)

# Maps a sample role to the names matched against pack folder names (in
# _get_samples) and used as database categories (in get_samples_without_numpy).
_ROLE_MAP = {
    "kick": ["kick"],
    "snare": ["snare"],
    "clap": ["snare", "clap"],
    "hat_closed": ["hi-hat"],
    "hat_open": ["hi-hat"],
    "bass": ["bass"],
    "synth": ["oneshots", "reggaeton 3"],
    "fx": ["fx"],
    "perc": ["perc loop", "hi-hat"],
}

# File extensions (lower-case) that _build_index treats as audio samples.
_AUDIO_EXTENSIONS = (".wav", ".aif", ".aiff", ".mp3", ".flac")


@dataclass
class SampleInfo:
    """Lightweight description of one sample file."""

    name: str          # file name, including extension
    path: str          # path to the file as indexed or as stored in the database
    role: str          # role label such as "kick", "snare", "bass"
    pack: str = ""     # pack folder name the sample was found in
    key: str = ""      # musical key; empty string when unknown
    bpm: float = 0.0   # tempo; 0.0 when unknown

    @classmethod
    def from_sample_features(cls, features: SampleFeatures, role: str = "") -> "SampleInfo":
        """Create SampleInfo from SampleFeatures.

        Args:
            features: Feature record; ``path``, ``categories``, ``key`` and
                ``bpm`` are read from it.
            role: Role to assign. When empty, the first entry of
                ``features.categories`` is used, or "unknown" if there is none.

        Returns:
            A SampleInfo whose name and pack are derived from ``features.path``.
        """
        sample_path = Path(features.path)
        if role:
            resolved_role = role
        elif features.categories:
            resolved_role = features.categories[0]
        else:
            resolved_role = "unknown"
        return cls(
            name=sample_path.name,
            path=features.path,
            role=resolved_role,
            pack=sample_path.parent.name,
            key=features.key or "",
            bpm=features.bpm or 0.0,
        )


@dataclass
class DrumKit:
    """A named set of drum samples; any slot may be empty (None)."""

    name: str
    kick: Optional[SampleInfo] = None
    snare: Optional[SampleInfo] = None
    clap: Optional[SampleInfo] = None
    hat_closed: Optional[SampleInfo] = None
    hat_open: Optional[SampleInfo] = None


@dataclass
class InstrumentGroup:
    """Samples selected for a track: a drum kit plus bass, synth and fx lists."""

    genre: str
    key: str
    bpm: float
    drums: Optional[DrumKit] = None
    bass: List[SampleInfo] = field(default_factory=list)
    synths: List[SampleInfo] = field(default_factory=list)
    fx: List[SampleInfo] = field(default_factory=list)

    def __post_init__(self):
        # Guarantee that ``drums`` is always a DrumKit after construction.
        if self.drums is None:
            self.drums = DrumKit(name="%s Kit" % self.genre.title())


class SampleSelector:
    """
    Intelligent sample selector with metadata store integration.

    Supports two modes:
    - Full mode (numpy available): Database + audio analysis with caching
    - Database-only mode: SQLite queries without audio analysis
    """

    def __init__(
        self,
        library_path: Optional[str] = None,
        metadata_store: Optional[SampleMetadataStore] = None,
        embedding_engine=None,
        reference_matcher=None,
        verbose: bool = False
    ):
        """
        Initialize sample selector.

        Args:
            library_path: Path to sample library (default: libreria/reggaeton)
            metadata_store: Optional metadata store instance
            embedding_engine: Optional embedding engine for similarity search
            reference_matcher: Optional reference matcher for style matching
            verbose: Enable verbose logging
        """
        self._library = Path(library_path) if library_path else REGGAETON_DIR
        self._index: List[SampleInfo] = []
        self._indexed = False
        self.verbose = verbose
        self.embedding_engine = embedding_engine
        self.reference_matcher = reference_matcher

        # Senior Architecture: Metadata store integration
        if metadata_store is None and NUMPY_AVAILABLE:
            # Only create metadata store if we can populate it
            db_path = str(self._library.parent / "sample_metadata.db")
            self.metadata_store = create_metadata_store(db_path)
            if self.verbose:
                logger.info("[SampleSelector] Created metadata store at %s", db_path)
        elif metadata_store is not None:
            self.metadata_store = metadata_store
            if self.verbose:
                logger.info("[SampleSelector] Using provided metadata store")
        else:
            self.metadata_store = None
            logger.warning("[SampleSelector] No metadata store available")

        # Initialize extractor (Hybrid or Database-only based on numpy availability)
        self.extractor = create_extractor(self.metadata_store, verbose=verbose)

        # Track extraction mode.
        # NOTE(review): the mode is derived from the *argument*, not from
        # self.metadata_store, so an auto-created store does not report
        # "database_first". Kept as-is because callers may depend on the
        # reported string; confirm whether that is intended.
        if metadata_store:
            mode = "database_first"
        elif NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            mode = "full_analysis"
        else:
            mode = "limited"
        self._extraction_mode = mode
        self.extraction_mode = mode

        if verbose:
            logger.info("[SampleSelector] Mode: %s", self.extraction_mode)
            if not NUMPY_AVAILABLE:
                logger.warning("[SampleSelector] Running in DATABASE-ONLY mode (numpy unavailable)")
            elif not LIBROSA_AVAILABLE:
                logger.warning("[SampleSelector] Running in LIMITED mode (librosa unavailable)")
            else:
                logger.info("[SampleSelector] Running in FULL mode (numpy + librosa available)")

    def _build_index(self):
        """Build index from filesystem.

        Walks the library directory once and records every file whose name
        ends with one of ``_AUDIO_EXTENSIONS``. Later calls are no-ops once
        indexing has succeeded. If the library directory does not exist, a
        warning is logged and the index stays empty (and un-flagged, so the
        next call checks the directory again).
        """
        if self._indexed:
            return

        self._index = []
        if not self._library.is_dir():
            logger.warning("Library not found: %s", self._library)
            return

        library_root = str(self._library)
        for root, _dirs, files in os.walk(self._library):
            rel = os.path.relpath(root, library_root)
            # os.path.relpath returns "." (never "") for the library root
            # itself, so files sitting directly in the root have no pack.
            if rel in ("", os.curdir):
                pack = "unknown"
            else:
                pack = rel.split(os.sep)[0]
            for f in files:
                if not f.lower().endswith(_AUDIO_EXTENSIONS):
                    continue
                self._index.append(SampleInfo(
                    name=f,
                    path=os.path.join(root, f),
                    role=self._guess_role(f, rel),
                    pack=pack,
                ))

        self._indexed = True
        logger.info("Indexed %d samples from %s", len(self._index), self._library)

    def _guess_role(self, filename: str, relpath: str) -> str:
        """Guess sample role from filename and path.

        Checks are case-insensitive substring matches, evaluated in a fixed
        order; the first match wins and "synth" is the fallback.

        Args:
            filename: File name of the sample.
            relpath: Directory of the sample relative to the library root.

        Returns:
            One of "kick", "snare", "clap", "hat_closed", "bass", "fx",
            "perc", "drum_loop" or "synth".
        """
        lower = filename.lower()
        rel = relpath.lower()

        if "kick" in lower or "kick" in rel:
            return "kick"
        if "snare" in lower or "snare" in rel:
            return "snare"
        if "clap" in lower:
            return "clap"
        if "hi-hat" in rel or "hihat" in lower:
            return "hat_closed"
        if "bass" in lower or "bass" in rel:
            return "bass"
        if "fx" in lower or "fx" in rel:
            return "fx"
        if "perc" in lower or "perc" in rel:
            return "perc"
        if "drumloop" in rel:
            return "drum_loop"
        return "synth"

    def _get_samples(self, role: str, limit: int = 10) -> List[SampleInfo]:
        """Get samples by role from filesystem index.

        A sample matches when its guessed role equals ``role`` or when its
        pack name is one of the names ``_ROLE_MAP`` lists for ``role``.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            limit: Maximum number of samples to return

        Returns:
            Up to ``limit`` matching SampleInfo objects, in index order.
        """
        self._build_index()
        pack_names = _ROLE_MAP.get(role, [])
        matches = [
            sample for sample in self._index
            if sample.role == role or sample.pack in pack_names
        ]
        return matches[:limit]

    def select_samples_db_only(self, role, count=10, bpm_range=None, key=None):
        """Select samples using only database (no numpy/librosa).

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            bpm_range: Optional (min, max) BPM range
            key: Optional musical key

        Returns:
            List of SampleInfo objects from database. Empty when no metadata
            store is available.
        """
        if not self.metadata_store:
            logger.error("Metadata store not available")
            return []

        # Query database for samples matching criteria
        features_list = self.metadata_store.get_samples_by_category(role)

        # Filter by BPM range if specified
        if bpm_range and len(bpm_range) == 2:
            min_bpm, max_bpm = bpm_range
            # Records without a BPM cannot satisfy a range filter; skipping
            # them avoids a TypeError from comparing None with a number.
            features_list = [
                f for f in features_list
                if f.bpm is not None and min_bpm <= f.bpm <= max_bpm
            ]

        # Filter by key if specified
        if key:
            features_list = [f for f in features_list if f.key == key]

        # Convert to SampleInfo
        return [
            SampleInfo(
                path=features.path,
                name=os.path.basename(features.path),
                role=role,
                pack=os.path.basename(os.path.dirname(features.path)),
                key=features.key or "",
                bpm=features.bpm or 0.0,
            )
            for features in features_list[:count]
        ]

    def _get_samples_librosa(self, role: str, count: int = 10, **kwargs) -> List[SampleInfo]:
        """Get samples using librosa audio analysis.

        This method requires numpy and librosa for audio feature extraction.
        Used as fallback when database has no cached samples.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            **kwargs: Additional filter parameters. ``target_bpm`` drops
                analysed samples more than 10 BPM away; ``target_key`` drops
                analysed samples with a different key.

        Returns:
            List of SampleInfo objects from audio analysis. Samples whose
            analysis fails or yields nothing are returned with their
            filesystem-only info instead of being dropped.
        """
        if not NUMPY_AVAILABLE or not LIBROSA_AVAILABLE:
            logger.error("Librosa analysis requested but numpy/librosa not available")
            return []

        target_bpm = kwargs.get('target_bpm')
        target_key = kwargs.get('target_key')

        # Over-fetch candidates so filtering can still fill the request.
        candidates = self._get_samples(role, count * 2)

        results = []
        for sample in candidates:
            try:
                # Analyze audio with librosa
                features = self.extractor.extract(sample.path)
                if features:
                    # Filter by BPM if specified
                    if target_bpm and features.bpm:
                        if abs(features.bpm - target_bpm) > 10:
                            continue
                    # Filter by key if specified
                    if target_key and features.key:
                        if features.key != target_key:
                            continue
                    results.append(SampleInfo.from_sample_features(features, role=role))
                else:
                    # Analysis failed, use filesystem sample with basic info
                    results.append(sample)
            except Exception as e:
                # Best-effort: one unreadable file must not abort selection.
                logger.warning(
                    "[SampleSelector] Librosa analysis failed for %s: %s", sample.path, e
                )
                results.append(sample)

            if len(results) >= count:
                break

        return results[:count]

    def get_samples_without_numpy(self, role: str, count: int = 10) -> List[SampleInfo]:
        """
        Get samples using only SQLite database, no audio analysis.

        This method works entirely without numpy/librosa by querying the
        pre-populated metadata database. When there is no metadata store, or
        the store returns nothing, the filesystem index is used instead.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return

        Returns:
            List of SampleInfo objects from database (or from the filesystem
            fallback), at most ``count`` long.
        """
        logger.info("[SampleSelector] Database-only query for role: %s", role)

        # Map role to database category
        categories = _ROLE_MAP.get(role, [role])

        results = []
        # The store is None when numpy is missing and no store was supplied;
        # in that case skip straight to the filesystem fallback below.
        if self.metadata_store is not None:
            # Search database for each category
            for category in categories:
                if len(results) >= count:
                    # Already full: no need to query further categories.
                    break
                db_results = self.metadata_store.search_samples(
                    category=category,
                    limit=count
                )
                for features in db_results:
                    results.append(SampleInfo.from_sample_features(features, role=role))
                    if len(results) >= count:
                        break

        # If no database results, fall back to filesystem
        if not results:
            logger.warning(
                "[SampleSelector] No database results for %s, using filesystem fallback", role
            )
            return self._get_samples(role, count)

        results = results[:count]
        logger.info(
            "[SampleSelector] Found %d samples for %s (database-only)", len(results), role
        )
        return results

    def select_by_similarity(self, reference_path: str, top_n: int = 10) -> InstrumentGroup:
        """Select samples similar to a reference audio file.

        Any exception raised along the way is logged and the method falls back
        to ``select_for_genre("reggaeton")``; the same fallback is used when
        the embedding engine returns no similar samples.

        Args:
            reference_path: Path of the reference audio file.
            top_n: Base number of neighbours; ``top_n * 3`` are requested so
                that per-role filtering has enough candidates.

        Returns:
            InstrumentGroup built from the similar samples, or the genre
            fallback group.
        """
        try:
            # Import here to avoid circular dependencies
            from . import embedding_engine as ee

            # Find similar samples using embeddings
            similar = ee.find_similar(reference_path, top_n=top_n * 3)
            if not similar:
                logger.warning(
                    "No similar samples found for %s, falling back to random", reference_path
                )
                return self.select_for_genre("reggaeton")

            # Build index if not already done
            self._build_index()

            # Get reference features using extractor (database-first, then analysis)
            # NOTE(review): assumes get_features returns a dict-like object
            # (``.get`` is called on it) - confirm against the extractor.
            ref_features = self.extractor.get_features(reference_path)
            ref_bpm = ref_features.get("bpm", 95.0) if ref_features else 95.0
            ref_key = ref_features.get("key", "Am") if ref_features else "Am"

            group = InstrumentGroup(genre="similar_to_reference", key=ref_key, bpm=ref_bpm)

            # Filter similar samples by role
            kick_samples = [s for s in similar if s.role == "kick"][:3]
            snare_samples = [s for s in similar if s.role in ("snare", "clap")][:3]
            hat_samples = [s for s in similar if s.role in ("hat_closed", "hat_open")][:3]
            bass_samples = [s for s in similar if s.role == "bass"][:5]
            synth_samples = [s for s in similar if s.role in ("synth", "oneshot")][:5]
            fx_samples = [s for s in similar if s.role == "fx"][:3]

            # Build drum kit
            group.drums = DrumKit(
                name="Similar Kit",
                kick=kick_samples[0] if kick_samples else None,
                snare=snare_samples[0] if snare_samples else None,
                clap=snare_samples[1] if len(snare_samples) > 1 else None,
                hat_closed=hat_samples[0] if hat_samples else None,
                hat_open=hat_samples[1] if len(hat_samples) > 1 else None,
            )

            # Fill other instruments
            group.bass = bass_samples
            group.synths = synth_samples
            group.fx = fx_samples

            chosen = (
                [group.drums.kick, group.drums.snare]
                + group.bass + group.synths + group.fx
            )
            logger.info(
                "Selected %d similar samples for reference: %s",
                len([x for x in chosen if x]),
                reference_path,
            )
            return group

        except Exception as e:
            # Deliberate best-effort boundary: similarity search is optional.
            logger.error("Error in select_by_similarity: %s", str(e))
            return self.select_for_genre("reggaeton")

    def select_for_genre(
        self,
        genre: str,
        key: Optional[str] = None,
        bpm: Optional[float] = None
    ) -> InstrumentGroup:
        """
        Select a complete sample pack for the given genre.

        Uses database-first approach: queries SQLite for cached samples,
        only analyzing new samples if numpy is available.

        Args:
            genre: Genre to select samples for
            key: Musical key (default: Am)
            bpm: Tempo in BPM (default: 95.0)

        Returns:
            InstrumentGroup with selected samples

        Raises:
            ValueError: If the filesystem index contains no samples.
        """
        self._build_index()
        if not self._index:
            raise ValueError("No samples found in %s" % self._library)

        group = InstrumentGroup(genre=genre, key=key or "Am", bpm=bpm or 95.0)

        # Try database-first for each role, fallback to filesystem
        if isinstance(self.extractor, DatabaseExtractor) or not NUMPY_AVAILABLE:
            # Database-only mode
            logger.info("[SampleSelector] Using database-only selection")
            fetch = self.get_samples_without_numpy
        else:
            # Hybrid mode: database first, then analyze uncached samples
            logger.info("[SampleSelector] Using hybrid selection (database + analysis)")
            fetch = self._get_samples_hybrid

        kick = fetch("kick", 3)
        snare = fetch("snare", 3)
        clap = fetch("clap", 2)
        hats = fetch("hat_closed", 4)
        bass = fetch("bass", 5)
        synths = fetch("synth", 5)
        fx = fetch("fx", 3)

        # Build drum kit; the clap slot borrows the second snare when no clap
        # sample was found.
        group.drums = DrumKit(
            name="%s Kit" % genre.title(),
            kick=kick[0] if kick else None,
            snare=snare[0] if snare else None,
            clap=clap[0] if clap else (snare[1] if len(snare) > 1 else None),
            hat_closed=hats[0] if hats else None,
            hat_open=hats[1] if len(hats) > 1 else None,
        )

        # Fill other instruments
        group.bass = bass
        group.synths = synths
        group.fx = fx

        return group

    def _get_samples_hybrid(self, role: str, count: int) -> List[SampleInfo]:
        """
        Get samples using hybrid approach: database first, analyze if needed.

        Args:
            role: Sample role
            count: Number of samples needed

        Returns:
            List of SampleInfo objects, at most ``count`` long. Samples that
            are neither cached nor analysable keep their filesystem-only info.
        """
        results = []
        can_analyze = NUMPY_AVAILABLE and LIBROSA_AVAILABLE

        # Get filesystem samples for this role
        fs_samples = self._get_samples(role, count * 2)

        for sample in fs_samples:
            # Try database first. The store may be None (no numpy and none
            # supplied), in which case every sample is a cache miss.
            db_features = None
            if self.metadata_store is not None:
                db_features = self.metadata_store.get_sample_features(sample.path)

            if db_features:
                # Cache hit - use database result
                results.append(SampleInfo.from_sample_features(db_features, role=role))
            elif can_analyze:
                # Cache miss - analyze and cache
                try:
                    features = self.extractor.extract(sample.path)
                    if features:
                        results.append(SampleInfo.from_sample_features(features, role=role))
                    else:
                        # Analysis failed, use filesystem sample
                        results.append(sample)
                except Exception as e:
                    # Best-effort: keep the sample with its basic info.
                    logger.warning(
                        "[SampleSelector] Analysis failed for %s: %s", sample.path, e
                    )
                    results.append(sample)
            else:
                # No numpy available, use filesystem sample
                results.append(sample)

            if len(results) >= count:
                break

        return results[:count]

    def get_recommended_samples(self, role: str, count: int = 10, **kwargs) -> List[SampleInfo]:
        """Get recommended samples with database-first approach.

        Args:
            role: Sample role (kick, snare, bass, etc.)
            count: Number of samples to return
            **kwargs: ``target_bpm`` (database query uses a +/-5 BPM window)
                and ``target_key``; both are forwarded to the librosa
                fallback as well.

        Returns:
            Database results when there are any; otherwise librosa-analysed
            samples when numpy and librosa are available; otherwise an empty
            list.
        """
        # Try database first
        if self.metadata_store:
            target_bpm = kwargs.get('target_bpm')
            target_key = kwargs.get('target_key')

            bpm_range = (target_bpm - 5, target_bpm + 5) if target_bpm else None

            db_results = self.select_samples_db_only(
                role, count, bpm_range=bpm_range, key=target_key
            )
            if db_results:
                logger.info("Retrieved %d samples from database", len(db_results))
                return db_results

        # Fall back to legacy analysis if numpy available
        if NUMPY_AVAILABLE and LIBROSA_AVAILABLE:
            logger.info("Using librosa analysis for samples")
            return self._get_samples_librosa(role, count, **kwargs)

        # Limited mode: return empty with warning
        logger.warning("No metadata store and no numpy - cannot select samples")
        return []


# Global instance
_selector: Optional[SampleSelector] = None


def get_selector(
    library_path: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> SampleSelector:
    """
    Get global SampleSelector instance.

    The selector is created on the first call. Arguments passed on later
    calls are ignored because the already-created instance is returned.

    Args:
        library_path: Optional library path
        metadata_store: Optional metadata store

    Returns:
        SampleSelector singleton
    """
    global _selector
    if _selector is None:
        _selector = SampleSelector(library_path, metadata_store)
    return _selector


def select_samples_for_track(
    genre: str,
    key: str = "",
    bpm: float = 0,
    metadata_store: Optional[SampleMetadataStore] = None
) -> InstrumentGroup:
    """
    Convenience function: select samples for a genre.

    Args:
        genre: Genre to select
        key: Musical key; an empty string selects the default
        bpm: Tempo in BPM; values <= 0 select the default
        metadata_store: Optional metadata store

    Returns:
        InstrumentGroup with selected samples
    """
    selector = get_selector(metadata_store=metadata_store)
    return selector.select_for_genre(
        genre,
        key if key else None,
        bpm if bpm > 0 else None
    )


def get_drum_kit(
    genre: str = "reggaeton",
    variation: str = "standard",
    metadata_store: Optional[SampleMetadataStore] = None
) -> DrumKit:
    """
    Get a drum kit for the genre.

    Args:
        genre: Genre for drum kit
        variation: Kit variation style (accepted but currently unused)
        metadata_store: Optional metadata store

    Returns:
        DrumKit with selected samples
    """
    group = get_selector(metadata_store=metadata_store).select_for_genre(genre)
    return group.drums


def get_recommended_samples(
    role: str,
    count: int = 5,
    target_bpm: Optional[float] = None,
    target_key: Optional[str] = None,
    metadata_store: Optional[SampleMetadataStore] = None
) -> List[SampleInfo]:
    """
    Get recommended samples for a role from metadata store.

    Args:
        role: Sample role/category
        count: Number of samples
        target_bpm: Optional BPM target
        target_key: Optional key target
        metadata_store: Optional metadata store

    Returns:
        List of recommended SampleInfo objects
    """
    selector = get_selector(metadata_store=metadata_store)
    return selector.get_recommended_samples(
        role=role,
        count=count,
        target_bpm=target_bpm,
        target_key=target_key
    )


def reset_cross_generation_memory():
    """Reset selection memory (compatibility stub)."""


def get_extraction_mode() -> str:
    """
    Get current extraction mode for debugging.

    Returns:
        Mode string set by SampleSelector.__init__: "database_first",
        "full_analysis" or "limited".
    """
    return get_selector().extraction_mode


def is_numpy_available() -> bool:
    """Check if numpy is available for analysis."""
    return NUMPY_AVAILABLE


def is_librosa_available() -> bool:
    """Check if librosa is available for analysis."""
    return LIBROSA_AVAILABLE