feat: reggaeton production system with intelligent sample selection and FLP generation
This commit is contained in:
330
src/selector/__init__.py
Normal file
330
src/selector/__init__.py
Normal file
@@ -0,0 +1,330 @@
|
||||
"""Sample Selector — queries the forensic sample index by musical criteria.
|
||||
|
||||
Loads data/sample_index.json and provides scored, ranked queries:
|
||||
- Role matching (exact)
|
||||
- Key compatibility (exact, relative major/minor, dominant/subdominant)
|
||||
- BPM tolerance (±5%, half/double time)
|
||||
- Character similarity (grouped characters)
|
||||
- Tonal/atonal filtering
|
||||
|
||||
Usage:
|
||||
selector = SampleSelector()
|
||||
results = selector.select(role="kick", bpm=95, limit=5)
|
||||
results = selector.select(role="bass", key="Am", bpm=92, character="deep")
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Key Compatibility
|
||||
# ---------------------------------------------------------------------------
|
||||
CIRCLE_OF_FIFTHS = ["C", "G", "D", "A", "E", "B", "F#", "C#", "G#", "D#", "A#", "F"]
|
||||
|
||||
# Relative major/minor pairs (each minor → its relative major)
|
||||
RELATIVE_MAJOR = {
|
||||
"Am": "C", "Em": "G", "Bm": "D", "F#m": "A", "C#m": "E",
|
||||
"G#m": "B", "D#m": "F#", "A#m": "C#", "Fm": "G#", "Cm": "Eb",
|
||||
"Gm": "Bb", "Dm": "F",
|
||||
# Enharmonic equivalents
|
||||
"Bbm": "Db", "Ebm": "Gb", "Abm": "B", "Bbm": "Cb",
|
||||
}
|
||||
|
||||
# Build reverse: major → relative minor
|
||||
RELATIVE_MINOR = {v: k for k, v in RELATIVE_MAJOR.items()}
|
||||
|
||||
# Dominant (V) and subdominant (IV) relationships
|
||||
DOMINANT = {"C": "G", "G": "D", "D": "A", "A": "E", "E": "B", "B": "F#",
|
||||
"F#": "C#", "C#": "G#", "G#": "D#", "D#": "A#", "A#": "F", "F": "C"}
|
||||
SUBDOMINANT = {v: k for k, v in DOMINANT.items()}
|
||||
|
||||
# Character similarity groups
|
||||
CHARACTER_GROUPS = [
|
||||
{"warm", "soft", "lush"},
|
||||
{"boomy", "deep", "dark"},
|
||||
{"sharp", "crisp", "bright"},
|
||||
{"aggressive", "tight"},
|
||||
{"ethereal", "neutral"},
|
||||
{"impact", "short"},
|
||||
{"hollow", "full"},
|
||||
]
|
||||
|
||||
# All roles the classifier produces
|
||||
KNOWN_ROLES = {
|
||||
"kick", "snare", "hihat", "bass", "lead", "pad", "pluck",
|
||||
"vocal", "arp", "guitar", "keys", "synth", "brass",
|
||||
"perc", "drumloop", "fx", "fill", "oneshot",
|
||||
}
|
||||
|
||||
# Roles that are typically atonal (key doesn't matter)
|
||||
ATONAL_ROLES = {"kick", "snare", "hihat", "perc", "fx", "fill", "oneshot"}
|
||||
|
||||
|
||||
def _normalize_key(key: str) -> str:
|
||||
"""Normalize key names: Eb→D#, Bb→A#, Db→C#, Gb→F#, Ab→G#."""
|
||||
enharmonics = {"Eb": "D#", "Bb": "A#", "Db": "C#", "Gb": "F#", "Ab": "G#", "Cb": "B"}
|
||||
return enharmonics.get(key, key)
|
||||
|
||||
|
||||
def _key_compatibility(query_key: str, sample_key: str) -> float:
|
||||
"""Score how compatible a sample's key is with the query key.
|
||||
|
||||
Returns:
|
||||
1.0 = exact match
|
||||
0.9 = same root, different mode (C ↔ Cm)
|
||||
0.8 = relative major/minor (Am ↔ C)
|
||||
0.7 = dominant/subdominant (C ↔ G or C ↔ F)
|
||||
0.5 = compatible (nearby in circle of fifths)
|
||||
0.0 = atonal or no match
|
||||
"""
|
||||
if query_key == "X" or sample_key == "X":
|
||||
return 0.0 # Atonal, no key compatibility
|
||||
|
||||
q = _normalize_key(query_key)
|
||||
s = _normalize_key(sample_key)
|
||||
|
||||
# Exact match
|
||||
if q == s:
|
||||
return 1.0
|
||||
|
||||
# Separate root and mode
|
||||
q_root = q.rstrip("m")
|
||||
q_minor = q.endswith("m")
|
||||
s_root = s.rstrip("m")
|
||||
s_minor = s.endswith("m")
|
||||
|
||||
# Same root, different mode (C ↔ Cm)
|
||||
if q_root == s_root:
|
||||
return 0.9
|
||||
|
||||
# Relative major/minor (Am ↔ C)
|
||||
if q_minor and not s_minor:
|
||||
rel = RELATIVE_MAJOR.get(q, "")
|
||||
if s_root == _normalize_key(rel):
|
||||
return 0.8
|
||||
if not q_minor and s_minor:
|
||||
rel = RELATIVE_MINOR.get(q, "")
|
||||
if s_root == _normalize_key(rel.rstrip("m")):
|
||||
return 0.8
|
||||
|
||||
# Dominant/subdominant
|
||||
q_root_norm = _normalize_key(q_root)
|
||||
s_root_norm = _normalize_key(s_root)
|
||||
if DOMINANT.get(q_root_norm) == s_root_norm or SUBDOMINANT.get(q_root_norm) == s_root_norm:
|
||||
return 0.7
|
||||
|
||||
# Circle of fifths proximity
|
||||
try:
|
||||
q_idx = CIRCLE_OF_FIFTHS.index(q_root_norm)
|
||||
s_idx = CIRCLE_OF_FIFTHS.index(s_root_norm)
|
||||
distance = min(abs(q_idx - s_idx), 12 - abs(q_idx - s_idx))
|
||||
if distance <= 2:
|
||||
return 0.5
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return 0.3
|
||||
|
||||
|
||||
def _bpm_compatibility(query_bpm: float, sample_bpm: float) -> float:
|
||||
"""Score BPM compatibility. Handles half/double time."""
|
||||
if query_bpm <= 0 or sample_bpm <= 0:
|
||||
return 0.5 # Unknown BPM, neutral score
|
||||
|
||||
ratio = sample_bpm / query_bpm
|
||||
tolerance = 0.05 # ±5%
|
||||
|
||||
# Direct match
|
||||
if abs(ratio - 1.0) <= tolerance:
|
||||
return 1.0
|
||||
# Half time
|
||||
if abs(ratio - 0.5) <= tolerance:
|
||||
return 0.8
|
||||
# Double time
|
||||
if abs(ratio - 2.0) <= tolerance:
|
||||
return 0.8
|
||||
# Near match (±10%)
|
||||
if abs(ratio - 1.0) <= 0.10:
|
||||
return 0.6
|
||||
|
||||
return 0.3
|
||||
|
||||
|
||||
def _character_compatibility(query_char: Optional[str], sample_char: str) -> float:
|
||||
"""Score character compatibility using similarity groups."""
|
||||
if not query_char:
|
||||
return 0.5 # No preference
|
||||
if query_char == sample_char:
|
||||
return 1.0
|
||||
|
||||
# Check if in same group
|
||||
for group in CHARACTER_GROUPS:
|
||||
if query_char in group and sample_char in group:
|
||||
return 0.7
|
||||
|
||||
return 0.3
|
||||
|
||||
|
||||
@dataclass
|
||||
class SampleMatch:
|
||||
"""A scored sample match from the selector."""
|
||||
score: float
|
||||
sample: dict
|
||||
score_breakdown: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class SampleSelector:
|
||||
"""Query the forensic sample index with musical criteria."""
|
||||
|
||||
def __init__(self, index_path: Optional[str] = None):
|
||||
if index_path is None:
|
||||
project = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
index_path = os.path.join(project, "data", "sample_index.json")
|
||||
|
||||
self.index_path = index_path
|
||||
self._samples: list[dict] = []
|
||||
self._by_role: dict[str, list[dict]] = {}
|
||||
self._loaded = False
|
||||
|
||||
def _load(self):
|
||||
"""Lazy-load the index."""
|
||||
if self._loaded:
|
||||
return
|
||||
with open(self.index_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
self._samples = [s for s in data.get("samples", []) if "error" not in s]
|
||||
|
||||
# Index by role for fast lookup
|
||||
self._by_role = {}
|
||||
for s in self._samples:
|
||||
role = s.get("role", "unknown")
|
||||
if role not in self._by_role:
|
||||
self._by_role[role] = []
|
||||
self._by_role[role].append(s)
|
||||
self._loaded = True
|
||||
|
||||
def select(
|
||||
self,
|
||||
role: str,
|
||||
key: Optional[str] = None,
|
||||
bpm: Optional[float] = None,
|
||||
character: Optional[str] = None,
|
||||
is_tonal: Optional[bool] = None,
|
||||
limit: int = 10,
|
||||
path_prefix: Optional[str] = None,
|
||||
) -> list[SampleMatch]:
|
||||
"""Select samples matching criteria, ranked by compatibility score.
|
||||
|
||||
Args:
|
||||
role: Required. Production role (kick, bass, lead, etc.)
|
||||
key: Musical key for compatibility (e.g. "Am", "C")
|
||||
bpm: Target BPM for tempo matching
|
||||
character: Timbre character preference (e.g. "warm", "boomy")
|
||||
is_tonal: Filter by tonal/atonal status
|
||||
limit: Maximum results to return
|
||||
path_prefix: Filter by file path prefix
|
||||
|
||||
Returns:
|
||||
List of SampleMatch objects sorted by score (descending)
|
||||
"""
|
||||
self._load()
|
||||
|
||||
if role not in KNOWN_ROLES:
|
||||
# Try fuzzy match
|
||||
role_lower = role.lower()
|
||||
for known in KNOWN_ROLES:
|
||||
if known in role_lower:
|
||||
role = known
|
||||
break
|
||||
|
||||
candidates = self._by_role.get(role, [])
|
||||
if not candidates:
|
||||
return []
|
||||
|
||||
# Score each candidate
|
||||
matches: list[SampleMatch] = []
|
||||
for s in candidates:
|
||||
# Path prefix filter
|
||||
if path_prefix:
|
||||
if path_prefix.lower() not in s.get("original_path", "").lower():
|
||||
continue
|
||||
|
||||
# Tonal filter
|
||||
if is_tonal is not None:
|
||||
sample_tonal = s.get("musical", {}).get("is_tonal", False)
|
||||
if sample_tonal != is_tonal:
|
||||
continue
|
||||
|
||||
breakdown = {}
|
||||
total = 0.0
|
||||
|
||||
# Role score (always 1.0 since we filtered by role)
|
||||
breakdown["role"] = 1.0
|
||||
total += 1.0
|
||||
|
||||
# Key compatibility
|
||||
if key and role not in ATONAL_ROLES:
|
||||
sample_key = s.get("musical", {}).get("key", "X")
|
||||
kc = _key_compatibility(key, sample_key)
|
||||
breakdown["key"] = kc
|
||||
total += kc * 2.0 # Weight key heavily
|
||||
else:
|
||||
breakdown["key"] = 0.5
|
||||
|
||||
# BPM compatibility
|
||||
if bpm:
|
||||
sample_bpm = s.get("perceptual", {}).get("tempo", 0)
|
||||
bc = _bpm_compatibility(bpm, sample_bpm)
|
||||
breakdown["bpm"] = bc
|
||||
total += bc * 1.5
|
||||
else:
|
||||
breakdown["bpm"] = 0.5
|
||||
|
||||
# Character compatibility
|
||||
cc = _character_compatibility(character, s.get("character", ""))
|
||||
breakdown["character"] = cc
|
||||
total += cc * 0.5
|
||||
|
||||
# Duration preference: shorter samples get slight bonus for flexibility
|
||||
dur = s.get("signal", {}).get("duration", 0)
|
||||
if dur > 0 and dur < 5.0:
|
||||
total += 0.1 # Short bonus
|
||||
breakdown["duration"] = dur
|
||||
|
||||
matches.append(SampleMatch(
|
||||
score=round(total, 4),
|
||||
sample=s,
|
||||
score_breakdown=breakdown,
|
||||
))
|
||||
|
||||
# Sort by score descending
|
||||
matches.sort(key=lambda m: m.score, reverse=True)
|
||||
return matches[:limit]
|
||||
|
||||
def select_one(self, role: str, **kwargs) -> Optional[dict]:
|
||||
"""Select the single best matching sample."""
|
||||
results = self.select(role=role, limit=1, **kwargs)
|
||||
return results[0].sample if results else None
|
||||
|
||||
def get_roles(self) -> list[str]:
|
||||
"""Get all available roles and their counts."""
|
||||
self._load()
|
||||
return sorted(self._by_role.keys())
|
||||
|
||||
def get_stats(self) -> dict[str, int]:
|
||||
"""Get count per role."""
|
||||
self._load()
|
||||
return {role: len(samples) for role, samples in sorted(self._by_role.items())}
|
||||
|
||||
def random_sample(self, role: str, **kwargs) -> Optional[dict]:
|
||||
"""Select a random sample from the top candidates for variation."""
|
||||
import random
|
||||
results = self.select(role=role, limit=5, **kwargs)
|
||||
if not results:
|
||||
return None
|
||||
return random.choice(results).sample
|
||||
Reference in New Issue
Block a user