feat: reggaeton production system with intelligent sample selection and FLP generation

This commit is contained in:
renato97
2026-05-02 21:40:18 -03:00
commit 4d941f3f90
62 changed files with 8656 additions and 0 deletions

330
src/selector/__init__.py Normal file
View File

@@ -0,0 +1,330 @@
"""Sample Selector — queries the forensic sample index by musical criteria.
Loads data/sample_index.json and provides scored, ranked queries:
- Role matching (exact)
- Key compatibility (exact, relative major/minor, dominant/subdominant)
- BPM tolerance (±5%, half/double time)
- Character similarity (grouped characters)
- Tonal/atonal filtering
Usage:
selector = SampleSelector()
results = selector.select(role="kick", bpm=95, limit=5)
results = selector.select(role="bass", key="Am", bpm=92, character="deep")
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Optional
from dataclasses import dataclass, field
# ---------------------------------------------------------------------------
# Key Compatibility
# ---------------------------------------------------------------------------

# The twelve pitch classes (sharp spellings) ordered by ascending fifths.
# Adjacent entries are a dominant/subdominant pair; the list wraps around.
CIRCLE_OF_FIFTHS = ["C", "G", "D", "A", "E", "B", "F#", "C#", "G#", "D#", "A#", "F"]

# Relative major/minor pairs (each minor → its relative major)
RELATIVE_MAJOR = {
    "Am": "C", "Em": "G", "Bm": "D", "F#m": "A", "C#m": "E",
    "G#m": "B", "D#m": "F#", "A#m": "C#", "Fm": "G#", "Cm": "Eb",
    "Gm": "Bb", "Dm": "F",
    # Flat-spelled (enharmonic) minors.  Every key must appear exactly once:
    # a repeated key in a dict literal silently keeps only the last value,
    # which previously replaced the correct "Bbm" → "Db" mapping with "Cb".
    "Bbm": "Db", "Ebm": "Gb", "Abm": "B",
}

# Build reverse: major → relative minor.  When two minor spellings share a
# relative major (e.g. "G#m" and "Abm" → "B"), the later entry wins.
RELATIVE_MINOR = {major: minor for minor, major in RELATIVE_MAJOR.items()}

# Dominant (V) and subdominant (IV) relationships, sharp spellings only
DOMINANT = {
    "C": "G", "G": "D", "D": "A", "A": "E", "E": "B", "B": "F#",
    "F#": "C#", "C#": "G#", "G#": "D#", "D#": "A#", "A#": "F", "F": "C",
}
SUBDOMINANT = {fifth: root for root, fifth in DOMINANT.items()}

# Character similarity groups: two characters in the same set count as
# "similar" when scoring timbre compatibility.
CHARACTER_GROUPS = [
    {"warm", "soft", "lush"},
    {"boomy", "deep", "dark"},
    {"sharp", "crisp", "bright"},
    {"aggressive", "tight"},
    {"ethereal", "neutral"},
    {"impact", "short"},
    {"hollow", "full"},
]

# All roles the classifier produces
KNOWN_ROLES = {
    "kick", "snare", "hihat", "bass", "lead", "pad", "pluck",
    "vocal", "arp", "guitar", "keys", "synth", "brass",
    "perc", "drumloop", "fx", "fill", "oneshot",
}

# Roles that are typically atonal (key doesn't matter)
ATONAL_ROLES = {"kick", "snare", "hihat", "perc", "fx", "fill", "oneshot"}
def _normalize_key(key: str) -> str:
"""Normalize key names: Eb→D#, Bb→A#, Db→C#, Gb→F#, Ab→G#."""
enharmonics = {"Eb": "D#", "Bb": "A#", "Db": "C#", "Gb": "F#", "Ab": "G#", "Cb": "B"}
return enharmonics.get(key, key)
def _key_compatibility(query_key: str, sample_key: str) -> float:
    """Score how compatible a sample's key is with the query key.

    Both keys are compared by enharmonically normalized root plus mode, so
    flat and sharp spellings of the same key (``"Ebm"`` / ``"D#m"``) are
    treated as identical, and the result is symmetric in its arguments.

    Args:
        query_key: Requested key, e.g. ``"Am"`` or ``"C"``; ``"X"`` = atonal.
        sample_key: Key stored for the sample; ``"X"`` = atonal.

    Returns:
        1.0 = exact match
        0.9 = same root, different mode (C ↔ Cm)
        0.8 = relative major/minor (Am ↔ C)
        0.7 = dominant/subdominant root (C ↔ G or C ↔ F)
        0.5 = roots at most two steps apart on the circle of fifths
        0.3 = any other pair of tonal keys
        0.0 = either key is atonal ("X")
    """
    if query_key == "X" or sample_key == "X":
        return 0.0  # Atonal, no key compatibility

    # Split the mode off *before* normalizing so that only the root goes
    # through the enharmonic table; this makes flat-spelled minors comparable
    # with their sharp spellings.
    q_minor = query_key.endswith("m")
    s_minor = sample_key.endswith("m")
    q_root = _normalize_key(query_key.rstrip("m"))
    s_root = _normalize_key(sample_key.rstrip("m"))

    # Exact match, or same root with different mode (C ↔ Cm)
    if q_root == s_root:
        return 1.0 if q_minor == s_minor else 0.9

    # Relative major/minor (Am ↔ C).  Always look up from the minor side:
    # RELATIVE_MAJOR has an entry for every sharp-spelled minor, whereas the
    # reverse table is keyed on a mix of flat and sharp major names.
    if q_minor != s_minor:
        minor_root, major_root = (q_root, s_root) if q_minor else (s_root, q_root)
        relative = RELATIVE_MAJOR.get(minor_root + "m", "")
        if relative and _normalize_key(relative) == major_root:
            return 0.8

    # Dominant/subdominant (mode is ignored, only roots are compared)
    if DOMINANT.get(q_root) == s_root or SUBDOMINANT.get(q_root) == s_root:
        return 0.7

    # Circle of fifths proximity; unknown roots fall through to the default.
    try:
        q_idx = CIRCLE_OF_FIFTHS.index(q_root)
        s_idx = CIRCLE_OF_FIFTHS.index(s_root)
    except ValueError:
        return 0.3
    steps = abs(q_idx - s_idx)
    if min(steps, 12 - steps) <= 2:
        return 0.5
    return 0.3
def _bpm_compatibility(query_bpm: float, sample_bpm: float) -> float:
"""Score BPM compatibility. Handles half/double time."""
if query_bpm <= 0 or sample_bpm <= 0:
return 0.5 # Unknown BPM, neutral score
ratio = sample_bpm / query_bpm
tolerance = 0.05 # ±5%
# Direct match
if abs(ratio - 1.0) <= tolerance:
return 1.0
# Half time
if abs(ratio - 0.5) <= tolerance:
return 0.8
# Double time
if abs(ratio - 2.0) <= tolerance:
return 0.8
# Near match (±10%)
if abs(ratio - 1.0) <= 0.10:
return 0.6
return 0.3
def _character_compatibility(query_char: Optional[str], sample_char: str) -> float:
"""Score character compatibility using similarity groups."""
if not query_char:
return 0.5 # No preference
if query_char == sample_char:
return 1.0
# Check if in same group
for group in CHARACTER_GROUPS:
if query_char in group and sample_char in group:
return 0.7
return 0.3
@dataclass
class SampleMatch:
    """One ranked result produced by the selector."""

    # Total weighted compatibility score; higher means a better fit.
    score: float
    # The sample's entry from the index, as loaded from JSON.
    sample: dict
    # Per-criterion values that went into ``score``; each instance gets its
    # own fresh dict when none is supplied.
    score_breakdown: dict = field(default_factory=dict)
class SampleSelector:
    """Query the forensic sample index with musical criteria.

    The JSON index is read lazily on the first query and then kept in
    memory, grouped by role, for the lifetime of the instance.
    """

    def __init__(self, index_path: Optional[str] = None):
        """Create a selector.

        Args:
            index_path: Path to the index JSON.  When omitted, defaults to
                ``data/sample_index.json`` under the directory three levels
                above this file.
        """
        if index_path is None:
            project = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
            index_path = os.path.join(project, "data", "sample_index.json")
        self.index_path = index_path
        self._samples: list[dict] = []
        self._by_role: dict[str, list[dict]] = {}
        self._loaded = False

    def _load(self):
        """Lazy-load the index; a no-op after the first successful call.

        Entries containing an ``"error"`` key are dropped.  Entries without a
        ``"role"`` are grouped under ``"unknown"``.
        """
        if self._loaded:
            return
        with open(self.index_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        self._samples = [s for s in data.get("samples", []) if "error" not in s]
        # Index by role for fast lookup
        by_role: dict[str, list[dict]] = {}
        for s in self._samples:
            by_role.setdefault(s.get("role", "unknown"), []).append(s)
        self._by_role = by_role
        self._loaded = True

    @staticmethod
    def _resolve_role(role: str) -> str:
        """Map a free-form role string onto a known role where possible.

        A role already in KNOWN_ROLES is returned as is.  Otherwise the first
        known role contained in the lower-cased input wins; candidates are
        tried longest first, then alphabetically, so the outcome does not
        depend on set iteration order.  Unmatched input is returned unchanged.
        """
        if role in KNOWN_ROLES:
            return role
        role_lower = role.lower()
        for known in sorted(KNOWN_ROLES, key=lambda r: (-len(r), r)):
            if known in role_lower:
                return known
        return role

    @staticmethod
    def _score(s: dict, role: str, key: Optional[str], bpm: Optional[float],
               character: Optional[str]) -> SampleMatch:
        """Compute the weighted score and breakdown for one candidate."""
        breakdown = {}
        total = 0.0

        # Role score (always 1.0 since candidates are pre-filtered by role)
        breakdown["role"] = 1.0
        total += 1.0

        # Key compatibility (skipped for atonal roles); weighted heavily
        if key and role not in ATONAL_ROLES:
            sample_key = s.get("musical", {}).get("key", "X")
            if sample_key is None:
                # A null key in the index is treated like the atonal marker.
                sample_key = "X"
            kc = _key_compatibility(key, sample_key)
            breakdown["key"] = kc
            total += kc * 2.0
        else:
            breakdown["key"] = 0.5

        # BPM compatibility
        if bpm:
            # ``or 0`` turns a null tempo into "unknown" instead of crashing.
            sample_bpm = s.get("perceptual", {}).get("tempo", 0) or 0
            bc = _bpm_compatibility(bpm, sample_bpm)
            breakdown["bpm"] = bc
            total += bc * 1.5
        else:
            breakdown["bpm"] = 0.5

        # Character compatibility
        cc = _character_compatibility(character, s.get("character", ""))
        breakdown["character"] = cc
        total += cc * 0.5

        # Duration preference: shorter samples get slight bonus for flexibility
        dur = s.get("signal", {}).get("duration", 0) or 0
        if 0 < dur < 5.0:
            total += 0.1  # Short bonus
        breakdown["duration"] = dur

        return SampleMatch(score=round(total, 4), sample=s, score_breakdown=breakdown)

    def select(
        self,
        role: str,
        key: Optional[str] = None,
        bpm: Optional[float] = None,
        character: Optional[str] = None,
        is_tonal: Optional[bool] = None,
        limit: int = 10,
        path_prefix: Optional[str] = None,
    ) -> list[SampleMatch]:
        """Select samples matching criteria, ranked by compatibility score.

        Args:
            role: Required. Production role (kick, bass, lead, etc.).  A
                string that merely contains a known role is mapped onto it.
            key: Musical key for compatibility (e.g. "Am", "C").  Ignored for
                atonal roles.
            bpm: Target BPM for tempo matching
            character: Timbre character preference (e.g. "warm", "boomy")
            is_tonal: Filter by tonal/atonal status
            limit: Maximum results to return
            path_prefix: Case-insensitive text that must occur somewhere in
                the sample's ``original_path`` (the match is a substring
                test, not anchored to the start of the path).

        Returns:
            List of SampleMatch objects sorted by score (descending); ties
            keep index order.  Empty when the role has no samples.
        """
        self._load()
        role = self._resolve_role(role)
        candidates = self._by_role.get(role, [])
        if not candidates:
            return []

        needle = path_prefix.lower() if path_prefix else None
        matches: list[SampleMatch] = []
        for s in candidates:
            # Path filter
            if needle and needle not in s.get("original_path", "").lower():
                continue
            # Tonal filter
            if is_tonal is not None:
                if s.get("musical", {}).get("is_tonal", False) != is_tonal:
                    continue
            matches.append(self._score(s, role, key, bpm, character))

        # Sort by score descending
        matches.sort(key=lambda m: m.score, reverse=True)
        return matches[:limit]

    def select_one(self, role: str, **kwargs) -> Optional[dict]:
        """Select the single best matching sample, or None if there is none.

        Accepts the same keyword arguments as :meth:`select`; a ``limit``
        argument is ignored because exactly one result is requested.
        """
        # Drop a caller-supplied limit so it cannot collide with limit=1.
        kwargs.pop("limit", None)
        results = self.select(role=role, limit=1, **kwargs)
        return results[0].sample if results else None

    def get_roles(self) -> list[str]:
        """Return the sorted names of all roles present in the index."""
        self._load()
        return sorted(self._by_role.keys())

    def get_stats(self) -> dict[str, int]:
        """Get count per role."""
        self._load()
        return {role: len(samples) for role, samples in sorted(self._by_role.items())}

    def random_sample(self, role: str, **kwargs) -> Optional[dict]:
        """Select a random sample from the top candidates for variation.

        Accepts the same keyword arguments as :meth:`select`.  ``limit``
        sets how many top-ranked candidates to draw from (default 5).
        """
        import random

        pool_size = kwargs.pop("limit", 5)
        results = self.select(role=role, limit=pool_size, **kwargs)
        if not results:
            return None
        return random.choice(results).sample