feat: reggaeton production system with intelligent sample selection and FLP generation

Author: renato97
Date: 2026-05-02 21:40:18 -03:00
Commit: 4d941f3f90
62 changed files with 8656 additions and 0 deletions

src/analyzer/__init__.py Normal file

@@ -0,0 +1,827 @@
"""Deep forensic audio sample analyzer.
4-layer analysis pipeline:
Layer 1 - Signal: FFT, spectral centroid, bandwidth, rolloff, flatness, ZCR, RMS, crest factor
Layer 2 - Perceptual: MFCC (20), chromagram (12), onset envelope, tempo, LUFS
Layer 3 - Musical: Key estimation (Krumhansl-Schmuckler), F0 via aubio (C-native), tonal/atonal
Layer 4 - Timbre: Mel band stats, spectral contrast, tonnetz
Architecture: ProcessPoolExecutor with 16 workers for TRUE multi-core parallelism.
aubio for F0 (C-native, ~1ms per file vs pyin ~2s per file).
"""
from __future__ import annotations
import os
import json
import hashlib
from pathlib import Path
from typing import Optional
from concurrent.futures import ProcessPoolExecutor, as_completed
import numpy as np
import librosa
import soundfile as sf
import aubio
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
SAMPLE_RATE = 44100
HOP_LENGTH = 512
N_FFT = 2048
N_MFCC = 20
N_CHROMA = 12
MAX_WORKERS = 16  # 16 of 24 cores (~67%)
AUDIO_EXT = {".wav", ".flac", ".mp3", ".aif", ".aiff"}
# Krumhansl-Schmuckler key profiles
MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Character classification thresholds
CHARACTERS = {
"boomy": {"low_ratio_min": 0.6, "centroid_max": 400},
"deep": {"low_ratio_min": 0.5, "centroid_max": 500, "fundamental_max": 150},
"sharp": {"high_ratio_min": 0.4, "centroid_min": 3000, "attack_max": 0.005},
"crisp": {"high_ratio_min": 0.3, "centroid_min": 4000, "duration_max": 0.2},
"warm": {"centroid_min": 300, "centroid_max": 2000, "mid_ratio_min": 0.4},
"bright": {"centroid_min": 3000, "high_ratio_min": 0.3},
"dark": {"centroid_max": 800, "low_ratio_min": 0.4},
"ethereal": {"centroid_min": 1500, "centroid_max": 5000, "rms_std_max": 0.03},
"short": {"duration_max": 0.15},
"impact": {"attack_max": 0.005, "peak_rms_ratio_min": 5.0},
"full": {"duration_min": 1.0, "bandwidth_min": 4000},
"hollow": {"mid_ratio_max": 0.2, "low_ratio_min": 0.3, "high_ratio_min": 0.3},
"tight": {"attack_max": 0.003, "duration_max": 0.3, "centroid_min": 1000},
"lush": {"spectral_flatness_min": 0.1, "mid_ratio_min": 0.3, "duration_min": 0.5},
"aggressive": {"peak_rms_ratio_min": 4.0, "centroid_min": 2000},
"soft": {"peak_rms_ratio_max": 3.0, "attack_min": 0.01},
}
# ---------------------------------------------------------------------------
# Layer 1: Signal Analysis
# ---------------------------------------------------------------------------
def analyze_signal(y: np.ndarray, sr: int) -> dict:
"""Layer 1: Time-domain and spectral signal features."""
duration = len(y) / sr
rms = librosa.feature.rms(y=y, hop_length=HOP_LENGTH)[0]
rms_mean = float(np.mean(rms))
rms_std = float(np.std(rms))
peak = float(np.max(np.abs(y)))
crest_factor = peak / (rms_mean + 1e-10)
peak_rms_ratio = float(np.max(rms) / (np.mean(rms) + 1e-10))
zcr = librosa.feature.zero_crossing_rate(y, hop_length=HOP_LENGTH)[0]
zcr_mean = float(np.mean(zcr))
S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
S_power = S ** 2
    # librosa's spectral features expect a magnitude spectrogram; S_power
    # (S ** 2) is kept only for the band-energy ratios below
    spectral_centroid = librosa.feature.spectral_centroid(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_flatness = librosa.feature.spectral_flatness(S=S)[0]
freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)
low_mask = freqs < 300
mid_mask = (freqs >= 300) & (freqs < 3000)
high_mask = freqs >= 3000
band_energy = np.mean(S_power, axis=1)
total_energy = np.sum(band_energy) + 1e-10
low_ratio = float(np.sum(band_energy[low_mask]) / total_energy)
mid_ratio = float(np.sum(band_energy[mid_mask]) / total_energy)
high_ratio = float(np.sum(band_energy[high_mask]) / total_energy)
rms_peak_idx = int(np.argmax(rms))
attack_time = float(rms_peak_idx * HOP_LENGTH / sr)
return {
"duration": round(duration, 4),
"rms_mean": round(rms_mean, 6),
"rms_std": round(rms_std, 6),
"peak_amplitude": round(peak, 6),
"crest_factor": round(crest_factor, 2),
"peak_rms_ratio": round(peak_rms_ratio, 2),
"zcr_mean": round(zcr_mean, 4),
"spectral_centroid_mean": round(float(np.mean(spectral_centroid)), 2),
"spectral_centroid_std": round(float(np.std(spectral_centroid)), 2),
"spectral_centroid_max": round(float(np.max(spectral_centroid)), 2),
"spectral_bandwidth_mean": round(float(np.mean(spectral_bandwidth)), 2),
"spectral_rolloff_mean": round(float(np.mean(spectral_rolloff)), 2),
"spectral_flatness_mean": round(float(np.mean(spectral_flatness)), 6),
"low_energy_ratio": round(low_ratio, 4),
"mid_energy_ratio": round(mid_ratio, 4),
"high_energy_ratio": round(high_ratio, 4),
"attack_time": round(attack_time, 4),
}
# ---------------------------------------------------------------------------
# Layer 2: Perceptual Analysis
# ---------------------------------------------------------------------------
def analyze_perceptual(y: np.ndarray, sr: int) -> dict:
"""Layer 2: MFCC, chromagram, onset, tempo."""
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, hop_length=HOP_LENGTH)
mfcc_means = [round(float(np.mean(mfcc[i])), 4) for i in range(N_MFCC)]
mfcc_stds = [round(float(np.std(mfcc[i])), 4) for i in range(N_MFCC)]
chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=HOP_LENGTH)
chroma_mean = np.mean(chroma, axis=1)
onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)
onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH)
onset_count = len(onset_times)
tempo = 0.0
if len(onset_env) > 0:
tempo_vals = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
if len(tempo_vals) > 0:
tempo = float(tempo_vals[0])
lufs = _compute_lufs(y, sr)
return {
"mfcc_means": mfcc_means,
"mfcc_stds": mfcc_stds,
"chroma_mean": [round(float(v), 4) for v in chroma_mean],
"onset_count": onset_count,
"onset_density": round(onset_count / max(len(y) / sr, 0.01), 2),
"tempo": round(tempo, 2),
"lufs": round(lufs, 2),
}
def _compute_lufs(y: np.ndarray, sr: int) -> float:
"""Simplified LUFS (integrated loudness) approximation."""
try:
        from scipy.signal import butter, sosfilt
        # K-weighting approximation: 2nd-order high-pass near 60 Hz, then a
        # high-frequency shelf (~+4 dB above 1.5 kHz) approximated by adding
        # back a scaled high-passed copy (10**(4/20) - 1 ≈ 0.585). A second
        # cascaded high-pass here would wrongly discard everything below 1.5 kHz.
        sos_hp = butter(2, 60, btype='high', fs=sr, output='sos')
        y_filtered = sosfilt(sos_hp, y)
        sos_hs = butter(1, 1500, btype='high', fs=sr, output='sos')
        y_filtered = y_filtered + 0.585 * sosfilt(sos_hs, y_filtered)
block_size = int(0.4 * sr)
hop = int(0.1 * sr)
if len(y_filtered) < block_size:
block_size = len(y_filtered)
hop = max(1, block_size // 4)
blocks = []
for i in range(0, len(y_filtered) - block_size + 1, hop):
block = y_filtered[i:i + block_size]
rms = np.sqrt(np.mean(block ** 2))
if rms > 1e-10:
blocks.append(rms)
if not blocks:
return -70.0
mean_rms = np.mean(blocks)
lufs = -0.691 + 10 * np.log10(mean_rms ** 2 + 1e-20)
return max(lufs, -70.0)
except Exception:
return -70.0
# ---------------------------------------------------------------------------
# F0 Detection via aubio (C-native, ~1ms per file)
# ---------------------------------------------------------------------------
def _fast_f0(y: np.ndarray, sr: int) -> float:
"""Estimate fundamental frequency using aubio's YIN implementation.
This is C-native code running at ~1ms per file, vs librosa.pyin at ~2s."""
try:
# aubio pitch detector
win_s = N_FFT
hop_s = HOP_LENGTH
pitch_o = aubio.pitch("yin", win_s, hop_s, sr)
pitch_o.set_unit("Hz")
pitch_o.set_tolerance(0.8) # confidence threshold
        # Process in hop-sized chunks: aubio consumes hop_s samples per call
        # (win_s only sets its internal analysis window)
        pitches = []
        for i in range(0, len(y) - hop_s + 1, hop_s):
            chunk = y[i:i + hop_s].astype(np.float32)
            if len(chunk) < hop_s:
                break
freq = pitch_o(chunk)
if freq[0] > 0:
pitches.append(float(freq[0]))
if pitches:
return float(np.median(pitches))
return 0.0
except Exception:
return 0.0
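# Sanity check (sketch): a pure 440 Hz tone should come back near 440:
#   t = np.linspace(0, 1, SAMPLE_RATE, endpoint=False)
#   _fast_f0(np.sin(2 * np.pi * 440 * t), SAMPLE_RATE)  # ≈ 440.0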
# ---------------------------------------------------------------------------
# Layer 3: Musical Analysis
# ---------------------------------------------------------------------------
def analyze_musical(signal_features: dict, perceptual_features: dict, y: np.ndarray, sr: int) -> dict:
"""Layer 3: Key estimation, tonal/atonal, F0 via aubio, one-shot vs loop."""
chroma_mean = np.array(perceptual_features["chroma_mean"])
key_name, key_correlation, mode = _estimate_key(chroma_mean)
chroma_max = float(np.max(chroma_mean))
chroma_std = float(np.std(chroma_mean))
is_tonal = chroma_std > 0.05 and chroma_max > 0.15
duration = signal_features["duration"]
onset_count = perceptual_features["onset_count"]
is_oneshot = duration < 2.0 and onset_count <= 2
is_loop = duration > 1.5 and onset_count >= 4
f0 = 0.0
f0_note = "X"
if is_tonal:
f0 = _fast_f0(y, sr)
if f0 > 0:
midi_note = int(round(12 * np.log2(f0 / 440.0) + 69))
f0_note = _midi_to_note_name(midi_note)
return {
"key": key_name,
"key_correlation": round(key_correlation, 4),
"mode": mode,
"is_tonal": is_tonal,
"is_oneshot": is_oneshot,
"is_loop": is_loop,
"fundamental_freq": round(f0, 2),
"fundamental_note": f0_note,
}
def _estimate_key(chroma_profile: np.ndarray) -> tuple:
"""Krumhansl-Schmuckler key-finding algorithm."""
if np.max(chroma_profile) < 0.01:
return "X", 0.0, "atonal"
chroma_norm = chroma_profile / (np.sum(chroma_profile) + 1e-10)
best_key = "C"
best_corr = -1.0
best_mode = "atonal"
for i in range(12):
rotated = np.roll(chroma_norm, -i)
major_corr = float(np.corrcoef(rotated, MAJOR_PROFILE)[0, 1])
if np.isnan(major_corr):
major_corr = 0.0
minor_corr = float(np.corrcoef(rotated, MINOR_PROFILE)[0, 1])
if np.isnan(minor_corr):
minor_corr = 0.0
if major_corr > best_corr:
best_corr = major_corr
best_key = NOTE_NAMES[i]
best_mode = "major"
if minor_corr > best_corr:
best_corr = minor_corr
best_key = NOTE_NAMES[i]
best_mode = "minor"
if best_corr < 0.3:
return "X", best_corr, "atonal"
if best_mode == "minor":
return f"{best_key}m", best_corr, "minor"
return best_key, best_corr, "major"
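# Worked example (sketch): a chroma vector peaking on C, E and G should
# correlate best with the major profile at rotation 0:
#   chroma = np.zeros(12); chroma[[0, 4, 7]] = [1.0, 0.8, 0.9]
#   _estimate_key(chroma)  # -> ("C", <high corr>, "major")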
def _midi_to_note_name(midi: int) -> str:
if midi < 0 or midi > 127:
return "X"
note = NOTE_NAMES[midi % 12]
octave = midi // 12 - 1
return f"{note}{octave}"
# ---------------------------------------------------------------------------
# Layer 4: Timbre Fingerprint
# ---------------------------------------------------------------------------
def analyze_timbre(y: np.ndarray, sr: int) -> dict:
"""Layer 4: Mel spectrogram statistics for timbre fingerprinting."""
mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, hop_length=HOP_LENGTH)
mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
n_bands = 8
band_size = 128 // n_bands
band_stats = []
for b in range(n_bands):
start = b * band_size
end = start + band_size
band = mel_spec_db[start:end, :]
band_stats.append({
"mean": round(float(np.mean(band)), 2),
"std": round(float(np.std(band)), 2),
"max": round(float(np.max(band)), 2),
})
contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=HOP_LENGTH)
contrast_mean = [round(float(np.mean(contrast[i])), 4) for i in range(min(7, contrast.shape[0]))]
try:
tonnetz = librosa.feature.tonnetz(y=y, sr=sr, hop_length=HOP_LENGTH)
tonnetz_mean = [round(float(np.mean(tonnetz[i])), 4) for i in range(min(6, tonnetz.shape[0]))]
except Exception:
tonnetz_mean = [0.0] * 6
return {
"mel_band_stats": band_stats,
"spectral_contrast": contrast_mean,
"tonnetz": tonnetz_mean,
}
# ---------------------------------------------------------------------------
# Classification — layered priority: filename → structured pack names → folder → spectral heuristic
# ---------------------------------------------------------------------------
def classify_role(signal: dict, perceptual: dict, musical: dict, folder_hint: str = "") -> str:
"""Classify sample into a production role.
Priority order:
1. FILENAME keywords (most reliable — producers name their files correctly)
2. FOLDER structure (less reliable — "SentimientoLatino" has everything mixed)
3. SPECTRAL heuristics (fallback for unnamed/unknown samples)
"""
filename = folder_hint.lower() # contains parent + current folder names
# ====================================================================
# LAYER 1: Filename keyword extraction (HIGHEST PRIORITY)
# Matches specific patterns like "Lead", "Pad", "Bass" in filenames
# ====================================================================
# Ordered by specificity — more specific keywords first
filename_map = [
# (keyword_pattern, role, require_not) — avoid false positives
(["reese"], "bass", []),
(["808"], "bass", []),
(["kick"], "kick", ["kickdown", "kick drum"]),
(["snare"], "snare", []),
(["hi-hat", "hihat", "hats", "hat "], "hihat", []),
(["shaker"], "perc", []),
(["tambourine", "tambor"], "perc", []),
(["conga", "bongo", "rim"], "perc", []),
(["timbal"], "perc", []),
(["vocal chop", "v.chop", "vox chop"], "vocal", []),
(["vocal", "vox", "vocals"], "vocal", []),
(["pluck"], "pluck", []),
(["bell"], "pluck", []),
(["stab"], "oneshot", []),
(["lead"], "lead", []),
(["arp", "arpeggio"], "arp", []),
(["pad reverse"], "pad", []),
(["pad", "pads"], "pad", []),
(["chord", "chords"], "pad", []),
(["rhodes", "piano", "keys", "key "], "keys", []),
(["guitar"], "guitar", []),
(["string"], "pad", []),
(["brass"], "brass", []),
(["synth"], "synth", []),
(["texture"], "pad", []),
(["riser", "sweep", "impact", "explosion"], "fx", []),
(["loop"], "drumloop", ["vocal loop", "melody loop"]),
(["fill"], "fill", []),
(["drum"], "drumloop", []),
]
for keywords, role, excludes in filename_map:
for kw in keywords:
if kw in filename:
# Check exclusions
excluded = False
for ex in excludes:
if ex in filename:
excluded = True
break
if not excluded:
return role
# ====================================================================
# LAYER 2: Midilatino / SS_RNBL structured filename parsing
# These packs have naming conventions we can extract roles from
# ====================================================================
# Midilatino pattern: "Midilatino_Song_Key_BPM_StemType.wav"
# e.g. "Midilatino_Holanda_F_Min_108BPM_Lead.wav"
# e.g. "Midilatino_Cookie_E_Min_89BPM_Pluck.wav"
parts = filename.replace(".wav", "").replace(".flac", "").replace(".mp3", "").split("_")
if len(parts) >= 2:
# Check last part for stem type
last_parts = " ".join(parts[-2:]).lower()
stem_map = {
"drums": "drumloop", "drum": "drumloop",
"bass": "bass", "reese": "bass",
"lead": "lead", "pluck": "pluck", "pluck fx": "fx",
"pad": "pad", "pad reverse": "pad",
"arp": "arp", "vocal": "vocal", "vocals": "vocal",
"vocal chop": "vocal", "vox": "vocal",
"guitar": "guitar", "rhodes": "keys", "rhode": "keys",
"piano": "keys", "keys": "keys",
"synth": "synth", "texture": "pad", "texture 2": "pad",
"bell chords": "pad", "accent": "oneshot", "accent keys": "keys",
"harp": "pluck", "shaker": "perc",
}
for stem_kw, stem_role in stem_map.items():
if stem_kw in last_parts:
return stem_role
# SS_RNBL pattern: "SS_RNBL_Song_Stem_Type.wav"
# e.g. "SS_RNBL_Amor_One_Shot_Bass_C_.wav"
if "ss_rnbl" in filename or "ss rnbl" in filename:
ss_map = {
"kick": "kick", "snare": "snare", "hats": "hihat", "hat": "hihat",
"perc": "perc", "bass": "bass", "lead": "lead", "pad": "pad",
"fx": "fx", "top": "drumloop", "drum": "drumloop",
"v.chop": "vocal", "phrases": "vocal",
"one shot": "oneshot", "music": "drumloop",
"double": "drumloop", "add": "drumloop",
"gustas": "drumloop", # "Gustas" are full loop sections
}
for kw, role in ss_map.items():
if kw in filename:
return role
# ====================================================================
# LAYER 3: Folder-based hints (MEDIUM PRIORITY)
# Only for folders that are explicitly categorized
# ====================================================================
folder_map = {
"kick": "kick", "kicks": "kick", "8. kicks": "kick",
"snare": "snare", "snares": "snare", "9. snare": "snare",
"hi-hat": "hihat", "hihat": "hihat", "hi-hats": "hihat",
"bass": "bass",
"perc": "perc", "percs": "perc", "10. percs": "perc",
"fx": "fx", "5. fx": "fx",
"drum loops": "drumloop", "4. drum loops": "drumloop", "drumloops": "drumloop",
"vocal": "vocal", "vocals": "vocal", "11. vocals": "vocal",
"fill": "fill", "fills": "fill", "7. fill": "fill",
"3. one shots": "oneshot",
}
for key, role in folder_map.items():
if key in folder_hint.lower():
return role
# ====================================================================
# LAYER 4: Spectral heuristics (LOWEST PRIORITY — fallback only)
# Only used when filename and folder give no signal
# ====================================================================
centroid = signal["spectral_centroid_mean"]
low_r = signal["low_energy_ratio"]
high_r = signal["high_energy_ratio"]
dur = signal["duration"]
onsets = perceptual["onset_count"]
is_tonal = musical["is_tonal"]
attack = signal["attack_time"]
rms_std = signal["rms_std"]
# Percussive one-shots
if centroid < 600 and low_r > 0.5 and dur < 1.0 and attack < 0.01 and onsets <= 3:
return "kick"
if centroid > 5000 and high_r > 0.4 and dur < 0.3:
return "hihat"
if 1000 < centroid < 5000 and attack < 0.005 and onsets <= 2:
return "snare"
if dur < 0.5 and onsets <= 2 and 500 < centroid < 5000:
return "perc"
# Tonal classification (for long tonal samples that didn't match filename)
if is_tonal:
# Sub-bass / bass
if centroid < 200 and low_r > 0.7:
return "bass"
# Pad: sustained, low variance, long
if rms_std < 0.05 and dur > 1.0 and centroid < 4000:
return "pad"
# Pluck: short, tonal
if dur < 0.8 and onsets <= 3:
return "pluck"
# Lead: prominent, mid-high frequency
if 500 < centroid < 6000:
return "lead"
# Keys: mid frequency, moderate dynamics
if 200 < centroid < 2000 and rms_std < 0.1:
return "keys"
# Generic tonal loop
if dur > 2.0 and onsets > 4:
return "drumloop"
return "synth"
# Atonal loops
if dur > 2.0 and onsets >= 4:
return "drumloop"
# Short atonal
if dur < 2.0 and onsets <= 1:
return "oneshot"
return "fx"
def classify_character(signal: dict, perceptual: dict, musical: dict) -> str:
"""Classify the sonic character."""
centroid = signal["spectral_centroid_mean"]
low_r = signal["low_energy_ratio"]
high_r = signal["high_energy_ratio"]
mid_r = signal["mid_energy_ratio"]
dur = signal["duration"]
attack = signal["attack_time"]
peak_rms = signal["peak_rms_ratio"]
flatness = signal["spectral_flatness_mean"]
rms_std = signal["rms_std"]
scores = {}
if low_r >= 0.6 and centroid <= 400:
scores["boomy"] = low_r * 2
if low_r >= 0.5 and centroid <= 500:
scores["deep"] = low_r * 1.5
if high_r >= 0.4 and centroid >= 3000 and attack <= 0.005:
scores["sharp"] = high_r * 2
if high_r >= 0.3 and centroid >= 4000 and dur <= 0.2:
scores["crisp"] = high_r * 1.5
if 300 <= centroid <= 2000 and mid_r >= 0.4:
scores["warm"] = mid_r * 1.5
if centroid >= 3000 and high_r >= 0.3:
scores["bright"] = high_r * 1.5
if centroid <= 800 and low_r >= 0.4:
scores["dark"] = low_r * 1.5
if 1500 <= centroid <= 5000 and rms_std <= 0.03:
scores["ethereal"] = 1.0
if dur <= 0.15:
scores["short"] = 1.0
if attack <= 0.005 and peak_rms >= 5.0:
scores["impact"] = peak_rms / 5.0
if dur >= 1.0 and signal["spectral_bandwidth_mean"] >= 4000:
scores["full"] = 1.0
if mid_r <= 0.2 and low_r >= 0.3 and high_r >= 0.3:
scores["hollow"] = 1.0
if attack <= 0.003 and dur <= 0.3 and centroid >= 1000:
scores["tight"] = 1.0
if flatness >= 0.1 and mid_r >= 0.3 and dur >= 0.5:
scores["lush"] = flatness * 5
if peak_rms >= 4.0 and centroid >= 2000:
scores["aggressive"] = peak_rms / 4.0
if peak_rms <= 3.0 and attack >= 0.01:
scores["soft"] = 1.0
return max(scores, key=scores.get) if scores else "neutral"
# ---------------------------------------------------------------------------
# Full Analysis Pipeline (single file)
# ---------------------------------------------------------------------------
def analyze_file(filepath: str) -> Optional[dict]:
"""Full 4-layer analysis of a single audio file.
This function is picklable and runs in separate processes."""
try:
y, sr = librosa.load(filepath, sr=SAMPLE_RATE, mono=True, duration=30.0)
if len(y) < 512:
return None
peak = np.max(np.abs(y))
if peak > 1e-6:
y = y / peak
path = Path(filepath)
# Pass BOTH the full filename and folder structure to classifier
classify_hint = f"{path.parent.parent.name} {path.parent.name} {path.stem}"
signal = analyze_signal(y, sr)
perceptual = analyze_perceptual(y, sr)
musical = analyze_musical(signal, perceptual, y, sr)
timbre = analyze_timbre(y, sr)
role = classify_role(signal, perceptual, musical, classify_hint)
character = classify_character(signal, perceptual, musical)
new_name = _generate_name(role, musical, perceptual, character, filepath)
file_hash = _hash_file(filepath)
return {
"original_path": filepath,
"original_name": path.name,
"new_name": new_name,
"file_hash": file_hash,
"file_size": os.path.getsize(filepath),
"role": role,
"character": character,
"musical": musical,
"signal": signal,
"perceptual": {
"mfcc_means": perceptual["mfcc_means"],
"mfcc_stds": perceptual["mfcc_stds"],
"onset_count": perceptual["onset_count"],
"onset_density": perceptual["onset_density"],
"tempo": perceptual["tempo"],
"lufs": perceptual["lufs"],
},
"timbre": timbre,
}
except Exception as e:
return {"original_path": filepath, "error": str(e)}
def _generate_name(role: str, musical: dict, perceptual: dict, character: str, filepath: str) -> str:
key = musical["fundamental_note"] if musical["is_tonal"] else "X"
if key == "X" and musical["key"] != "X":
key = musical["key"]
bpm = int(perceptual["tempo"]) if perceptual["tempo"] > 0 else 0
short_id = hashlib.md5(filepath.encode()).hexdigest()[:6]
ext = Path(filepath).suffix
return f"{role}_{key}_{bpm:03d}_{character}_{short_id}{ext}"
def _hash_file(filepath: str) -> str:
h = hashlib.md5()
size = os.path.getsize(filepath)
with open(filepath, "rb") as f:
h.update(f.read(65536))
if size > 131072:
f.seek(size - 65536)
h.update(f.read(65536))
return h.hexdigest()
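# Design note: hashing only the first 64 KiB (plus the last 64 KiB for files
# over 128 KiB) trades exhaustive integrity for speed: enough to deduplicate
# a sample library, not to detect tampering.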
# ---------------------------------------------------------------------------
# File Collection
# ---------------------------------------------------------------------------
def collect_audio_files(*directories: str) -> list[str]:
files = []
for d in directories:
base = Path(d)
if not base.exists():
continue
for f in base.rglob("*"):
if f.is_file() and f.suffix.lower() in AUDIO_EXT:
files.append(str(f))
return sorted(files)
# ---------------------------------------------------------------------------
# Batch Analysis (TRUE multiprocessing)
# ---------------------------------------------------------------------------
def batch_analyze(files: list[str], workers: int = MAX_WORKERS, checkpoint_path: Optional[str] = None) -> list[dict]:
"""Analyze all files using ProcessPoolExecutor for real multi-core parallelism.
Each process runs independently — no GIL contention, no shared memory."""
results = []
errors = []
done = 0
total = len(files)
# Resume from checkpoint
completed_paths = set()
if checkpoint_path and os.path.exists(checkpoint_path):
with open(checkpoint_path, "r", encoding="utf-8") as f:
for line in f:
try:
entry = json.loads(line.strip())
completed_paths.add(entry["original_path"])
results.append(entry)
except (json.JSONDecodeError, KeyError):
pass
done = len(results)
print(f"Resumed from checkpoint: {done}/{total}")
remaining = [f for f in files if f not in completed_paths]
if not remaining:
print("All files already analyzed.")
return results
print(f"Analyzing {len(remaining)} files with {workers} PROCESSES (true parallel)...")
with ProcessPoolExecutor(max_workers=workers) as executor:
futures = {executor.submit(analyze_file, f): f for f in remaining}
for future in as_completed(futures):
filepath = futures[future]
done += 1
try:
result = future.result()
if result is None:
errors.append(filepath)
continue
if "error" in result:
errors.append(f"{filepath}: {result['error']}")
continue
results.append(result)
if checkpoint_path and done % 50 == 0:
_save_checkpoint(results, checkpoint_path)
if done % 25 == 0 or done == total:
print(f" [{done}/{total}] {result.get('new_name', '?')}")
except Exception as e:
errors.append(f"{filepath}: {e}")
if errors:
print(f"\nErrors ({len(errors)}):")
for e in errors[:10]:
print(f" - {e}")
if len(errors) > 10:
print(f" ... and {len(errors) - 10} more")
return results
def _save_checkpoint(results: list[dict], path: str):
with open(path, "w", encoding="utf-8") as f:
for r in results:
f.write(json.dumps(r, ensure_ascii=False) + "\n")
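# Checkpoint format note: JSONL, one analysis dict per line, so a truncated
# write loses at most the final line and batch_analyze() resumes from the rest.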
def save_index(results: list[dict], output_path: str):
roles = {}
keys = {}
characters = {}
for r in results:
if "error" in r:
continue
role = r.get("role", "unknown")
roles[role] = roles.get(role, 0) + 1
key = r.get("musical", {}).get("key", "X")
keys[key] = keys.get(key, 0) + 1
char = r.get("character", "unknown")
characters[char] = characters.get(char, 0) + 1
index = {
"metadata": {
"total_samples": len(results),
"errors": sum(1 for r in results if "error" in r),
"roles": roles,
"keys": keys,
"characters": characters,
},
"samples": results,
}
with open(output_path, "w", encoding="utf-8") as f:
json.dump(index, f, ensure_ascii=False, indent=2)
print(f"Index saved to {output_path}")
print(f" Total: {len(results)} | Roles: {roles} | Keys: {len(keys)} | Characters: {len(characters)}")
# ---------------------------------------------------------------------------
# Rename Engine
# ---------------------------------------------------------------------------
def plan_renames(results: list[dict], output_dir: str) -> list[dict]:
out = Path(output_dir)
renames = []
seen_names = set()
for r in results:
if "error" in r or "new_name" not in r:
continue
old_path = Path(r["original_path"])
role = r["role"]
new_name = r["new_name"]
if new_name in seen_names:
stem = Path(new_name).stem
ext = Path(new_name).suffix
counter = 2
candidate = f"{stem}_{counter}{ext}"
while candidate in seen_names:
counter += 1
candidate = f"{stem}_{counter}{ext}"
new_name = candidate
seen_names.add(new_name)
new_path = out / role / new_name
renames.append({
"old_path": str(old_path),
"new_path": str(new_path),
"new_name": new_name,
"role": role,
"original_name": r["original_name"],
})
return renames
def execute_renames(renames: list[dict], dry_run: bool = True) -> dict:
stats = {"planned": len(renames), "executed": 0, "skipped": 0, "errors": []}
for r in renames:
old = Path(r["old_path"])
new = Path(r["new_path"])
if not old.exists():
stats["skipped"] += 1
continue
if dry_run:
stats["skipped"] += 1
continue
new.parent.mkdir(parents=True, exist_ok=True)
try:
import shutil
shutil.copy2(str(old), str(new))
stats["executed"] += 1
except Exception as e:
stats["errors"].append(f"{old.name} -> {new.name}: {e}")
return stats


@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- DRUMLOOPS ---
drumloops = [s for s in samples if s.get("role") == "drumloop"]
print(f"DRUMLOOPS ({len(drumloops)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in drumloops[:50]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- ONESHOTS ---
oneshots = [s for s in samples if s.get("role") == "oneshot"]
print(f"\nONESHOTS ({len(oneshots)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in oneshots[:40]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- Summary: folder source of drumloops ---
print(f"\n\nDRUMLOOP ORIGINS:")
from collections import Counter
origins = Counter()
for s in drumloops:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
# Find the category folder
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins[parts[i+1]] += 1
break
for k, v in origins.most_common():
print(f" {k:40s} {v:4d}")
# --- Summary: folder source of oneshots ---
print(f"\nONESHOT ORIGINS:")
origins2 = Counter()
for s in oneshots:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins2[parts[i+1]] += 1
break
for k, v in origins2.most_common():
print(f" {k:40s} {v:4d}")


@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os, re
from collections import Counter
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- Analyze filename patterns in misclassified ---
print("=" * 70)
print(" PATRONES DE NOMBRE EN 'DRUMLOOPS'")
print("=" * 70)
# Extract Midilatino stems
ml_stems = Counter()
for s in samples:
if s.get("role") != "drumloop":
continue
name = s.get("original_name", "")
# Midilatino pattern: Midilatino_Name_Key_Min_BPM_Stem.wav
if "Midilatino" in name or "midilatino" in name:
# Extract the stem type (last part before .wav)
parts = name.replace(".wav", "").replace(".flac", "").replace(".mp3", "")
# Try to find stem keywords
for kw in ["Drums", "Bass", "Lead", "Pad", "Pluck", "Arp", "Vocal",
"Vox", "Guitar", "Rhodes", "Piano", "Synth", "Reese",
"Texture", "Chords", "Reverse", "Fx", "Accent", "Harp",
"Keys", "Bell", "Loop", "Stem", "Snare", "Kick", "Hat",
"Perc", "Shaker", "Hi", "808"]:
if kw.lower() in parts.lower():
ml_stems[kw] += 1
break
else:
# No stem keyword found - it's the full mix
ml_stems["FULL_MIX"] += 1
print("\nMidilatino stem types in 'drumloop':")
for k, v in ml_stems.most_common():
print(f" {k:15s} {v:4d}")
# --- SS_RNBL patterns ---
print("\n\nSentimientoLatino SS_RNBL patterns:")
ss_stems = Counter()
for s in samples:
name = s.get("original_name", "")
if "SS_RNBL" in name:
# Extract type: SS_RNBL_Song_Stem_Type.wav
parts = name.replace(".wav", "").split("_")
if len(parts) >= 4:
stem_type = parts[3] if parts[3] not in ("One", "Shot") else "_".join(parts[3:5])
ss_stems[stem_type] += 1
for k, v in ss_stems.most_common():
print(f" {k:20s} {v:4d}")
# --- All filename keywords ---
print("\n\nAll filename role keywords across library:")
role_keywords = Counter()
for s in samples:
name = s.get("original_name", "").lower()
for kw in ["kick", "snare", "hi-hat", "hihat", "hat", "bass", "808",
"lead", "pad", "pluck", "arp", "vocal", "vox", "fx",
"perc", "drum", "loop", "fill", "guitar", "piano", "rhodes",
"synth", "bell", "brass", "string", "reese", "texture",
"chord", "shaker", "tambourine", "conga", "rim"]:
if kw in name:
role_keywords[kw] += 1
for k, v in role_keywords.most_common(25):
print(f" {k:15s} {v:4d}")

src/analyzer/run_batch.py Normal file

@@ -0,0 +1,143 @@
"""
Batch analyzer - STANDALONE for double-click execution.
Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).
IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows
for ProcessPoolExecutor. Without it, child processes re-import this file
and create infinite process spawning.
"""
from __future__ import annotations
import sys
import os
import time
import json
import warnings
import traceback
import multiprocessing
# freeze_support() is a no-op unless the script is frozen into an executable;
# the actual Windows multiprocessing safeguard is the `if __name__ == "__main__"` guard below.
multiprocessing.freeze_support()
warnings.filterwarnings("ignore")
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)
if PROJECT not in sys.path:
sys.path.insert(0, PROJECT)
from src.analyzer import (
collect_audio_files,
batch_analyze,
save_index,
plan_renames,
)
def main():
print("=" * 60)
print(" ANALIZADOR FORENSE DE SAMPLES v2.0")
print(" ProcessPoolExecutor + aubio F0 (C-native)")
print(" 4 capas: Signal + Perceptual + Musical + Timbre")
print(" 16 procesos independientes = 16 cores en paralelo")
print("=" * 60)
lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
lib2 = os.path.join(PROJECT, "librerias", "reggaeton")
print("\n[1/4] Colectando archivos de audio...")
files = collect_audio_files(lib1, lib2)
print(f" Encontrados: {len(files)} archivos")
if not files:
print("ERROR: No se encontraron archivos de audio.")
return
data_dir = os.path.join(PROJECT, "data")
os.makedirs(data_dir, exist_ok=True)
checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")
# Delete old checkpoint from failed thread-based run
if os.path.exists(checkpoint):
old_size = os.path.getsize(checkpoint)
if old_size < 1000: # Probably broken from the thread run
os.remove(checkpoint)
print(" (Removed broken checkpoint)")
print(f"\n[2/4] Analizando con 16 PROCESOS (70% CPU)...")
print(f" Cada proceso en su propio core, sin GIL")
print(f" Checkpoint: {checkpoint}")
print(f" (Si se corta, re-ejecuta y continua desde donde quedo)")
print()
start = time.time()
results = batch_analyze(files, workers=16, checkpoint_path=checkpoint)
elapsed = time.time() - start
valid = [r for r in results if "error" not in r]
errors = [r for r in results if "error" in r]
print(f"\n Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
print(f" Exitosos: {len(valid)} | Errores: {len(errors)}")
if errors:
err_path = os.path.join(data_dir, "analysis_errors.json")
with open(err_path, "w", encoding="utf-8") as f:
json.dump(errors, f, ensure_ascii=False, indent=2)
print(f" Errores guardados en: {err_path}")
print(f"\n[3/4] Guardando indice...")
index_path = os.path.join(data_dir, "sample_index.json")
save_index(results, index_path)
print(f"\n[4/4] Plan de renombrado...")
output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
renames = plan_renames(results, output_dir)
rename_path = os.path.join(data_dir, "rename_plan.json")
with open(rename_path, "w", encoding="utf-8") as f:
json.dump(renames, f, ensure_ascii=False, indent=2)
print(f" {len(renames)} archivos para renombrar")
print(f" Plan guardado en: {rename_path}")
# Summary
print("\n" + "=" * 60)
print(" RESUMEN")
print("=" * 60)
roles = {}
chars = {}
keys = {}
for r in valid:
role = r.get("role", "?")
roles[role] = roles.get(role, 0) + 1
char = r.get("character", "?")
chars[char] = chars.get(char, 0) + 1
key = r.get("musical", {}).get("key", "X")
keys[key] = keys.get(key, 0) + 1
print(f"\n Roles:")
for role, count in sorted(roles.items(), key=lambda x: -x[1]):
bar = "#" * min(count, 60)
print(f" {role:12s} {count:4d} {bar}")
print(f"\n Caracteres:")
for char, count in sorted(chars.items(), key=lambda x: -x[1]):
bar = "#" * min(count, 50)
print(f" {char:12s} {count:4d} {bar}")
print(f"\n Tonalidades (top 10):")
for key, count in sorted(keys.items(), key=lambda x: -x[1])[:10]:
print(f" {key:5s} {count:4d}")
print(f"\n Proximo paso: ejecuta 2_RENOMBRAR.bat")
print("=" * 60)
if __name__ == "__main__":
try:
main()
except Exception as e:
print(f"\nFATAL ERROR: {e}")
traceback.print_exc()
input("Presiona Enter para cerrar...")


@@ -0,0 +1,88 @@
"""
Rename executor - Copies files to analyzed_samples/ with standardized names.
Reads from data/rename_plan.json generated by the batch analyzer.
"""
from __future__ import annotations
import sys
import os
import json
import shutil
import warnings
warnings.filterwarnings("ignore")
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)
if PROJECT not in sys.path:
sys.path.insert(0, PROJECT)
from src.analyzer import plan_renames, execute_renames
def main():
rename_path = os.path.join(PROJECT, "data", "rename_plan.json")
index_path = os.path.join(PROJECT, "data", "sample_index.json")
output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
# Load rename plan if exists, otherwise generate from index
if os.path.exists(rename_path):
print("Cargando plan de renombrado existente...")
with open(rename_path, "r", encoding="utf-8") as f:
renames = json.load(f)
elif os.path.exists(index_path):
print("Generando plan desde indice...")
with open(index_path, "r", encoding="utf-8") as f:
index = json.load(f)
renames = plan_renames(index["samples"], output_dir)
with open(rename_path, "w", encoding="utf-8") as f:
json.dump(renames, f, ensure_ascii=False, indent=2)
else:
print("ERROR: No existe data/sample_index.json ni data/rename_plan.json")
print(" Ejecuta primero 1_ANALIZAR.bat")
return
print(f"\n{len(renames)} archivos para renombrar")
print(f"Destino: {output_dir}")
print()
# Show sample renames
print("Ejemplos:")
for r in renames[:15]:
print(f" {r['original_name']:50s} -> {r['role']:10s}\\{r['new_name']}")
if len(renames) > 15:
print(f" ... y {len(renames) - 15} mas")
print()
# Confirm
answer = input("Ejecutar renombrado? (s/n): ").strip().lower()
if answer != "s":
print("Cancelado.")
return
# Execute
print("\nCopiando archivos...")
stats = execute_renames(renames, dry_run=False)
print(f"\nResultado: {stats['executed']} copiados, {stats['skipped']} omitidos, {len(stats.get('errors', []))} errores")
if stats.get("errors"):
print("Errores:")
for e in stats["errors"][:10]:
print(f" {e}")
# Save rename log
log_path = os.path.join(PROJECT, "data", "rename_log.json")
with open(log_path, "w", encoding="utf-8") as f:
json.dump({"stats": stats, "renames": renames}, f, ensure_ascii=False, indent=2)
print(f"\nLog guardado en: {log_path}")
if __name__ == "__main__":
try:
main()
except Exception as e:
print(f"\nFATAL ERROR: {e}")
import traceback
traceback.print_exc()
input("Presiona Enter para cerrar...")

src/analyzer/show_stats.py Normal file

@@ -0,0 +1,117 @@
"""
Show statistics from the analysis index.
"""
import sys
import os
import json
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)
def main():
index_path = os.path.join(PROJECT, "data", "sample_index.json")
if not os.path.exists(index_path):
print("ERROR: No existe data/sample_index.json")
print(" Ejecuta primero 1_ANALIZAR.bat")
return
with open(index_path, "r", encoding="utf-8") as f:
index = json.load(f)
samples = index["samples"]
valid = [s for s in samples if "error" not in s]
print("=" * 60)
print(f" ESTADISTICAS DE LA BIBLIOTECA ({len(valid)} samples)")
print("=" * 60)
# Roles
roles = {}
for s in valid:
r = s.get("role", "?")
roles[r] = roles.get(r, 0) + 1
print("\n Roles:")
max_count = max(roles.values()) if roles else 1
for role, count in sorted(roles.items(), key=lambda x: -x[1]):
bar_len = int(40 * count / max_count)
print(f" {role:12s} {count:4d} {'' * bar_len}")
# Characters
chars = {}
for s in valid:
c = s.get("character", "?")
chars[c] = chars.get(c, 0) + 1
print("\n Caracteres sonoros:")
max_count = max(chars.values()) if chars else 1
for char, count in sorted(chars.items(), key=lambda x: -x[1]):
bar_len = int(40 * count / max_count)
print(f" {char:12s} {count:4d} {'' * bar_len}")
# Keys
keys = {}
for s in valid:
k = s.get("musical", {}).get("key", "X")
keys[k] = keys.get(k, 0) + 1
print("\n Tonalidades:")
for key, count in sorted(keys.items(), key=lambda x: -x[1])[:15]:
print(f" {key:5s} {count:4d}")
# Tempo distribution
tempos = [s.get("perceptual", {}).get("tempo", 0) for s in valid]
tempos_nonzero = [t for t in tempos if t > 0]
if tempos_nonzero:
print(f"\n Tempo:")
print(f" Rango: {min(tempos_nonzero):.0f} - {max(tempos_nonzero):.0f} BPM")
print(f" Promedio: {sum(tempos_nonzero) / len(tempos_nonzero):.0f} BPM")
# LUFS distribution
lufs = [s.get("perceptual", {}).get("lufs", 0) for s in valid]
lufs_valid = [l for l in lufs if l > -70]
if lufs_valid:
print(f"\n Loudness (LUFS):")
print(f" Rango: {min(lufs_valid):.1f} a {max(lufs_valid):.1f} LUFS")
print(f" Promedio: {sum(lufs_valid) / len(lufs_valid):.1f} LUFS")
# Tonal vs atonal
tonal = sum(1 for s in valid if s.get("musical", {}).get("is_tonal", False))
atonal = len(valid) - tonal
print(f"\n Tonalidad:")
print(f" Tonal: {tonal} ({100 * tonal / len(valid):.0f}%)")
print(f" Atonal: {atonal} ({100 * atonal / len(valid):.0f}%)")
# One-shot vs loop
oneshot = sum(1 for s in valid if s.get("musical", {}).get("is_oneshot", False))
loops = sum(1 for s in valid if s.get("musical", {}).get("is_loop", False))
print(f"\n Tipo:")
print(f" One-shots: {oneshot}")
print(f" Loops: {loops}")
print(f" Otros: {len(valid) - oneshot - loops}")
print("\n" + "=" * 60)
# Show samples per role for quick reference
print("\n EJEMPLOS POR ROL:")
by_role = {}
for s in valid:
role = s.get("role", "?")
if role not in by_role:
by_role[role] = []
by_role[role].append(s)
for role in sorted(by_role.keys()):
samples_list = by_role[role][:5]
print(f"\n [{role}] ({len(by_role[role])} total)")
for s in samples_list:
key = s.get("musical", {}).get("key", "X")
char = s.get("character", "?")
bpm = s.get("perceptual", {}).get("tempo", 0)
print(f" {s.get('new_name', '?'):50s} key={key:5s} bpm={bpm:5.0f} char={char}")
if __name__ == "__main__":
main()

src/composer/__init__.py Normal file

@@ -0,0 +1,310 @@
from __future__ import annotations
import json
import math
from pathlib import Path
from typing import Optional
KNOWLEDGE_DIR = Path(__file__).parent.parent.parent / "knowledge"
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
SCALE_INTERVALS = {
"major": [0, 2, 4, 5, 7, 9, 11],
"minor": [0, 2, 3, 5, 7, 8, 10],
"harmonic_minor": [0, 2, 3, 5, 7, 8, 11],
"melodic_minor": [0, 2, 3, 5, 7, 9, 11],
"dorian": [0, 2, 3, 5, 7, 9, 10],
"phrygian": [0, 1, 3, 5, 7, 8, 10],
}
CHORD_TYPES = {
"maj": [0, 4, 7],
"min": [0, 3, 7],
"dim": [0, 3, 6],
"aug": [0, 4, 8],
"7": [0, 4, 7, 10],
"m7": [0, 3, 7, 10],
"sus2": [0, 2, 7],
"sus4": [0, 5, 7],
}
def note_to_midi(note_str: str) -> int:
note_str = note_str.strip()
if len(note_str) == 1:
name = note_str[0]
octave = 4
elif len(note_str) == 2:
if note_str[1] == "#":
name = note_str[:2]
octave = 4
else:
name = note_str[0]
octave = int(note_str[1])
else:
name = note_str[:2] if note_str[1] == "#" else note_str[0]
octave = int(note_str[-1])
base = NOTE_NAMES.index(name)
return (octave + 1) * 12 + base
def parse_chord_name(chord_str: str) -> tuple[int, list[int]]:
chord_str = chord_str.strip()
root = chord_str[0]
idx = 1
if len(chord_str) > 1 and chord_str[1] == "#":
root += "#"
idx = 2
suffix = chord_str[idx:]
if suffix == "m" or suffix == "min":
chord_type = "min"
elif suffix == "dim":
chord_type = "dim"
elif suffix == "7":
chord_type = "7"
elif suffix == "m7":
chord_type = "m7"
elif suffix == "sus2":
chord_type = "sus2"
elif suffix == "sus4":
chord_type = "sus4"
elif suffix == "":
chord_type = "maj"
else:
chord_type = "maj"
root_midi = note_to_midi(root + "4")
intervals = CHORD_TYPES.get(chord_type, [0, 4, 7])
return root_midi, intervals
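# e.g. parse_chord_name("Am")  -> (69, [0, 3, 7])      # A4 root, minor triad
#      parse_chord_name("F#7") -> (66, [0, 4, 7, 10])  # F#4 root, dominant 7th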
def generate_dembow(bars: int = 8, ppq: int = 96) -> list[dict]:
notes = []
for bar in range(bars):
offset = bar * 4.0
        kick_positions = [0.0, 2.5]
        snare_positions = [1.0, 3.0]  # backbeat on beats 2 and 4 (a hit at 4.0 would spill into the next bar)
hihat_positions = [i * 0.5 for i in range(8)]
for p in kick_positions:
notes.append({"position": offset + p, "length": 0.25, "key": 36, "velocity": 110})
for p in snare_positions:
notes.append({"position": offset + p, "length": 0.15, "key": 38, "velocity": 105})
for p in hihat_positions:
notes.append({"position": offset + p, "length": 0.1, "key": 42, "velocity": 75})
for i in [1, 3, 5, 7]:
notes.append({
"position": offset + hihat_positions[i],
"length": 0.1,
"key": 46,
"velocity": 60,
})
return notes
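# One-bar sketch (positions in beats): kick at 0.0 and 2.5, snare backbeat at
# 1.0 and 3.0, closed hats (key 42) on every 8th, open hats (key 46) on the
# offbeat 8ths.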
def generate_bass_808(
chord_progression: list[str],
beats_per_chord: int = 4,
octave: int = 2,
bars: int = 8,
) -> list[dict]:
notes = []
pos = 0.0
total_beats = bars * 4
while pos < total_beats:
for chord_name in chord_progression:
root_midi, _ = parse_chord_name(chord_name)
bass_note = root_midi - (4 - octave) * 12
notes.append({
"position": pos,
"length": min(beats_per_chord - 0.1, total_beats - pos),
"key": bass_note,
"velocity": 100,
})
pos += beats_per_chord
if pos >= total_beats:
break
return notes
def generate_piano_stabs(
chord_progression: list[str],
beats_per_chord: int = 4,
bars: int = 8,
) -> list[dict]:
notes = []
pos = 0.0
total_beats = bars * 4
while pos < total_beats:
for chord_name in chord_progression:
root_midi, intervals = parse_chord_name(chord_name)
chord_notes = [root_midi + iv for iv in intervals]
for stab_pos in [0.5, 1.5, 2.5, 3.5]:
actual_pos = pos + stab_pos
if actual_pos >= total_beats:
break
for cn in chord_notes:
notes.append({
"position": actual_pos,
"length": 0.2,
"key": cn,
"velocity": 70,
})
pos += beats_per_chord
if pos >= total_beats:
break
return notes
def generate_lead_hook(
chord_progression: list[str],
beats_per_chord: int = 4,
bars: int = 8,
octave: int = 5,
) -> list[dict]:
notes = []
pos = 0.0
total_beats = bars * 4
hook_patterns = [
[0, 0.5, 1.0, 2.0, 3.0],
[0, 1.0, 1.5, 2.0, 3.5],
[0, 0.25, 0.5, 2.0, 2.5, 3.0],
]
pattern_idx = 0
while pos < total_beats:
for chord_name in chord_progression:
root_midi, intervals = parse_chord_name(chord_name)
            scale_notes = [root_midi + iv for iv in [0, 2, 3, 5, 7, 8, 10]]  # natural-minor colour, regardless of chord quality
target_octave_notes = [n + (octave - 4) * 12 for n in scale_notes]
pattern = hook_patterns[pattern_idx % len(hook_patterns)]
for i, p in enumerate(pattern):
actual_pos = pos + p
if actual_pos >= total_beats:
break
note_idx = i % len(target_octave_notes)
notes.append({
"position": actual_pos,
"length": 0.4 if i < len(pattern) - 1 else 0.8,
"key": target_octave_notes[note_idx],
"velocity": 90 if i % 2 == 0 else 75,
})
pos += beats_per_chord
pattern_idx += 1
if pos >= total_beats:
break
return notes
def generate_pad(
chord_progression: list[str],
beats_per_chord: int = 4,
bars: int = 8,
octave: int = 4,
) -> list[dict]:
notes = []
pos = 0.0
total_beats = bars * 4
while pos < total_beats:
for chord_name in chord_progression:
root_midi, intervals = parse_chord_name(chord_name)
chord_notes = [root_midi + (octave - 4) * 12 + iv for iv in intervals]
duration = min(beats_per_chord, total_beats - pos)
for cn in chord_notes:
notes.append({
"position": pos,
"length": duration,
"key": cn,
"velocity": 45,
})
pos += beats_per_chord
if pos >= total_beats:
break
return notes
def generate_latin_perc(bars: int = 8) -> list[dict]:
notes = []
for bar in range(bars):
offset = bar * 4.0
shaker = [(i * 0.25) + 0.125 for i in range(16)]
for p in shaker:
notes.append({"position": offset + p, "length": 0.1, "key": 50, "velocity": 55})
congas = [0.0, 1.0, 2.0, 3.0]
for p in congas:
notes.append({"position": offset + p, "length": 0.2, "key": 54, "velocity": 65})
rim = [0.75, 2.75]
for p in rim:
notes.append({"position": offset + p, "length": 0.1, "key": 37, "velocity": 50})
return notes
def compose_from_genre(
genre_path: str | Path,
custom_overrides: Optional[dict] = None,
) -> dict:
with open(genre_path, "r", encoding="utf-8") as f:
genre = json.load(f)
if custom_overrides:
genre.update(custom_overrides)
bpm = genre["bpm"]["default"]
ppq = genre.get("ppq", 96)
key = genre["keys"][0]
progression = genre["chord_progressions"][0]["chords"]
beats_per_chord = genre["chord_progressions"][0].get("beats_per_chord", 4)
bars = genre["structure"]["sections"][1]["bars"]
composition = {
"meta": {
"genre": genre["genre"],
"era": genre.get("era", ""),
"bpm": bpm,
"ppq": ppq,
"key": key,
"chord_progression": progression,
"beats_per_chord": beats_per_chord,
"bars": bars,
},
"tracks": [],
}
for role_name, role_config in genre["roles"].items():
track = {
"role": role_name,
"description": role_config["description"],
"preferred_plugins": role_config["preferred_plugins"],
"mixer_slot": role_config.get("mixer_slot", 0),
}
if role_name == "drums":
track["notes"] = generate_dembow(bars, ppq)
elif role_name == "bass":
track["notes"] = generate_bass_808(
progression, beats_per_chord,
octave=role_config.get("octave", 2),
bars=bars,
)
elif role_name == "harmony":
track["notes"] = generate_piano_stabs(progression, beats_per_chord, bars)
elif role_name == "lead":
track["notes"] = generate_lead_hook(
progression, beats_per_chord, bars,
octave=role_config.get("octave", 5),
)
elif role_name == "pad":
track["notes"] = generate_pad(
progression, beats_per_chord, bars,
octave=role_config.get("octave", 4),
)
elif role_name == "perc":
track["notes"] = generate_latin_perc(bars)
composition["tracks"].append(track)
return composition
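# Genre JSON shape (sketch inferred from the lookups above; values illustrative):
# {
#   "genre": "reggaeton", "era": "...", "bpm": {"default": 92}, "ppq": 96,
#   "keys": ["Am"],
#   "chord_progressions": [{"chords": ["Am", "F", "C", "G"], "beats_per_chord": 4}],
#   "structure": {"sections": [{"bars": 4}, {"bars": 8}]},
#   "roles": {"bass": {"description": "...", "preferred_plugins": ["..."],
#                      "mixer_slot": 2, "octave": 2}}
# }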

src/composer/melodic.py Normal file

@@ -0,0 +1,288 @@
"""Melodic pattern generators for reggaeton production.
All generators return list[dict] with format {pos, len, key, vel}.
Designed to feed MelodicTrack notes in SongDefinition.
"""
# ---------------------------------------------------------------------------
# Scale definitions
# ---------------------------------------------------------------------------
SCALES = {
"minor": [0, 2, 3, 5, 7, 8, 10], # natural minor
"major": [0, 2, 4, 5, 7, 9, 11],
"phrygian": [0, 1, 3, 5, 7, 8, 10],
"dorian": [0, 2, 3, 5, 7, 9, 10],
}
ROOT_SEMITONE = {
"C": 0, "C#": 1, "Db": 1, "D": 2, "D#": 3, "Eb": 3,
"E": 4, "F": 5, "F#": 6, "Gb": 6, "G": 7, "G#": 8,
"Ab": 8, "A": 9, "A#": 10, "Bb": 10, "B": 11,
}
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _parse_key(key_str: str) -> tuple[int, str]:
"""Parse a key like 'Am', 'C#m', 'Dm', 'C' into (root_semitone, scale_name)."""
if key_str.endswith("m") and key_str != "m":
root_str = key_str[:-1]
scale_name = "minor"
else:
root_str = key_str
scale_name = "major"
root = ROOT_SEMITONE.get(root_str)
if root is None:
raise ValueError(f"Unknown root: {root_str}")
return root, scale_name
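# e.g. _parse_key("C#m") -> (1, "minor"); _parse_key("F") -> (5, "major")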
def _get_scale_notes(root: int, scale: str, octave: int) -> list[int]:
"""Return MIDI note numbers for all degrees of the scale in given octave."""
intervals = SCALES.get(scale, SCALES["major"])
return [root + octave * 12 + interval for interval in intervals]
def _clamp_vel(v: int) -> int:
"""Clamp velocity to valid MIDI range [1, 127]."""
return max(1, min(127, v))
# ---------------------------------------------------------------------------
# Bass: tresillo
# ---------------------------------------------------------------------------
def bass_tresillo(
key: str,
bars: int,
octave: int = 3,
velocity_mult: float = 1.0,
) -> list[dict]:
"""Reggaeton tresillo bass pattern.
6 notes per bar at positions: 0.0, 0.75, 1.5, 2.25, 3.0, 3.75
Root note on downbeats (0.0, 1.5, 3.0), fifth (7 semitones) on upbeats.
Velocity: 110 for downbeats, 85 for upbeats.
Default octave=3 gives root in MIDI range 45-52 (A3-E4), within 36-55.
"""
root, scale = _parse_key(key)
scale_notes = _get_scale_notes(root, scale, octave)
root_note = scale_notes[0] # degree 0
fifth_note = root_note + 7 # up a perfect fifth
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
# Positions within the bar
positions = [0.0, 0.75, 1.5, 2.25, 3.0, 3.75]
for idx, pos in enumerate(positions):
if idx % 2 == 0: # downbeats: root
key_note = root_note
vel = 110
else: # upbeats: fifth
key_note = fifth_note
vel = 85
vel = _clamp_vel(int(vel * velocity_mult))
notes.append({"pos": o + pos, "len": 0.25, "key": key_note, "vel": vel})
return notes
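# One-bar sketch in "Am" (octave 3): root A on 0.0 / 1.5 / 3.0 at vel 110,
# fifth E on 0.75 / 2.25 / 3.75 at vel 85, the 3-3-2 tresillo contour.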
# ---------------------------------------------------------------------------
# Lead: hook
# ---------------------------------------------------------------------------
def lead_hook(
key: str,
bars: int,
octave: int = 5,
density: float = 0.6,
velocity_mult: float = 1.0,
) -> list[dict]:
"""Simple melodic hook over 4-8 bars.
    Uses scale degrees: [0, 2, 4, 2, 3, 1, 0, 2, 4, 5, 4, 2, 0]
Note durations: 0.5 or 1.0 beats.
density=1.0 → every slot filled; density=0.5 → half filled.
"""
root, scale = _parse_key(key)
intervals = SCALES.get(scale, SCALES["major"])
# Map scale degrees to MIDI notes (extend to cover octave 5 and 6 for melody)
scale_notes_oct5 = _get_scale_notes(root, scale, octave) # 7 notes
scale_notes_oct6 = _get_scale_notes(root, scale, octave + 1)
# Degree pattern (0-indexed scale degrees)
degrees = [0, 2, 4, 2, 3, 1, 0, 2, 4, 5, 4, 2, 0]
notes: list[dict] = []
# Step through the pattern at half-beat intervals
# density controls whether we actually place a note
step = max(1, round(1.0 / density)) if density > 0 else 1
pos = 0.0
degree_idx = 0
while pos < bars * 4.0:
slot = int(pos * 2) # 0.5-beat slots
if slot % step == 0:
# Pick note alternating between octave 5 and 6 for contour
use_oct6 = (degree_idx // 2) % 3 == 0 # every few notes go higher
midi_note = scale_notes_oct6[degrees[degree_idx] % 7] \
if use_oct6 else scale_notes_oct5[degrees[degree_idx] % 7]
# Duration: 1.0 beat on strong beats (quarter), 0.5 elsewhere
is_strong = (slot % 4 == 0)
length = 1.0 if is_strong else 0.5
vel = 100 if is_strong else 80
vel = _clamp_vel(int(vel * velocity_mult))
notes.append({"pos": pos, "len": length, "key": midi_note, "vel": vel})
# Advance degree index
degree_idx = (degree_idx + 1) % len(degrees)
if is_strong:
pos += 1.0
else:
pos += 0.5
else:
pos += 0.5
return notes
# ---------------------------------------------------------------------------
# Chords: block chords
# ---------------------------------------------------------------------------
def chords_block(
key: str,
bars: int,
octave: int = 4,
velocity_mult: float = 1.0,
) -> list[dict]:
"""Blocked chords every 2 beats (half-bar).
Minor progression: i - VII - VI - VII (degrees 0, 6, 5, 6 in natural minor)
Major progression: I - V - vi - IV (degrees 0, 4, 5, 3 in major)
Each chord: root + third + fifth (3 notes stacked at same position).
"""
root, scale = _parse_key(key)
    # Build each chord as semitone intervals from the key root; an interval
    # may sit below its chord root, which just gives a closer voicing.
    if scale == "minor":
        # i (0,3,7), VII (10,2,5), VI (8,0,3), VII (10,2,5) in natural minor
        chord_intervals = [
            (0, 3, 7),   # i
            (10, 2, 5),  # VII: built on the subtonic (b7, 10 semitones up)
            (8, 0, 3),   # VI
            (10, 2, 5),  # VII (repeat)
        ]
else:
# I (0,4,7), V (7,11,2), vi (9,0,4), IV (5,9,0)
chord_intervals = [
(0, 4, 7), # I
(7, 11, 2), # V
(9, 0, 4), # vi (9 = root+9)
(5, 9, 0), # IV (5 = root+5)
]
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
chord_idx = b % 4
intervals = chord_intervals[chord_idx]
# Chord positions at half-bar: 0.0 and 2.0
chord_positions = [0.0, 2.0]
for cpos in chord_positions:
for interval in intervals:
midi_note = root + octave * 12 + interval
vel = 90
vel = _clamp_vel(int(vel * velocity_mult))
notes.append({
"pos": o + cpos,
"len": 1.75, # almost 2 beats (leave gap)
"key": midi_note,
"vel": vel,
})
return notes
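# e.g. chords_block("Am", bars=4) cycles i-VII-VI-VII (Am, G, F, G), hitting
# each chord twice per bar at beats 0.0 and 2.0 with 1.75-beat lengths.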
# ---------------------------------------------------------------------------
# Pad: sustain
# ---------------------------------------------------------------------------
def pad_sustain(
key: str,
bars: int,
octave: int = 4,
velocity_mult: float = 1.0,
) -> list[dict]:
"""Long sustained pad notes, one per bar.
Follows chord progression from chords_block.
Notes last 3.5 beats to avoid collision with next bar's note.
Soft velocity (65-75).
"""
root, scale = _parse_key(key)
    if scale == "minor":
        # same i-VII-VI-VII progression as chords_block; the intervals are
        # kept for reference, the pad only sounds the per-bar chord roots
        chord_intervals = [
            (0, 3, 7),
            (10, 2, 5),
            (8, 0, 3),
            (10, 2, 5),
        ]
        root_notes_per_bar = [0, 10, 8, 10]  # root semitone offsets per bar
else:
chord_intervals = [
(0, 4, 7),
(7, 11, 2),
(9, 0, 4),
(5, 9, 0),
]
root_notes_per_bar = [0, 7, 9, 5]
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
cycle = b % 4
root_interval = root_notes_per_bar[cycle]
midi_note = root + octave * 12 + root_interval
vel = 70
vel = _clamp_vel(int(vel * velocity_mult))
notes.append({
"pos": o,
"len": 3.5,
"key": midi_note,
"vel": vel,
})
return notes

311
src/composer/rhythm.py Normal file
View File

@@ -0,0 +1,311 @@
"""Reggaeton rhythm generators — pure functions returning note dicts per channel."""
# ---------------------------------------------------------------------------
# Channel constants — match SAMPLE_MAP in channel_skeleton.py
# ---------------------------------------------------------------------------
CH_P1 = 10 # perc1.wav
CH_K = 11 # kick.wav
CH_S = 12 # snare.wav
CH_R = 13 # rim.wav
CH_P2 = 14 # perc2.wav
CH_H = 15 # hihat.wav
CH_CL = 16 # clap.wav
# Note dict format: {"pos": float, "len": float, "key": int, "vel": int}
# pos — in BEATS from start of bar 0 (bar 2 beat 3 → 2*4 + 2 = 10.0)
# len — in beats (0.25 = 16th note at 4/4)
# key — always 60 for drum samples (pitch irrelevant, sample just plays)
# vel — 1-127 after applying velocity_mult
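# Example: the second dembow kick hit of bar 0 is
#     {"pos": 1.5, "len": 0.25, "key": 60, "vel": 105}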
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _clamp_vel(vel: int) -> int:
"""Clamp velocity to valid MIDI range [1, 127]."""
return max(1, min(127, vel))
def _apply_vel(base_vel: int, velocity_mult: float) -> int:
"""Multiply base velocity by velocity_mult and clamp."""
return _clamp_vel(int(base_vel * velocity_mult))
def _note(pos: float, length: float, vel: int) -> dict:
"""Create a note dict with key=60."""
return {"pos": pos, "len": length, "key": 60, "vel": vel}
# ---------------------------------------------------------------------------
# Kick generators
# ---------------------------------------------------------------------------
def kick_main_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Dembow kick: beat 1 (hard, vel 115) + beat 2-and (the dembow hit, vel 105).
Positions per bar: 0.0 and 1.5 (the classic "one — &-two" reggaeton kick).
Returns {CH_K: [notes...]}.
"""
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
notes.append(_note(o, 0.25, _apply_vel(115, velocity_mult)))
notes.append(_note(o + 1.5, 0.25, _apply_vel(105, velocity_mult)))
return {CH_K: notes}
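# Example: kick_main_notes(2) ->
#     {CH_K: [{"pos": 0.0, "len": 0.25, "key": 60, "vel": 115},
#             {"pos": 1.5, "len": 0.25, "key": 60, "vel": 105},
#             {"pos": 4.0, "len": 0.25, "key": 60, "vel": 115},
#             {"pos": 5.5, "len": 0.25, "key": 60, "vel": 105}]}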
def kick_sparse_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Sparse intro/outro kick: just beat 1 per bar (vel 110).
Returns {CH_K: [notes...]}.
"""
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
notes.append(_note(o, 0.25, _apply_vel(110, velocity_mult)))
return {CH_K: notes}
def kick_outro_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Outro kick: dembow pattern with 0.75 baseline softness.
Delegates to kick_main_notes with an additional 0.75 velocity scaling.
Returns {CH_K: [notes...]}.
"""
return kick_main_notes(bars, velocity_mult=velocity_mult * 0.75, density=density)
# ---------------------------------------------------------------------------
# Snare generators
# ---------------------------------------------------------------------------
def snare_verse_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Reggaeton snare: beats 2, 3, 3-and, 4 per bar.
Positions: 1.0 (vel 100), 2.0 (vel 95), 2.5 (vel 110), 3.0 (vel 90).
Returns {CH_S: [notes...]}.
"""
_PATTERN = [(1.0, 100), (2.0, 95), (2.5, 110), (3.0, 90)]
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
for p, v in _PATTERN:
notes.append(_note(o + p, 0.15, _apply_vel(v, velocity_mult)))
return {CH_S: notes}
def snare_fill_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Busier snare with 16th-note fills: adds positions 2.25 and 3.75.
Verse base (1.0, 2.0, 2.5, 3.0) plus 16th fills at 2.25 and 3.75.
Returns {CH_S: [notes...]}.
"""
_PATTERN = [
(1.0, 100),
(2.0, 95),
(2.25, 80), # 16th fill
(2.5, 110),
(3.0, 90),
(3.75, 85), # 16th fill
]
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
for p, v in _PATTERN:
notes.append(_note(o + p, 0.15, _apply_vel(v, velocity_mult)))
return {CH_S: notes}
def snare_outro_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Softer outro snare (velocity_mult on top of 0.7 baseline).
Delegates to snare_verse_notes with an additional 0.7 velocity scaling.
Returns {CH_S: [notes...]}.
"""
return snare_verse_notes(bars, velocity_mult=velocity_mult * 0.7, density=density)
# ---------------------------------------------------------------------------
# Hihat generators
# ---------------------------------------------------------------------------
def hihat_16th_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""16th-note hihat with three-tier accent mapping.
Accented on quarter notes (vel 85), medium on 8ths (vel 60), soft on
off-8ths (vel 40). density=1.0 → all 16ths; density=0.5 → every other.
Returns {CH_H: [notes...]}.
"""
notes: list[dict] = []
step = max(1, round(1.0 / density)) if density > 0 else 1
for b in range(bars):
o = b * 4.0
for i in range(0, 16, step):
beat_frac = i * 0.25 # position within bar in beats
if beat_frac % 1.0 == 0.0: # quarter note position
base_vel = 85
elif beat_frac % 0.5 == 0.0: # 8th note position
base_vel = 60
else: # 16th note position
base_vel = 40
notes.append(_note(o + beat_frac, 0.1, _apply_vel(base_vel, velocity_mult)))
return {CH_H: notes}
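# Example: density=0.5 gives step=2, so only even 16th indices fire: an
# 8th-note pattern hitting just the quarter (vel 85) and 8th (vel 60) tiers.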
def hihat_8th_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""8th-note hihat for intro/breakdown.
Accented on beats (vel 70), off-beats softer (vel 50).
Returns {CH_H: [notes...]}.
"""
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
for i in range(8):
base_vel = 70 if i % 2 == 0 else 50
notes.append(_note(o + i * 0.5, 0.1, _apply_vel(base_vel, velocity_mult)))
return {CH_H: notes}
# ---------------------------------------------------------------------------
# Clap generator
# ---------------------------------------------------------------------------
def clap_24_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Classic reggaeton clap: beats 2 and 4 → positions 1.0 and 3.0 per bar.
Hard clap (vel 120).
Returns {CH_CL: [notes...]}.
"""
notes: list[dict] = []
for b in range(bars):
o = b * 4.0
notes.append(_note(o + 1.0, 0.15, _apply_vel(120, velocity_mult)))
notes.append(_note(o + 3.0, 0.15, _apply_vel(120, velocity_mult)))
return {CH_CL: notes}
# ---------------------------------------------------------------------------
# Percussion generators
# ---------------------------------------------------------------------------
def perc_combo_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Perc1 + Perc2 offbeat combo (tumba feel).
perc2 (CH_P2): positions 0.75 (vel 85) and 2.75 (vel 80).
perc1 (CH_P1): positions 1.5 (vel 70) and 3.5 (vel 65).
Returns {CH_P1: [...], CH_P2: [...]}.
"""
p2_notes: list[dict] = []
p1_notes: list[dict] = []
for b in range(bars):
o = b * 4.0
p2_notes.append(_note(o + 0.75, 0.1, _apply_vel(85, velocity_mult)))
p2_notes.append(_note(o + 2.75, 0.1, _apply_vel(80, velocity_mult)))
p1_notes.append(_note(o + 1.5, 0.1, _apply_vel(70, velocity_mult)))
p1_notes.append(_note(o + 3.5, 0.1, _apply_vel(65, velocity_mult)))
return {CH_P1: p1_notes, CH_P2: p2_notes}
def rim_build_notes(
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Rim roll that builds intensity across bars (4-bar cycle).
Bar N%4=0: 16th indices 0,2,8,14 (sparse opening)
Bar N%4=1: indices 0,2,4,8,10,14 (filling in)
Bar N%4=2: indices 0,2,4,6,8,10,12,14 (every other 16th)
Bar N%4=3: all 16 indices (full roll)
Velocity ramps: 50 → 65 → 80 → 100 across the 4-bar cycle.
Returns {CH_R: [notes...]}.
"""
_PATTERNS = [
[0, 2, 8, 14],
[0, 2, 4, 8, 10, 14],
[0, 2, 4, 6, 8, 10, 12, 14],
list(range(16)),
]
_BASE_VELS = [50, 65, 80, 100]
notes: list[dict] = []
for b in range(bars):
cycle = b % 4
o = b * 4.0
base_vel = _BASE_VELS[cycle]
vel = _apply_vel(base_vel, velocity_mult)
for idx in _PATTERNS[cycle]:
notes.append(_note(o + idx * 0.25, 0.1, vel))
return {CH_R: notes}
# ---------------------------------------------------------------------------
# Registry & dispatcher
# ---------------------------------------------------------------------------
GENERATORS: dict[str, callable] = {
"kick_main_notes": kick_main_notes,
"kick_sparse_notes": kick_sparse_notes,
"kick_outro_notes": kick_outro_notes,
"snare_verse_notes": snare_verse_notes,
"snare_fill_notes": snare_fill_notes,
"snare_outro_notes": snare_outro_notes,
"hihat_16th_notes": hihat_16th_notes,
"hihat_8th_notes": hihat_8th_notes,
"clap_24_notes": clap_24_notes,
"perc_combo_notes": perc_combo_notes,
"rim_build_notes": rim_build_notes,
}
def get_notes(
generator_name: str,
bars: int,
velocity_mult: float = 1.0,
density: float = 1.0,
) -> dict[int, list[dict]]:
"""Dispatch to the named generator. Raises KeyError if not found."""
gen = GENERATORS[generator_name]
return gen(bars, velocity_mult, density)
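# Example:
#     notes_by_channel = get_notes("kick_main_notes", bars=4, velocity_mult=0.9)
#     notes_by_channel[CH_K][0]  # -> {"pos": 0.0, "len": 0.25, "key": 60, "vel": 103}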

296
src/composer/variation.py Normal file
View File

@@ -0,0 +1,296 @@
"""Variation engine — generates unique SongDefinition instances from a seed.
Pure functions: no file I/O, no print statements. The only side effect is
the deterministic randomness from ``random.Random(idx)`` — same seed always
produces the same output.
Usage::
from src.composer.variation import generate_variant, generate_batch
one_song = generate_variant(42)
fifty = generate_batch(50)
"""
from __future__ import annotations
import random
from pathlib import Path
from typing import Iterator
from ..flp_builder.schema import (
ArrangementItemDef,
ArrangementTrack,
PatternDef,
SongDefinition,
SongMeta,
)
# ---------------------------------------------------------------------------
# Musical constants
# ---------------------------------------------------------------------------
BPMS: list[int] = [88, 90, 92, 94, 95, 96, 98, 100, 102]
KEYS_MINOR: list[str] = ["Am", "Dm", "Em", "Gm", "Bm", "Cm", "Fm"]
KEYS_MAJOR: list[str] = ["C", "F", "G", "D", "A"]
ALL_KEYS: list[str] = KEYS_MINOR + KEYS_MAJOR
PROGRESSIONS: list[str] = [
"i-VII-VI-VII", # Am-G-F-G (classic)
"i-iv-VII-III", # Am-Dm-G-C
"i-VI-III-VII", # Am-F-C-G
"i-VII-III-VI", # Am-G-C-F
"I-V-vi-IV", # C-G-Am-F (major mode)
"I-IV-V-I", # classic major
"i-III-VII-VI", # minor dreamy
"i-v-iv-VII", # dark minor
"I-vi-IV-V", # 50s progression
"i-VII-VI-iv", # modern dark
"i-VI-VII-i", # loop
"i-iv-i-VII", # minimal
"I-II-vi-V", # modern major
"i-III-VI-VII", # uplift
"vi-IV-I-V", # axis
]
TITLE_PREFIXES: list[str] = [
"Zona", "Barrio", "Calle", "Noche", "Fuego",
"Ritmo", "Poder", "Flow", "Vibra", "Cuerpo",
]
TITLE_SUFFIXES: list[str] = [
"Caliente", "Oscura", "Sin Fin", "Total", "Real",
"Fatal", "Natural", "Del Party", "Con Flow", "Urbano",
]
# ---------------------------------------------------------------------------
# Variation axis parameters
# ---------------------------------------------------------------------------
DENSITY_LEVELS: list[float] = [0.6, 0.75, 1.0]
VEL_MULT_LEVELS: list[float] = [0.85, 1.0, 1.1]
SECTION_REPEATS: list[int] = [1, 2] # verse/chorus repeat multiplier
SAMPLES_MAP: dict[str, str] = {
"kick": "kick.wav",
"snare": "snare.wav",
"rim": "rim.wav",
"clap": "clap.wav",
"hihat": "hihat.wav",
"perc1": "perc1.wav",
"perc2": "perc2.wav",
}
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _make_title(rng: random.Random) -> str:
"""Combine a random prefix + suffix into a song title."""
return f"{rng.choice(TITLE_PREFIXES)} {rng.choice(TITLE_SUFFIXES)}"
def _base_tracks() -> list[ArrangementTrack]:
"""Fixed arrangement track layout — 5 tracks, same for all variants."""
return [
ArrangementTrack(index=1, name="Kick"),
ArrangementTrack(index=2, name="Snare"),
ArrangementTrack(index=3, name="Hihat"),
ArrangementTrack(index=4, name="Clap/Rim"),
ArrangementTrack(index=5, name="Perc"),
]
def _build_patterns(rng: random.Random) -> list[PatternDef]:
"""Build 9 base patterns with per-pattern randomized density and velocity_mult.
Pattern ids, names, instruments, channels, and generators are fixed —
matching the reggaeton_template.json structure. The variation axes
(density, velocity_mult) are randomized per pattern.
"""
# (id, name, instrument, channel, bars, generator)
base: list[tuple[int, str, str, int, int, str]] = [
(1, "Kick Main", "kick", 11, 8, "kick_main_notes"),
(2, "Snare Verse", "snare", 12, 8, "snare_verse_notes"),
(3, "Hihat 16th", "hihat", 15, 8, "hihat_16th_notes"),
(4, "Clap 2-4", "clap", 16, 8, "clap_24_notes"),
(5, "Perc Combo", "perc2", 14, 8, "perc_combo_notes"),
(6, "Kick Sparse", "kick", 11, 8, "kick_sparse_notes"),
(7, "Hihat 8th", "hihat", 15, 8, "hihat_8th_notes"),
(8, "Rim Build", "rim", 13, 4, "rim_build_notes"),
(9, "Kick Outro", "kick", 11, 8, "kick_outro_notes"),
]
patterns: list[PatternDef] = []
for pid, name, inst, ch, bars, gen in base:
patterns.append(
PatternDef(
id=pid,
name=name,
instrument=inst,
channel=ch,
bars=bars,
generator=gen,
velocity_mult=rng.choice(VEL_MULT_LEVELS),
density=rng.choice(DENSITY_LEVELS),
)
)
return patterns
def _build_arrangement(
rng: random.Random,
patterns: list[PatternDef], # noqa: ARG001 reserved for future use
) -> list[ArrangementItemDef]:
"""Build arrangement items with variable verse/chorus lengths and optional
breakdown.
Structure::
INTRO (4 bars) — kick_sparse + hihat_8th
VERSE (8|16) — kick_main + snare + hihat_16th + perc_combo
PRE-CHORUS (4 bars) — above + rim_build
CHORUS (8|16) — kick_main + snare + hihat_16th + clap_24 + perc_combo
[VERSE 2 + PRE-CHORUS 2 + CHORUS 2]
[BREAKDOWN (8 bars, 50% chance)] — hihat_8th + kick_sparse
OUTRO (8 bars) — kick_outro + snare + hihat_16th + clap_24
"""
items: list[ArrangementItemDef] = []
cursor: float = 0.0
verse_bars: int = 8 * rng.choice(SECTION_REPEATS)
chorus_bars: int = 8 * rng.choice(SECTION_REPEATS)
has_breakdown: bool = rng.random() < 0.5
def add(pattern_id: int, track: int, length: float) -> None:
"""Append one arrangement item at the current cursor (no advance)."""
items.append(
ArrangementItemDef(
pattern=pattern_id,
bar=cursor,
bars=length,
track=track,
)
)
# --- INTRO (4 bars) ---
add(6, 1, 4) # kick_sparse on Kick
add(7, 3, 4) # hihat_8th on Hihat
cursor += 4
# --- VERSE / PRE-CHORUS / CHORUS × 2 ---
for _ in range(2):
# VERSE
add(1, 1, verse_bars) # kick_main
add(2, 2, verse_bars) # snare_verse
add(3, 3, verse_bars) # hihat_16th
add(5, 5, verse_bars) # perc_combo
cursor += verse_bars
# PRE-CHORUS (4 bars)
add(1, 1, 4) # kick_main
add(2, 2, 4) # snare_verse
add(3, 3, 4) # hihat_16th
add(5, 5, 4) # perc_combo
add(8, 4, 4) # rim_build on Clap/Rim
cursor += 4
# CHORUS
add(1, 1, chorus_bars) # kick_main
add(2, 2, chorus_bars) # snare_verse
add(3, 3, chorus_bars) # hihat_16th
add(4, 4, chorus_bars) # clap_24
add(5, 5, chorus_bars) # perc_combo
cursor += chorus_bars
# --- BREAKDOWN (optional, 8 bars) ---
if has_breakdown:
add(6, 1, 8) # kick_sparse
add(7, 3, 8) # hihat_8th
cursor += 8
# --- OUTRO (8 bars) ---
add(9, 1, 8) # kick_outro on Kick
add(2, 2, 8) # snare_verse on Snare
add(3, 3, 8) # hihat_16th on Hihat
add(4, 4, 8) # clap_24 on Clap/Rim
cursor += 8
return items
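# Example timeline (verse_bars=8, chorus_bars=8, no breakdown):
#     intro 0-4, verse 4-12, pre-chorus 12-16, chorus 16-24,
#     verse 24-32, pre-chorus 32-36, chorus 36-44, outro 44-52 = 52 bars total.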
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def generate_variant(idx: int) -> SongDefinition:
"""Generate a unique ``SongDefinition`` from integer seed *idx*.
Uses ``random.Random(idx)`` for full reproducibility.
Same ``idx`` → same output, always.
Varies:
- BPM (from ``BPMS``)
- Key (from ``ALL_KEYS``)
- Progression name (from ``PROGRESSIONS``)
- Title (random prefix + suffix)
- Pattern density (per pattern, from ``DENSITY_LEVELS``)
- Pattern velocity_mult (per pattern, from ``VEL_MULT_LEVELS``)
- Verse/chorus bar count (8 or 16 bars)
- Whether breakdown is included (50% chance)
Uniqueness key: ``(bpm, key, progression_name)`` — checked externally
by ``generate_batch``.
"""
rng = random.Random(idx)
bpm: int = rng.choice(BPMS)
key: str = rng.choice(ALL_KEYS)
prog: str = rng.choice(PROGRESSIONS)
title: str = _make_title(rng)
meta = SongMeta(bpm=bpm, key=key, title=title)
patterns: list[PatternDef] = _build_patterns(rng)
tracks: list[ArrangementTrack] = _base_tracks()
items: list[ArrangementItemDef] = _build_arrangement(rng, patterns)
return SongDefinition(
meta=meta,
samples=SAMPLES_MAP.copy(),
patterns=patterns,
tracks=tracks,
items=items,
progression_name=prog,
section_template="standard",
)
def generate_batch(
count: int = 50,
max_attempts: int = 1000,
) -> list[SongDefinition]:
"""Generate *count* unique songs (unique on bpm+key+progression triple).
Iterates seeds 0 … *max_attempts* until *count* unique songs are found.
Raises ``RuntimeError`` if not enough unique combos are found.
"""
seen: set[tuple[int, str, str]] = set()
songs: list[SongDefinition] = []
for seed in range(max_attempts):
song = generate_variant(seed)
uniq = (song.meta.bpm, song.meta.key, song.progression_name)
if uniq not in seen:
seen.add(uniq)
songs.append(song)
if len(songs) >= count:
break
if len(songs) < count:
raise RuntimeError(
f"Only found {len(songs)} unique songs in {max_attempts} attempts"
)
return songs

12
src/flp_builder/__init__.py Normal file
View File

@@ -0,0 +1,12 @@
from .writer import FLPWriter
from .project import FLPProject, Note, Channel, Pattern, Plugin
__all__ = [
"FLPWriter",
"FLPProject",
"Note",
"Channel",
"Pattern",
"Plugin",
]

222
src/flp_builder/arrangement.py Normal file
View File

@@ -0,0 +1,222 @@
"""FL Studio arrangement/playlist encoding.
Encodes playlist items (ID233) and track data (ID238) into binary format
matching FL Studio's internal structure. Extracted from the proven v15 builder
(output/build_reggaeton_v15.py, lines 61-90).
Arrangement block sequence:
ArrNew(99) → ArrName(241) → Flag36(36) → Playlist(233)
→ TrackData(238)×N → ArrCurrent(100)
"""
from dataclasses import dataclass
import struct
from .events import encode_byte_event, encode_data_event, encode_word_event
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
PPQ_DEFAULT: int = 96
MAX_TRACKS_DEFAULT: int = 500
PATTERN_BASE: int = 20480
# Arrangement event IDs (not yet in EventID enum — raw constants)
EID_ARR_NEW = 99
EID_ARR_CURRENT = 100
EID_ARR_NAME = 241
EID_FLAG_36 = 36
EID_PLAYLIST = 233
EID_TRACK_DATA = 238
# TrackData template size (bytes), extracted from reference FLP
TRACK_DATA_SIZE = 66
# ---------------------------------------------------------------------------
# ArrangementItem dataclass
# ---------------------------------------------------------------------------
@dataclass
class ArrangementItem:
"""A single playlist item placed on the arrangement timeline.
Args:
pattern_id: Pattern number (1-based).
bar: Start bar (0-based, fractional allowed).
num_bars: Length in bars (fractional allowed).
track_index: Track row index (0-based).
muted: Whether the item is muted in the playlist.
"""
pattern_id: int # pattern number (1-based)
bar: float # start bar (0-based)
num_bars: float # length in bars
track_index: int # 0-based track index
muted: bool = False
def to_bytes(
self,
ppq: int = PPQ_DEFAULT,
max_tracks: int = MAX_TRACKS_DEFAULT,
) -> bytes:
"""Encode as a 32-byte playlist item (ID233 format).
Encoding rules (from reverse-engineered FL Studio format):
position = int(bar × ppq × 4) — ticks, truncated
pattern_base = 20480 — constant
item_index = 20480 + pattern_id
length = int(num_bars × ppq × 4) — ticks, truncated
track_rvidx = (max_tracks - 1) - track_index — REVERSED
flags = 0x2040 if muted else 0x0040
"""
position = int(self.bar * ppq * 4)
item_index = PATTERN_BASE + self.pattern_id
length = int(self.num_bars * ppq * 4)
track_rvidx = (max_tracks - 1) - self.track_index
flags = 0x2040 if self.muted else 0x0040
return struct.pack(
"<IHHIHH HH 4B ff",
position,
PATTERN_BASE,
item_index,
length,
track_rvidx,
0, # group
0x0078,
flags,
64, 100, 128, 128,
-1.0, -1.0,
)
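# Worked example for ArrangementItem.to_bytes (ppq=96, max_tracks=500):
# pattern_id=3 at bar=4.0 for num_bars=8.0 on track_index=0 encodes
# position = int(4.0*96*4) = 1536, item_index = 20480+3 = 20483,
# length = int(8.0*96*4) = 3072, track_rvidx = 499-0 = 499, flags = 0x0040.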
# ---------------------------------------------------------------------------
# TrackData helpers
# ---------------------------------------------------------------------------
def build_track_data_template(reference_flp_bytes: bytes) -> bytes:
"""Extract the 66-byte TrackData template from a reference FLP.
Scans the raw FLP bytes for the first ID238 event and returns its
66-byte payload. This template is then cloned and patched for each
of the *max_tracks* track data entries in the arrangement section.
Args:
reference_flp_bytes: Full contents of a valid .flp file.
Returns:
The 66-byte track-data template.
Raises:
ValueError: If no ID238 event of the expected size is found.
"""
pos = 22 # skip FLhd (14 bytes) + FLdt header (8 bytes)
while pos < len(reference_flp_bytes):
ib = reference_flp_bytes[pos]
pos += 1
if ib < 64:
# Byte event: 1-byte value
pos += 1
elif ib < 128:
# Word event: 2-byte value
pos += 2
elif ib < 192:
# Dword event: 4-byte value
pos += 4
else:
# Data / text event: varint length + payload
size = 0
shift = 0
while True:
b = reference_flp_bytes[pos]
pos += 1
size |= (b & 0x7F) << shift
shift += 7
if not (b & 0x80):
break
if ib == EID_TRACK_DATA and size == TRACK_DATA_SIZE:
return bytes(reference_flp_bytes[pos:pos + size])
pos += size
raise ValueError(
f"No ID{EID_TRACK_DATA} TrackData event ({TRACK_DATA_SIZE} bytes) "
"found in reference FLP"
)
def encode_track_data(iid: int, enabled: int, template: bytes) -> bytes:
"""Clone *template*, patch iid at byte 0 (uint32 LE) and enabled at byte 12.
Args:
iid: Internal track ID (sequential from 1).
enabled: 0 = disabled, 1 = enabled.
template: 66-byte template extracted by :func:`build_track_data_template`.
Returns:
66-byte patched track data.
"""
td = bytearray(template)
struct.pack_into("<I", td, 0, iid)
td[12] = enabled & 0xFF
return bytes(td)
# ---------------------------------------------------------------------------
# Full arrangement section builder
# ---------------------------------------------------------------------------
def build_arrangement_section(
items: list[ArrangementItem],
track_data_template: bytes,
ppq: int = PPQ_DEFAULT,
max_tracks: int = MAX_TRACKS_DEFAULT,
) -> bytes:
"""Build the full post-channel arrangement section bytes.
Produces the exact byte sequence FL Studio expects after the channel
events:
ArrNew(99) → ArrName(241) → Flag36(36) → Playlist(233)
→ TrackData(238) × *max_tracks* → ArrCurrent(100)
Args:
items: Playlist items to encode.
track_data_template: 66-byte template from :func:`build_track_data_template`.
ppq: Pulses-per-quarter-note (default 96).
max_tracks: Total track-data entries to write (default 500).
Returns:
Complete arrangement section as raw bytes.
"""
result = bytearray()
# 1. ArrNew — word event, value = 0
result.extend(encode_word_event(EID_ARR_NEW, 0))
# 2. ArrName — "Arrangement" as UTF-16-LE + null terminator
arr_name = "Arrangement".encode("utf-16-le") + b"\x00\x00"
result.extend(encode_data_event(EID_ARR_NAME, arr_name))
# 3. Flag36 — byte event, value = 0
result.extend(encode_byte_event(EID_FLAG_36, 0))
# 4. Playlist — data event, concatenation of all 32-byte items
pl_data = b"".join(item.to_bytes(ppq, max_tracks) for item in items)
result.extend(encode_data_event(EID_PLAYLIST, pl_data))
# 5. TrackData × max_tracks — first track (iid=1) disabled, rest enabled
for i in range(1, max_tracks + 1):
enabled = 0 if i == 1 else 1
td = encode_track_data(i, enabled, track_data_template)
result.extend(encode_data_event(EID_TRACK_DATA, td))
# 6. ArrCurrent — word event, value = 0
result.extend(encode_word_event(EID_ARR_CURRENT, 0))
return bytes(result)

382
src/flp_builder/builder.py Normal file
View File

@@ -0,0 +1,382 @@
"""JSON->FLP builder - converts SongDefinition to a valid FL Studio FLP file.
Replicates the proven assembly logic from ``output/build_reggaeton_v15.py`` but
driven entirely by a :class:`SongDefinition` object instead of hardcoded values.
Assembly order (matches v15):
FLhd header + FLdt wrapper around:
header_events + pattern_events + channel_events + arrangement_events
Usage::
builder = FLPBuilder()
flp_bytes = builder.build(song)
Path("out.flp").write_bytes(flp_bytes)
"""
import struct
from pathlib import Path
from .schema import SongDefinition, PatternDef, MelodicTrack
from .skeleton import ChannelSkeletonLoader
from .arrangement import ArrangementItem, build_arrangement_section, build_track_data_template
from .events import (
EventID,
encode_text_event,
encode_word_event,
encode_data_event,
encode_notes_block,
)
from ..composer.rhythm import get_notes
# ---------------------------------------------------------------------------
# Default paths (relative to project root)
# ---------------------------------------------------------------------------
REF_FLP = Path(__file__).parents[2] / "my space ryt" / "my space ryt.flp"
CH11_TMPL = Path(__file__).parents[2] / "output" / "ch11_kick_template.bin"
SAMPLES = Path(__file__).parents[2] / "output" / "samples"
# ---------------------------------------------------------------------------
# Note format conversion
# ---------------------------------------------------------------------------
def _convert_rhythm_notes(notes: list[dict]) -> list[dict]:
"""Convert rhythm.py note format to events.py format.
rhythm.py: ``{"pos", "len", "key", "vel"}``
events.py: ``{"position", "length", "key", "velocity"}``
"""
return [
{"position": n["pos"], "length": n["len"], "key": n["key"], "velocity": n["vel"]}
for n in notes
]
def _convert_melodic_notes(notes: list) -> list[dict]:
"""Convert MelodicNote (pos/len/key/vel) to events.py format.
MelodicNote: ``{pos, len, key, vel}``
events.py: ``{"position", "length", "key", "velocity"}``
"""
return [
{"position": n.pos, "length": n.len, "key": n.key, "velocity": n.vel}
for n in notes
]
# ---------------------------------------------------------------------------
# FLPBuilder
# ---------------------------------------------------------------------------
class FLPBuilder:
"""Builds an FLP binary from a :class:`SongDefinition`.
Parameters
----------
ref_flp:
Path to a reference FLP used for header events and channel skeleton.
ch11_template:
Path to the ch11_kick_template.bin for empty sampler channels.
samples_dir:
Directory containing .wav sample files.
"""
def __init__(
self,
ref_flp: str | Path = REF_FLP,
ch11_template: str | Path = CH11_TMPL,
samples_dir: str | Path = SAMPLES,
):
self._ref_flp = Path(ref_flp)
self._ch11 = Path(ch11_template)
self._samples = Path(samples_dir)
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
def build(self, song: SongDefinition) -> bytes:
"""Convert *song* to raw FLP bytes.
Raises
------
ValueError
If song validation fails or the reference FLP is malformed.
FileNotFoundError
If reference FLP or templates are missing.
"""
# 1. Validate
errors = song.validate()
if errors:
raise ValueError(
"Song validation failed:\n - " + "\n - ".join(errors)
)
# 2. Read reference FLP
ref_bytes = self._ref_flp.read_bytes()
num_channels = struct.unpack("<H", ref_bytes[10:12])[0]
# 3. Build each section
header_bytes = self._build_header(song, ref_bytes)
pattern_bytes = self._build_all_patterns(song)
# 3b. Build melodic map and melodic pattern bytes
melodic_map: dict[int, tuple[str, str]] = {}
melodic_pattern_bytes = b""
if song.melodic_tracks:
for mt in song.melodic_tracks:
wav_dir = str(Path(mt.sample_path).parent)
wav_name = Path(mt.sample_path).name
melodic_map[mt.channel_index] = (wav_dir, wav_name)
# Assign pattern IDs after drum patterns (1-based)
drum_pattern_count = len(song.patterns)
for i, mt in enumerate(song.melodic_tracks):
pattern_id = drum_pattern_count + i + 1
melodic_pattern_bytes += self._build_melodic_pattern(
mt, pattern_id, song.meta.ppq
)
loader = ChannelSkeletonLoader(
str(self._ref_flp),
str(self._ch11),
str(self._samples),
)
channel_bytes = loader.load(song.samples, melodic_map=melodic_map)
track_data_template = build_track_data_template(ref_bytes)
arrangement_bytes = self._build_arrangement(song, track_data_template)
# 4. Assemble body: header + patterns + melodic_patterns + channels + arrangement
body = (
header_bytes
+ pattern_bytes
+ melodic_pattern_bytes
+ channel_bytes
+ arrangement_bytes
)
# 5. Wrap with FLhd + FLdt headers (matches v15 line 317-318)
flp = (
struct.pack("<4sIhHH", b"FLhd", 6, 0, num_channels, song.meta.ppq)
+ b"FLdt"
+ struct.pack("<I", len(body))
+ body
)
return flp
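    # Size check: FLhd is 14 bytes (4-byte magic + 4-byte length + 6-byte body)
    # and the FLdt header is 8 bytes; hence the offset-22 scan start used by
    # the event parsers in this package.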
# ------------------------------------------------------------------
# Header
# ------------------------------------------------------------------
def _build_header(self, song: SongDefinition, ref_bytes: bytes) -> bytes:
"""Extract header events from reference FLP and patch with song.meta values.
The "header" is everything between offset 22 (after FLhd+FLdt chunk
headers) and the first ``PatNew`` event. This includes version info,
tempo, time-signature, etc. We patch the tempo (BPM) to match the
song definition.
This replicates v15 lines 133-141.
"""
# Find first PatNew event
first_pat = self._find_first_event(ref_bytes, EventID.PatNew)
if first_pat is None:
raise ValueError("No PatNew event found in reference FLP")
# Extract header events (everything before first pattern)
header = bytearray(ref_bytes[22:first_pat])
# Patch BPM — Tempo event (ID 156) is a dword, value = BPM * 1000
p = 0
while p < len(header):
np, _, ib, _v, _vt = self._read_ev(bytes(header), p)
if ib == EventID.Tempo:
struct.pack_into("<I", header, p + 1, int(song.meta.bpm * 1000))
break
p = np
return bytes(header)
# ------------------------------------------------------------------
# Patterns
# ------------------------------------------------------------------
def _build_pattern_bytes(self, pattern: PatternDef, ppq: int) -> bytes:
"""Build all FLP events for one pattern.
Sequence:
1. ``PatNew`` (word event) — value = pattern.id - 1 (0-based)
2. ``PatName`` (text event) — UTF-16-LE pattern name
3. ``PatNotes`` (data event) per channel from ``get_notes()``
Returns raw bytes for this pattern.
"""
buf = bytearray()
# 1. PatNew — word event, 0-based index
buf += encode_word_event(EventID.PatNew, pattern.id - 1)
# 2. PatName — text event (UTF-16-LE + null terminator)
if pattern.name:
buf += encode_text_event(EventID.PatName, pattern.name)
# 3. Generate notes via rhythm.py dispatcher
notes_by_channel = get_notes(
pattern.generator,
pattern.bars,
pattern.velocity_mult,
pattern.density,
)
# 4. Encode notes for each channel
for ch_idx, raw_notes in notes_by_channel.items():
converted = _convert_rhythm_notes(raw_notes)
buf += encode_data_event(
EventID.PatNotes,
encode_notes_block(ch_idx, converted, ppq),
)
return bytes(buf)
def _build_all_patterns(self, song: SongDefinition) -> bytes:
"""Build bytes for all patterns in *song.patterns*."""
buf = bytearray()
for pattern in song.patterns:
buf += self._build_pattern_bytes(pattern, song.meta.ppq)
return bytes(buf)
def _build_melodic_pattern(
self, mt: MelodicTrack, pattern_id: int, ppq: int
) -> bytes:
"""Build FLP events for one melodic track pattern.
Sequence:
1. ``PatNew`` (word event) — value = pattern_id - 1 (0-based)
2. ``PatName`` (text event) — UTF-16-LE with ``mt.role`` as name
3. ``PatNotes`` (data event) with notes for the melodic channel
Returns raw bytes for this melodic pattern.
"""
buf = bytearray()
# 1. PatNew — word event, 0-based index
buf += encode_word_event(EventID.PatNew, pattern_id - 1)
# 2. PatName — text event (UTF-16-LE + null terminator)
if mt.role:
buf += encode_text_event(EventID.PatName, mt.role)
# 3. Convert MelodicNotes to events.py format and encode
converted = _convert_melodic_notes(mt.notes)
buf += encode_data_event(
EventID.PatNotes,
encode_notes_block(mt.channel_index, converted, ppq),
)
return bytes(buf)
# ------------------------------------------------------------------
# Arrangement
# ------------------------------------------------------------------
def _build_arrangement(
self, song: SongDefinition, track_data_template: bytes
) -> bytes:
"""Convert *song.items* to arrangement section bytes.
Each :class:`ArrangementItemDef` (1-based track) is converted to an
:class:`ArrangementItem` (0-based track_index) and fed to
:func:`build_arrangement_section`.
"""
items = [
ArrangementItem(
pattern_id=item.pattern,
bar=item.bar,
num_bars=item.bars,
track_index=item.track - 1, # 1-based -> 0-based
muted=item.muted,
)
for item in song.items
]
# Add melodic track items after drum items
if song.melodic_tracks:
drum_pattern_count = len(song.patterns)
# Determine starting track index (after drum tracks)
max_drum_track = max((item.track for item in song.items), default=1)
for i, mt in enumerate(song.melodic_tracks):
pattern_id = drum_pattern_count + i + 1
track_index = max_drum_track + i # 0-based, after drum tracks
items.append(
ArrangementItem(
pattern_id=pattern_id,
bar=0,
num_bars=4, # default 4 bars
track_index=track_index,
muted=False,
)
)
return build_arrangement_section(
items,
track_data_template,
ppq=song.meta.ppq,
)
# ------------------------------------------------------------------
# Event parsing helpers (minimal, for header scanning)
# ------------------------------------------------------------------
@staticmethod
def _read_ev(data: bytes, pos: int) -> tuple:
"""Read one FLP event from *data* starting at *pos*.
Returns ``(next_pos, start, event_id, value, value_type)``.
"""
start = pos
ib = data[pos]
pos += 1
if ib < 64:
# Byte event: 1 byte ID + 1 byte value
return pos + 1, start, ib, data[start + 1], "byte"
elif ib < 128:
# Word event: 1 byte ID + 2 byte value
return pos + 2, start, ib, struct.unpack("<H", data[pos : pos + 2])[0], "word"
elif ib < 192:
# Dword event: 1 byte ID + 4 byte value
return pos + 4, start, ib, struct.unpack("<I", data[pos : pos + 4])[0], "dword"
else:
# Data/text event: 1 byte ID + varint size + payload
sz = 0
sh = 0
while True:
b = data[pos]
pos += 1
sz |= (b & 0x7F) << sh
sh += 7
if not (b & 0x80):
break
return pos + sz, start, ib, data[pos : pos + sz], "data"
@classmethod
def _find_first_event(cls, data: bytes, event_id: int) -> int | None:
"""Find the byte offset of the first occurrence of *event_id*.
Starts scanning at offset 22 (past FLhd + FLdt chunk headers).
Returns ``None`` if the event is not found.
"""
pos = 22
while pos < len(data):
np, start, ib, _val, _vt = cls._read_ev(data, pos)
if ib == event_id:
return start
pos = np
return None

225
src/flp_builder/events.py Normal file
View File

@@ -0,0 +1,225 @@
import struct
from enum import IntEnum
class EventID(IntEnum):
WORD = 64
DWORD = 128
TEXT = 192
DATA = 208
LoopActive = 9
ShowInfo = 10
Volume = 12
PanLaw = 23
Licensed = 28
TempoCoarse = 66
Pitch = 80
TempoFine = 93
CurGroupId = 146
Tempo = 156
FLBuild = 159
Title = 194
Comments = 195
Url = 197
RTFComments = 198
FLVersion = 199
Licensee = 200
DataPath = 202
Genre = 206
Artists = 207
Timestamp = 237
ChIsEnabled = 0
ChVolByte = 2
ChPanByte = 3
ChZipped = 15
ChType = 21
ChRoutedTo = 22
ChIsLocked = 32
ChNew = 64
ChFreqTilt = 69
ChFXFlags = 70
ChCutoff = 71
ChVolWord = 72
ChPanWord = 73
ChPreamp = 74
ChFadeOut = 75
ChFadeIn = 76
ChResonance = 83
ChStereoDelay = 85
ChPogo = 86
ChTimeShift = 89
ChChildren = 94
ChSwing = 97
ChRingMod = 131
ChCutGroup = 132
ChRootNote = 135
ChDelayModXY = 138
ChReverb = 139
ChStretchTime = 140
ChFineTune = 142
ChSamplerFlags = 143
ChLayerFlags = 144
ChGroupNum = 145
ChAUSampleRate = 153
ChName = 192
ChSamplePath = 196
ChDelay = 209
ChParameters = 215
ChEnvelopeLFO = 218
ChLevels = 219
ChPolyphony = 221
ChTracking = 228
ChLevelAdjusts = 229
ChAutomation = 234
PatLooped = 26
PatNew = 65
PatColor = 150
PatName = 193
PatChannelIID = 160
PatLength = 164
PatControllers = 223
PatNotes = 224
PluginColor = 128
PluginIcon = 155
PluginInternalName = 201
PluginName = 203
PluginWrapper = 212
PluginData = 213
MixerAPDC = 29
MixerParams = 225
def encode_varint(value: int) -> bytes:
result = bytearray()
while True:
byte = value & 0x7F
value >>= 7
if value:
byte |= 0x80
result.append(byte)
if not value:
break
return bytes(result)
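# Example: encode_varint(300) -> b"\xac\x02" (300 = 0b100101100; the low seven
# bits 0x2C gain the continuation bit to become 0xAC, then 0x02 remains).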
def encode_text(text: str, utf16: bool = True) -> bytes:
if utf16:
return text.encode("utf-16-le") + b"\x00\x00"
return text.encode("ascii") + b"\x00"
def encode_byte_event(id_: int, value: int) -> bytes:
return bytes([id_, value & 0xFF])
def encode_word_event(id_: int, value: int) -> bytes:
return bytes([id_]) + struct.pack("<H", value)
def encode_dword_event(id_: int, value: int) -> bytes:
return bytes([id_]) + struct.pack("<I", value)
def encode_text_event(id_: int, text: str) -> bytes:
data = encode_text(text)
return bytes([id_]) + encode_varint(len(data)) + data
def encode_data_event(id_: int, data: bytes) -> bytes:
return bytes([id_]) + encode_varint(len(data)) + data
def encode_note_24(
position: int,
flags: int,
rack_channel: int,
length: int,
key: int,
group: int,
fine_pitch: int,
release: int,
midi_channel: int,
pan: int,
velocity: int,
mod_x: int,
mod_y: int,
) -> bytes:
"""Encode a single note in FL Studio's 24-byte format.
Format (24 bytes, all absolute values):
position: uint32 (4) - absolute position in PPQ ticks
flags: uint16 (2) - note flags (0x4000 = standard note)
rack_channel: uint16 (2) - channel rack index
length: uint32 (4) - duration in PPQ ticks
key: uint16 (2) - MIDI note number (0-127)
group: uint16 (2) - note group
fine_pitch: uint8 (1) - fine pitch (0x78 = 120 = no detune)
_u1: uint8 (1) - unknown (0x40)
release: uint8 (1) - release value
midi_channel: uint8 (1) - MIDI channel
pan: uint8 (1) - stereo pan (64 = center)
velocity: uint8 (1) - note velocity
mod_x: uint8 (1) - modulation X (128 = center)
mod_y: uint8 (1) - modulation Y (128 = center)
"""
return struct.pack(
"<IHHIHHBBBBBBBB",
position,
flags,
rack_channel,
length,
key,
group,
fine_pitch,
0x40, # unknown byte, always 0x40 in observed data
release,
midi_channel,
pan,
velocity,
mod_x,
mod_y,
)
def encode_notes_block(
channel_index: int,
notes: list[dict],
ppq: int = 96,
) -> bytes:
"""Encode all notes for a pattern as raw note data (no header).
FL Studio stores notes as a flat array of 24-byte structs.
No header or count prefix needed - the event size determines count.
"""
note_data = bytearray()
for note in notes:
pos = int(note.get("position", 0) * ppq)
length = int(note.get("length", 1) * ppq)
key = note.get("key", 60)
velocity = note.get("velocity", 100)
rack_channel = note.get("rack_channel", channel_index)
note_bytes = encode_note_24(
position=pos,
flags=0x4000,
rack_channel=rack_channel,
length=max(length, 1),
key=key & 0x7F,
group=0,
fine_pitch=120,
release=64,
midi_channel=0,
pan=64,
velocity=velocity & 0x7F,
mod_x=128,
mod_y=128,
)
note_data.extend(note_bytes)
return bytes(note_data)
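# Example: at ppq=96 a note at position 1.5 beats with length 0.25 encodes as
# position=144 ticks and length=24 ticks in its 24-byte struct.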

134
src/flp_builder/project.py Normal file
View File

@@ -0,0 +1,134 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class Note:
position: float
length: float
key: int
velocity: int = 100
fine_pitch: int = 0
pan: int = 0
midi_channel: int = 0
slide: bool = False
release: int = 0
mod_x: int = 0
mod_y: int = 0
group: int = 0
def to_dict(self) -> dict:
return {
"position": self.position,
"length": self.length,
"key": self.key,
"velocity": self.velocity,
"fine_pitch": self.fine_pitch,
"pan": self.pan,
"midi_channel": self.midi_channel,
"slide": self.slide,
"release": self.release,
"mod_x": self.mod_x,
"mod_y": self.mod_y,
"group": self.group,
}
@dataclass
class Pattern:
name: str = ""
index: int = 0
notes: dict[int, list[Note]] = field(default_factory=dict)
color: int = 0
length: int = 0
def add_note(self, channel_index: int, note: Note):
if channel_index not in self.notes:
self.notes[channel_index] = []
self.notes[channel_index].append(note)
@dataclass
class Plugin:
internal_name: str = ""
display_name: str = ""
plugin_data: Optional[bytes] = None
color: int = 0
icon: int = 0
@dataclass
class Channel:
name: str = ""
index: int = 0
enabled: bool = True
volume: int = 256
pan: int = 0
plugin: Optional[Plugin] = None
mixer_track: int = 0
color: int = 0
root_note: int = 60
channel_type: int = 0
FL_TYPE_GENERATOR = 2
FL_TYPE_SAMPLER = 0
@dataclass
class MixerTrack:
name: str = ""
index: int = 0
volume: float = 1.0
pan: float = 0.0
muted: bool = False
effects: list[Plugin] = field(default_factory=list)
@dataclass
class FLPProject:
tempo: float = 140.0
title: str = ""
genre: str = ""
artists: str = ""
comments: str = ""
fl_version: str = "24.7.1.73"
ppq: int = 96
channels: list[Channel] = field(default_factory=list)
patterns: list[Pattern] = field(default_factory=list)
mixer_tracks: list[MixerTrack] = field(default_factory=list)
def add_channel(
self,
name: str,
plugin_internal_name: str = "",
plugin_display_name: str = "",
plugin_data: Optional[bytes] = None,
mixer_track: int = -1,
channel_type: int = 2,
volume: int = 256,
) -> Channel:
idx = len(self.channels)
plugin = None
if plugin_internal_name:
plugin = Plugin(
internal_name=plugin_internal_name,
display_name=plugin_display_name or plugin_internal_name,
plugin_data=plugin_data,
)
ch = Channel(
name=name,
index=idx,
plugin=plugin,
mixer_track=mixer_track if mixer_track >= 0 else idx,
channel_type=channel_type,
volume=volume,
)
self.channels.append(ch)
return ch
def add_pattern(self, name: str = "") -> Pattern:
idx = len(self.patterns) + 1
pat = Pattern(name=name, index=idx)
self.patterns.append(pat)
return pat
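# Example (illustrative):
#     proj = FLPProject(tempo=95.0, title="Demo")
#     kick = proj.add_channel("Kick", channel_type=FL_TYPE_SAMPLER)
#     pat = proj.add_pattern("Main")
#     pat.add_note(kick.index, Note(position=0.0, length=0.25, key=60))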

395
src/flp_builder/schema.py Normal file
View File

@@ -0,0 +1,395 @@
"""Song definition schema for FL Studio FLP generation.
Provides the JSON contract that decouples song composition from FLP rendering.
A SongDefinition is the single source of truth for one ``.flp`` file.
Usage::
song = SongDefinition.load_file("knowledge/songs/reggaeton_template.json")
errors = song.validate()
json_str = song.to_json()
"""
from __future__ import annotations
import json
import re
from dataclasses import asdict, dataclass, field
from pathlib import Path
from typing import Any
# ---------------------------------------------------------------------------
# Key validation pattern: A-G, optional flat/sharp, optional minor 'm'
# ---------------------------------------------------------------------------
_KEY_RE = re.compile(r"^[A-G][b#]?m?$")
# Allowed top-level keys in the JSON document
_TOP_LEVEL_KEYS = frozenset({
"meta", "samples", "patterns", "tracks", "items",
"melodic_tracks", "progression_name", "section_template",
})
# Allowed keys in nested objects
_META_KEYS = frozenset({
"bpm", "key", "title", "ppq", "time_sig_num", "time_sig_den",
})
_PATTERN_KEYS = frozenset({
"id", "name", "instrument", "channel", "bars", "generator",
"velocity_mult", "density",
})
_TRACK_KEYS = frozenset({"index", "name"})
_ITEM_KEYS = frozenset({"pattern", "bar", "bars", "track", "muted"})
# ---------------------------------------------------------------------------
# Dataclasses
# ---------------------------------------------------------------------------
@dataclass
class SongMeta:
"""Song metadata — tempo, key, time signature."""
bpm: float # 20-999
key: str # e.g. "Am", "Dm", "Gm"
title: str # song title
ppq: int = 96 # ticks per quarter note
time_sig_num: int = 4
time_sig_den: int = 4
@dataclass
class PatternNote:
"""A single note within a pattern (used when embedding notes directly)."""
pos: float # beat position (0.0 = beat 1 of bar)
len: float # duration in beats
key: int # MIDI note (60 = C4)
vel: int # velocity 0-127
@dataclass
class PatternDef:
"""Pattern definition — recipe for generating note data.
The ``generator`` field names a function in ``composer/rhythm.py``
that produces the actual MIDI notes for this pattern.
"""
id: int # pattern number (1-based)
name: str # human label
instrument: str # "kick", "snare", "hihat", etc.
channel: int # channel rack index (10-16)
bars: int # pattern length in bars
generator: str # rhythm.py function name
velocity_mult: float = 1.0 # scales all velocities
density: float = 1.0 # 0.5=sparse, 1.0=full
@dataclass
class ArrangementTrack:
"""A track row in the FL Studio playlist / arrangement."""
index: int # 1-based track index in arrangement
name: str # display name
@dataclass
class ArrangementItemDef:
"""Placement of a pattern on the arrangement timeline."""
pattern: int # pattern id
bar: float # start bar (0-based)
bars: float # duration in bars
track: int # track index (1-based, must exist in tracks[])
muted: bool = False
@dataclass
class MelodicNote:
"""A single note in a melodic track. Unified format: {pos, len, key, vel}."""
pos: float # beat position (0.0 = beat 1 of bar)
len: float # duration in beats
key: int # MIDI note (60 = C4)
vel: int # velocity 0-127
@dataclass
class MelodicTrack:
"""A melodic track referencing an audio sample with MIDI note triggers.
The sample is loaded into a sampler channel and notes trigger playback.
"""
role: str # "bass", "lead", "pad", "pluck", etc.
sample_path: str # absolute path to .wav file
notes: list[MelodicNote] # note events
channel_index: int # FL Studio channel (17+ for melodic)
volume: float = 0.85 # 0.0-1.0
pan: float = 0.0 # -1.0 to 1.0
@dataclass
class SongDefinition:
"""Complete song definition — the single source of truth for one .flp.
Serialization round-trips through ``to_json()`` / ``from_json()``.
Use ``validate()`` to check constraints before rendering.
"""
meta: SongMeta
samples: dict[str, str] # {"kick": "kick.wav", ...}
patterns: list[PatternDef]
tracks: list[ArrangementTrack]
items: list[ArrangementItemDef]
melodic_tracks: list[MelodicTrack] = field(default_factory=list)
# Optional metadata for variation engine
progression_name: str = ""
section_template: str = "standard"
# ------------------------------------------------------------------
# Validation
# ------------------------------------------------------------------
def validate(self) -> list[str]:
"""Return list of validation errors (empty list = valid).
Checks:
1. meta.bpm in 20-999
2. meta.key matches ``^[A-G][b#]?m?$``
3. meta.ppq == 96
4. All pattern ``id`` values are unique
5. All ``item.pattern`` reference an existing pattern id
6. All ``item.track`` reference an existing track index
"""
errors: list[str] = []
# 1. BPM range
if not (20 <= self.meta.bpm <= 999):
errors.append(
f"meta.bpm must be 20999, got {self.meta.bpm}"
)
# 2. Key format
if not _KEY_RE.match(self.meta.key):
errors.append(
f"meta.key must match ^[A-G][b#]?m?$, got '{self.meta.key}'"
)
# 3. PPQ
if self.meta.ppq != 96:
errors.append(
f"meta.ppq must be 96, got {self.meta.ppq}"
)
# 4. Unique pattern ids
pattern_ids = [p.id for p in self.patterns]
seen: set[int] = set()
for pid in pattern_ids:
if pid in seen:
errors.append(f"Duplicate pattern id: {pid}")
seen.add(pid)
valid_pattern_ids = set(pattern_ids)
# 5. All items reference valid pattern id
for i, item in enumerate(self.items):
if item.pattern not in valid_pattern_ids:
errors.append(
f"items[{i}].pattern={item.pattern} does not reference "
f"an existing pattern id"
)
# 6. All items reference valid track index
valid_track_indices = {t.index for t in self.tracks}
for i, item in enumerate(self.items):
if item.track not in valid_track_indices:
errors.append(
f"items[{i}].track={item.track} does not reference "
f"an existing track index"
)
return errors
# ------------------------------------------------------------------
# Serialization
# ------------------------------------------------------------------
def to_json(self, indent: int = 2) -> str:
"""Serialize to a JSON string."""
return json.dumps(asdict(self), indent=indent, ensure_ascii=False)
@classmethod
def from_json(cls, data: str | dict) -> SongDefinition:
"""Deserialize from a JSON string or dict.
Raises:
ValueError: On unknown keys, missing fields, or validation errors.
"""
if isinstance(data, str):
raw = json.loads(data)
else:
raw = data
if not isinstance(raw, dict):
raise ValueError(f"Expected dict, got {type(raw).__name__}")
# Reject unknown top-level keys
unknown = set(raw.keys()) - _TOP_LEVEL_KEYS
if unknown:
raise ValueError(f"Unknown top-level keys: {sorted(unknown)}")
# --- meta ---
meta_raw = raw.get("meta")
if not isinstance(meta_raw, dict):
raise ValueError("Missing or invalid 'meta' object")
unknown_meta = set(meta_raw.keys()) - _META_KEYS
if unknown_meta:
raise ValueError(f"Unknown meta keys: {sorted(unknown_meta)}")
try:
meta = SongMeta(
bpm=float(meta_raw["bpm"]),
key=str(meta_raw["key"]),
title=str(meta_raw.get("title", "")),
ppq=int(meta_raw.get("ppq", 96)),
time_sig_num=int(meta_raw.get("time_sig_num", 4)),
time_sig_den=int(meta_raw.get("time_sig_den", 4)),
)
except KeyError as exc:
raise ValueError(f"Missing required meta field: {exc}") from exc
# --- samples ---
samples = raw.get("samples")
if not isinstance(samples, dict):
raise ValueError("Missing or invalid 'samples' dict")
# --- patterns ---
patterns_raw = raw.get("patterns")
if not isinstance(patterns_raw, list):
raise ValueError("Missing or invalid 'patterns' list")
patterns: list[PatternDef] = []
for idx, p in enumerate(patterns_raw):
if not isinstance(p, dict):
raise ValueError(f"patterns[{idx}] must be a dict")
unknown_p = set(p.keys()) - _PATTERN_KEYS
if unknown_p:
raise ValueError(
f"patterns[{idx}] unknown keys: {sorted(unknown_p)}"
)
try:
patterns.append(PatternDef(
id=int(p["id"]),
name=str(p["name"]),
instrument=str(p["instrument"]),
channel=int(p["channel"]),
bars=int(p["bars"]),
generator=str(p["generator"]),
velocity_mult=float(p.get("velocity_mult", 1.0)),
density=float(p.get("density", 1.0)),
))
except KeyError as exc:
raise ValueError(
f"patterns[{idx}] missing required field: {exc}"
) from exc
# --- tracks ---
tracks_raw = raw.get("tracks")
if not isinstance(tracks_raw, list):
raise ValueError("Missing or invalid 'tracks' list")
tracks: list[ArrangementTrack] = []
for idx, t in enumerate(tracks_raw):
if not isinstance(t, dict):
raise ValueError(f"tracks[{idx}] must be a dict")
unknown_t = set(t.keys()) - _TRACK_KEYS
if unknown_t:
raise ValueError(
f"tracks[{idx}] unknown keys: {sorted(unknown_t)}"
)
try:
tracks.append(ArrangementTrack(
index=int(t["index"]),
name=str(t["name"]),
))
except KeyError as exc:
raise ValueError(
f"tracks[{idx}] missing required field: {exc}"
) from exc
# --- items ---
items_raw = raw.get("items")
if not isinstance(items_raw, list):
raise ValueError("Missing or invalid 'items' list")
items: list[ArrangementItemDef] = []
for idx, it in enumerate(items_raw):
if not isinstance(it, dict):
raise ValueError(f"items[{idx}] must be a dict")
unknown_it = set(it.keys()) - _ITEM_KEYS
if unknown_it:
raise ValueError(
f"items[{idx}] unknown keys: {sorted(unknown_it)}"
)
try:
items.append(ArrangementItemDef(
pattern=int(it["pattern"]),
bar=float(it["bar"]),
bars=float(it["bars"]),
track=int(it["track"]),
muted=bool(it.get("muted", False)),
))
except KeyError as exc:
raise ValueError(
f"items[{idx}] missing required field: {exc}"
) from exc
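        # NB: "melodic_tracks" is accepted as a top-level key but is not
        # deserialized here, so songs loaded from JSON always start with an
        # empty melodic_tracks list.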
song = cls(
meta=meta,
samples=samples,
patterns=patterns,
tracks=tracks,
items=items,
progression_name=str(raw.get("progression_name", "")),
section_template=str(raw.get("section_template", "standard")),
)
# Validate and raise on errors
errors = song.validate()
if errors:
raise ValueError(
"Song validation failed:\n - " + "\n - ".join(errors)
)
return song
@classmethod
def load_file(cls, path: str | Path) -> SongDefinition:
"""Load and validate from a ``.json`` file.
Raises:
FileNotFoundError: If the file does not exist.
ValueError: If validation fails.
"""
p = Path(path)
if not p.exists():
raise FileNotFoundError(f"Song file not found: {p}")
return cls.from_json(p.read_text(encoding="utf-8"))
# ---------------------------------------------------------------------------
# Convenience
# ---------------------------------------------------------------------------
def load_song_json(path: str | Path) -> SongDefinition:
"""Load + validate a song definition from a JSON file.
Raises:
ValueError: If validation fails.
FileNotFoundError: If file does not exist.
"""
return SongDefinition.load_file(path)

382
src/flp_builder/skeleton.py Normal file
View File

@@ -0,0 +1,382 @@
"""Channel skeleton loader — extracts sampler channels from reference FLP and patches sample paths."""
import os
import struct
from pathlib import Path
# Default channel→sample mapping (index: sample_key). Keys match the sample
# roles wired to rhythm.py's channel constants (CH_P1=10 through CH_CL=16).
# Only Ch10-19 are sampler channels in the reference FLP; 17-19 are reserved
# for melodic tracks (via melodic_map) or replaced with empty samplers.
DEFAULT_CHANNEL_MAP = {
    10: "perc1",
    11: "kick",
    12: "snare",
    13: "rim",
    14: "perc2",
    15: "hihat",
    16: "clap",
}
# Channels to replace with empty sampler (non-drum channels from original)
EMPTY_SAMPLER_CHANNELS = {3, 4, 8, 17, 18, 19}
class ChannelSkeletonLoader:
"""Loads sampler channel configuration from a reference FLP binary.
Usage:
loader = ChannelSkeletonLoader(ref_flp_path, ch11_template_path, samples_dir)
channel_bytes = loader.load(sample_map={"kick": "kick.wav", ...})
"""
def __init__(self, ref_flp_path: str, ch11_template_path: str, samples_dir: str):
self.ref_flp_path = ref_flp_path
self.ch11_template_path = ch11_template_path
self.samples_dir = samples_dir
self._cache: bytes | None = None
self._ch11_template: bytes | None = None
def load(
self,
sample_map: dict[str, str] | None = None,
melodic_map: dict[int, tuple[str, str]] | None = None,
) -> bytes:
"""Return assembled channel bytes with sample paths patched.
sample_map: {"kick": "kick.wav", "snare": "snare.wav", ...}
Keys must match DEFAULT_CHANNEL_MAP values.
If None, uses DEFAULT_CHANNEL_MAP with filenames as "<key>.wav"
melodic_map: {ch_idx: (samples_dir, wav_name), ...}
Maps melodic channel indices to their sample file.
These channels get sampler clones with real samples instead of empty.
Returns raw bytes for all channels (stripped of post-channel data).
Caches result — calling load() multiple times returns same bytes.
"""
if self._cache is not None:
return self._cache
# Resolve sample_map: map channel_index → wav filename
if sample_map is None:
ch_to_wav = {ch: f"{key}.wav" for ch, key in DEFAULT_CHANNEL_MAP.items()}
else:
ch_to_wav = {ch: sample_map[key] for ch, key in DEFAULT_CHANNEL_MAP.items() if key in sample_map}
melodic_channels = set(melodic_map.keys()) if melodic_map else set()
extracted = self._extract_channels()
order = extracted["order"]
segments: dict[int, bytearray] = extracted["segments"]
# Replace channels not in drum/melodic maps with empty sampler clones
channels_with_samples = set(ch_to_wav.keys()) | melodic_channels
for ch_idx in list(segments.keys()):
if ch_idx not in channels_with_samples:
segments[ch_idx] = bytearray(self._make_empty_sampler(ch_idx))
# For melodic channels: clone ch11 template and patch with real sample path
if melodic_map:
for ch_idx, (sample_dir, wav_name) in melodic_map.items():
if ch_idx in segments:
segments[ch_idx] = bytearray(
self._make_sampler_with_sample(ch_idx, sample_dir, wav_name)
)
# Patch sample paths for drum channels (skip melodic — already patched)
for ch_idx, wav_name in ch_to_wav.items():
if ch_idx in segments and ch_idx not in melodic_channels:
segments[ch_idx] = bytearray(self._patch_sample_path(bytes(segments[ch_idx]), wav_name))
# Assemble in original order
buf = bytearray()
for ch_idx in order:
buf += segments[ch_idx]
self._cache = bytes(buf)
return self._cache
# ── Event parsing ──────────────────────────────────────────────────────────
def _read_ev(self, data: bytes, pos: int) -> tuple:
"""Read one FLP event. Returns (next_pos, start, event_id, value, value_type)."""
start = pos
ib = data[pos]
pos += 1
if ib < 64:
# Byte event: 1 byte ID + 1 byte value
return pos + 1, start, ib, data[start + 1], "byte"
elif ib < 128:
# Word event: 1 byte ID + 2 byte value
return pos + 2, start, ib, struct.unpack("<H", data[pos : pos + 2])[0], "word"
elif ib < 192:
# Dword event: 1 byte ID + 4 byte value
return pos + 4, start, ib, struct.unpack("<I", data[pos : pos + 4])[0], "dword"
else:
# Data/TEXT event: 1 byte ID + varint size + payload
sz = 0
sh = 0
while True:
b = data[pos]
pos += 1
sz |= (b & 0x7F) << sh
sh += 7
if not (b & 0x80):
break
return pos + sz, start, ib, data[pos : pos + sz], "data"
def _encode_varint(self, n: int) -> bytes:
"""Encode an integer as a varint (LEB128)."""
r = bytearray()
while True:
b = n & 0x7F
n >>= 7
if n:
b |= 0x80
r.append(b)
if not n:
break
return bytes(r)
# ── Channel extraction ─────────────────────────────────────────────────────
def _extract_channels(self) -> dict:
"""Parse reference FLP, extract channel segments, find post-channel boundary.
Returns:
{
'order': [ch_idx, ...], # channels in original order
'segments': {idx: bytes}, # raw bytes per channel
'last_ch': idx, # index of last channel
}
"""
with open(self.ref_flp_path, "rb") as f:
data = f.read()
# Skip the FLhd chunk (8-byte header + 6-byte body) and the FLdt chunk
# header (8 bytes): 8 + 6 + 8 = 22, so scanning starts at offset 22.
pos = 22
first_ch = None
current_ch = -1
ch_ranges: dict[int, tuple[int, int]] = {}
channels_order: list[int] = []
# Import here to avoid circular — events is a leaf module
from src.flp_builder.events import EventID
while pos < len(data):
np, st, ib, val, vt = self._read_ev(data, pos)
if ib == EventID.ChNew:
if first_ch is None:
first_ch = st
if current_ch >= 0:
ch_ranges[current_ch] = (ch_ranges[current_ch][0], st)
current_ch = val
ch_ranges[current_ch] = (st, st)
channels_order.append(current_ch)
pos = np
if current_ch >= 0:
ch_ranges[current_ch] = (ch_ranges[current_ch][0], len(data))
if not channels_order:
raise ValueError("No channels found in reference FLP")
# Find post-channel boundary in last channel segment
# Scan for ID 99 (ArrNew) — everything from there onward is post-channel
last_ch = channels_order[-1]
last_seg_start = ch_ranges[last_ch][0]
last_seg_data = data[last_seg_start:]
p = 0
post_ch_offset = len(last_seg_data)
while p < len(last_seg_data):
np, st, ib, val, vt = self._read_ev(last_seg_data, p)
if ib == 99: # ArrNew
post_ch_offset = st
break
p = np
# Build channel segments, stripping post-channel data from last one
segments: dict[int, bytearray] = {}
for ch_idx in channels_order:
s, e = ch_ranges[ch_idx]
if ch_idx == last_ch:
segments[ch_idx] = bytearray(data[s : s + post_ch_offset])
else:
segments[ch_idx] = bytearray(data[s:e])
return {
"order": channels_order,
"segments": segments,
"last_ch": last_ch,
}
# ── Sampler with real sample ────────────────────────────────────────────────
# Events to strip when cloning: old sample path, old sample name, cached data
STRIP_EVENTS = {0xC4, 0xCB, 0xDA, 0xD7, 0xE4, 0xE5, 0xDD, 0xD1}
def _make_sampler_with_sample(self, ch_idx: int, samples_dir: str, wav_name: str) -> bytes:
"""Clone the FL Studio-created sampler template and patch with real sample.
Uses output/flstudio_sampler_template.bin which was extracted from a
channel that FL Studio itself created (guaranteed correct format).
"""
template_path = os.path.join(
os.path.dirname(self.ref_flp_path), "..", "output", "flstudio_sampler_template.bin"
)
template_path = os.path.normpath(template_path)
if not os.path.isfile(template_path):
# No fallback here: the template must first be extracted from an FL Studio-made
# project, so fail loudly rather than emit a broken sampler.
raise FileNotFoundError(f"Sampler template not found: {template_path}")
with open(template_path, "rb") as f:
source = f.read()
# Rebuild: keep non-cached events, patch ChNew index
seg = bytearray()
pos = 0
while pos < len(source):
np, st, ib, val, vt = self._read_ev(source, pos)
if ib in self.STRIP_EVENTS:
pass # Remove stale cached data
elif ib == 0x40 and vt == "word":
seg += struct.pack("<BH", 0x40, ch_idx)
else:
seg += source[st:np]
pos = np
# Add sample name (0xCB)
sample_name = os.path.splitext(wav_name)[0]
encoded_name = sample_name.encode("utf-16-le") + b"\x00\x00"
seg += bytes([0xCB]) + self._encode_varint(len(encoded_name)) + encoded_name
# Add sample path (0xC4) — absolute path, no %USERPROFILE%
full_path = os.path.join(samples_dir, wav_name)
encoded_path = full_path.encode("utf-16-le") + b"\x00\x00"
seg += bytes([0xC4]) + self._encode_varint(len(encoded_path)) + encoded_path
return bytes(seg)
def _extract_channels_raw(self) -> dict[int, bytes]:
"""Extract raw channel segments from reference FLP without caching.
Returns {ch_idx: bytes}."""
with open(self.ref_flp_path, "rb") as f:
data = f.read()
from src.flp_builder.events import EventID
pos = 22
current_ch = -1
ch_ranges: dict[int, tuple[int, int]] = {}
channels_order: list[int] = []
while pos < len(data):
np, st, ib, val, vt = self._read_ev(data, pos)
if ib == EventID.ChNew:
if current_ch >= 0:
ch_ranges[current_ch] = (ch_ranges[current_ch][0], st)
current_ch = val
ch_ranges[current_ch] = (st, st)
channels_order.append(current_ch)
pos = np
if current_ch >= 0:
ch_ranges[current_ch] = (ch_ranges[current_ch][0], len(data))
# Strip post-channel data from last channel
last_ch = channels_order[-1]
last_start = ch_ranges[last_ch][0]
last_data = data[last_start:]
p = 0
post_offset = len(last_data)
while p < len(last_data):
np, st, ib, val, vt = self._read_ev(last_data, p)
if ib == 99:
post_offset = st
break
p = np
segments: dict[int, bytes] = {}
for ch_idx in channels_order:
s, e = ch_ranges[ch_idx]
if ch_idx == last_ch:
segments[ch_idx] = data[s:s + post_offset]
else:
segments[ch_idx] = data[s:e]
return segments
def _patch_chnew_index(self, seg: bytearray, new_idx: int):
"""Find and patch the ChNew word event to a new channel index."""
pos = 0
while pos < len(seg):
np, st, ib, val, vt = self._read_ev(bytes(seg), pos)
if ib == 64 and vt == "word": # ChNew
struct.pack_into("<H", seg, st + 1, new_idx)
return
pos = np
# ── Empty sampler ──────────────────────────────────────────────────────────
def _make_empty_sampler(self, ch_idx: int) -> bytes:
"""Create a minimal empty sampler channel with no sample loaded."""
extracted = self._extract_channels_raw()
# Clone the first available channel in 10-19 as the template to empty out
source_idx = next((i for i in range(10, 20) if i in extracted), None)
if source_idx is None:
raise ValueError("No source channel in range 10-19 to clone as an empty sampler")
seg = bytearray()
source = extracted[source_idx]
pos = 0
while pos < len(source):
np, st, ib, val, vt = self._read_ev(source, pos)
if ib in self.STRIP_EVENTS or ib == 0xC4:
pass # Remove cached data AND old sample path
elif ib == 0x40 and vt == "word":
seg += struct.pack("<BH", 0x40, ch_idx)
else:
seg += source[st:np]
pos = np
# Add empty sample path
seg += bytes([0xC4, 0x02, 0x00, 0x00])
return bytes(seg)
# ── Sample path patching ───────────────────────────────────────────────────
def _patch_sample_path(self, seg: bytes, wav_name: str) -> bytes:
"""Replace 0xC4 (ChSamplePath) event with encoded wav_path.
Uses %USERPROFILE% substitution for portability.
Paths are encoded as UTF-16-LE + null terminator (\\x00\\x00).
"""
seg = bytearray(seg)
# Build full path and substitute USERPROFILE for portability
full_path = os.path.join(self.samples_dir, wav_name)
userprofile = os.environ.get("USERPROFILE", "")
rel_path = full_path.replace(userprofile, "%USERPROFILE%")
encoded_path = rel_path.encode("utf-16-le") + b"\x00\x00"
# Build replacement event: ID byte + varint(size) + encoded path
path_ev = bytes([0xC4]) + self._encode_varint(len(encoded_path)) + encoded_path
# Find all ChSamplePath events
local = 0
replacements: list[tuple[int, int, bytes]] = []
while local < len(seg):
nl, es, ib, v, vt = self._read_ev(bytes(seg), local)
if ib == 0xC4:
replacements.append((es, nl, path_ev))
local = nl
# Apply in reverse to preserve offsets
for es, el, nd in reversed(replacements):
seg[es:el] = nd
return bytes(seg)

145
src/flp_builder/writer.py Normal file
View File

@@ -0,0 +1,145 @@
from __future__ import annotations
import struct
from .events import (
EventID,
encode_byte_event,
encode_word_event,
encode_dword_event,
encode_text_event,
encode_data_event,
encode_varint,
encode_notes_block,
)
from .project import FLPProject, Pattern, Note
class FLPWriter:
def __init__(self, project: FLPProject):
self.project = project
self._events: list[bytes] = []
def build(self) -> bytes:
self._events = []
self._write_project_header()
self._write_patterns()
self._write_channels()
return self._serialize()
def _add_event(self, data: bytes):
self._events.append(data)
def _write_project_header(self):
p = self.project
self._add_event(encode_text_event(EventID.FLVersion, p.fl_version))
self._add_event(encode_dword_event(EventID.FLBuild, 1773))
self._add_event(encode_byte_event(EventID.Licensed, 1))
self._add_event(encode_dword_event(EventID.Tempo, int(p.tempo * 1000)))
self._add_event(encode_byte_event(EventID.LoopActive, 1))
self._add_event(encode_word_event(EventID.Pitch, 0))
self._add_event(encode_byte_event(EventID.PanLaw, 0))
if p.title:
self._add_event(encode_text_event(EventID.Title, p.title))
if p.genre:
self._add_event(encode_text_event(EventID.Genre, p.genre))
if p.artists:
self._add_event(encode_text_event(EventID.Artists, p.artists))
if p.comments:
self._add_event(encode_text_event(EventID.Comments, p.comments))
def _write_patterns(self):
p = self.project
for pat in p.patterns:
self._add_event(encode_word_event(EventID.PatNew, pat.index))
if pat.name:
self._add_event(encode_text_event(EventID.PatName, pat.name))
for ch_idx, notes in pat.notes.items():
if notes:
notes_data = encode_notes_block(
ch_idx,
[n.to_dict() if isinstance(n, Note) else n for n in notes],
ppq=p.ppq,
)
self._add_event(encode_data_event(EventID.PatNotes, notes_data))
def _write_channels(self):
p = self.project
for ch in p.channels:
self._add_event(encode_word_event(EventID.ChNew, ch.index))
self._add_event(encode_byte_event(EventID.ChType, ch.channel_type))
if ch.plugin:
self._add_event(
encode_text_event(EventID.PluginInternalName, ch.plugin.internal_name)
)
if ch.plugin.plugin_data:
self._add_event(
encode_data_event(EventID.PluginData, ch.plugin.plugin_data)
)
elif ch.plugin.internal_name == "Fruity Wrapper":
self._add_event(
encode_text_event(EventID.PluginName, ch.plugin.display_name)
)
wrapper_data = self._build_wrapper_stub(ch.plugin.display_name)
self._add_event(encode_data_event(EventID.PluginData, wrapper_data))
else:
self._add_event(
encode_text_event(EventID.PluginName, ch.plugin.display_name)
)
plugin_data = self._build_native_plugin_stub(ch.plugin.internal_name)
self._add_event(encode_data_event(EventID.PluginData, plugin_data))
if ch.plugin.color:
self._add_event(
encode_dword_event(EventID.PluginColor, ch.plugin.color)
)
self._add_event(encode_text_event(EventID.ChName, ch.name))
self._add_event(encode_byte_event(EventID.ChIsEnabled, 1 if ch.enabled else 0))
self._add_event(encode_byte_event(EventID.ChRoutedTo, ch.mixer_track & 0xFF))
self._add_event(encode_word_event(EventID.ChVolWord, ch.volume))
self._add_event(encode_byte_event(EventID.ChRootNote, ch.root_note))
def _build_wrapper_stub(self, plugin_name: str) -> bytes:
# Minimal VST wrapper state - FL Studio will initialize the plugin fresh
# 10 params with default values
stub = struct.pack("<II", 10, 1) # param_count=10, unknown=1
stub += struct.pack("<II", 20, 0) # version=20, flags=0
stub += b"\xff\xff\xff\xff\xff\xff\xff\xff" # GUID placeholder
stub += b"\x0c\x00\x0c\x00\x0c\x00\x0c\x00" # padding
stub += b"\x00" * 16 # zeros
return stub
def _build_native_plugin_stub(self, internal_name: str) -> bytes:
# Minimal native plugin state
stub = struct.pack("<II", 10, 1)
stub += struct.pack("<II", 20, 0)
stub += b"\xff\xff\xff\xff\xff\xff\xff\xff"
stub += b"\x0c\x00\x0c\x00\x0c\x00\x0c\x00"
stub += b"\x00" * 16
return stub
def _serialize(self) -> bytes:
num_channels = len(self.project.channels)
ppq = self.project.ppq
header = struct.pack(
"<4sIhHH",
b"FLhd",
6,
0,
num_channels,
ppq,
)
all_events = b"".join(self._events)
total_size = len(all_events)
data_header = b"FLdt" + struct.pack("<I", total_size)
return header + data_header + all_events
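# Header layout example (illustrative): a 9-channel, 96-PPQ project serializes
# as b"FLhd" + <I>6 + <h>0 + <H>9 + <H>96 (14 bytes), then b"FLdt" + <I>size,
# then the concatenated event stream.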
def write(self, filepath: str):
data = self.build()
with open(filepath, "wb") as f:
f.write(data)
return filepath
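# Usage sketch (hypothetical values; FLPProject's constructor lives in .project
# and is not shown here, but the attribute names match those read above):
#     project = FLPProject(tempo=95.0, ppq=96, ...)
#     FLPWriter(project).write("output/beat.flp")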

194
src/scanner/__init__.py Normal file
View File

@@ -0,0 +1,194 @@
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Optional
FL_USER_DIR = Path(os.path.expanduser("~")) / "Documents" / "Image-Line" / "FL Studio"
PLUGIN_DB_DIR = FL_USER_DIR / "Presets" / "Plugin database" / "Installed"
PROJECT_ROOT = Path(os.path.expanduser("~")) / "Documents" / "fl_control"
def _scan_plugin_dir(root: Path, plugin_type: str) -> list[dict]:
"""Collect .fst plugin entries from one Plugin database subtree."""
plugins: list[dict] = []
if not root.exists():
return plugins
for category_dir in root.iterdir():
if not category_dir.is_dir():
continue
category = category_dir.name
for fst_file in category_dir.glob("*.fst"):
plugins.append({
"name": fst_file.stem,
"category": category,
"type": plugin_type,
"format": category,
"fst_path": str(fst_file),
})
return plugins
def scan_installed_plugins() -> dict:
generators = _scan_plugin_dir(PLUGIN_DB_DIR / "Generators", "generator")
effects = _scan_plugin_dir(PLUGIN_DB_DIR / "Effects", "effect")
return {
"generators": generators,
"effects": effects,
"generator_names": sorted(set(g["name"] for g in generators)),
"effect_names": sorted(set(e["name"] for e in effects)),
}
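# Example return shape (illustrative values only):
# {"generators": [{"name": "Sytrus", "category": "VST", "type": "generator",
#                  "format": "VST", "fst_path": ".../Sytrus.fst"}, ...],
#  "effects": [...], "generator_names": [...], "effect_names": [...]}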
def scan_samples(base_dir: Optional[Path] = None) -> dict:
if base_dir is None:
base_dir = PROJECT_ROOT / "librerias" / "organized_samples"
categories = {}
if not base_dir.exists():
return {"categories": {}, "total_files": 0}
for cat_dir in base_dir.iterdir():
if not cat_dir.is_dir():
continue
files = []
for f in cat_dir.rglob("*"):
if f.is_file() and f.suffix.lower() in (".wav", ".mp3", ".flac", ".ogg", ".aif", ".aiff"):
files.append({
"name": f.stem,
"path": str(f),
"size": f.stat().st_size,
"ext": f.suffix.lower(),
})
categories[cat_dir.name] = files
total = sum(len(v) for v in categories.values())
return {"categories": categories, "total_files": total}
def scan_library_packs(base_dir: Optional[Path] = None) -> dict:
if base_dir is None:
base_dir = PROJECT_ROOT / "librerias" / "reggaeton"
packs = []
if not base_dir.exists():
return {"packs": packs}
for pack_dir in base_dir.iterdir():
if not pack_dir.is_dir():
continue
pack = {
"name": pack_dir.name,
"path": str(pack_dir),
"contents": {},
}
for sub in pack_dir.rglob("*"):
if sub.is_dir():
continue
ext = sub.suffix.lower()
rel = str(sub.relative_to(pack_dir))
content_type = "other"
if ext in (".wav", ".mp3", ".flac", ".ogg", ".aif", ".aiff"):
content_type = "audio"
elif ext == ".mid":
content_type = "midi"
elif ext in (".fxp", ".fxb", ".fst"):
content_type = "preset"
if content_type not in pack["contents"]:
pack["contents"][content_type] = []
pack["contents"][content_type].append({
"name": sub.stem,
"path": str(sub),
"ext": ext,
"type": content_type,
})
packs.append(pack)
return {"packs": packs}
def scan_vector_store_metadata(vs_dir: Optional[Path] = None) -> dict:
if vs_dir is None:
vs_dir = PROJECT_ROOT / "librerias" / "vector_store"
metadata_path = vs_dir / "metadata.json"
if not metadata_path.exists():
return {"items": [], "total": 0}
with open(metadata_path, "r", encoding="utf-8") as f:
data = json.load(f)
types = {}
for item in data:
t = item.get("type", "unknown")
types[t] = types.get(t, 0) + 1
return {
"total": len(data),
"types": types,
"items_with_key": sum(1 for i in data if i.get("key")),
"items_with_bpm": sum(1 for i in data if i.get("bpm")),
"sample_items": data,
}
def full_inventory() -> dict:
plugins = scan_installed_plugins()
samples = scan_samples()
packs = scan_library_packs()
vector_store = scan_vector_store_metadata()
return {
"plugins": plugins,
"samples": samples,
"packs": packs,
"vector_store": vector_store,
}
if __name__ == "__main__":
import sys
sys.stdout.reconfigure(encoding="utf-8")
inv = full_inventory()
summary = {
"plugins": {
"generators": inv["plugins"]["generator_names"],
"effects": inv["plugins"]["effect_names"],
"total_generators": len(inv["plugins"]["generators"]),
"total_effects": len(inv["plugins"]["effects"]),
},
"samples": {
"categories": {k: len(v) for k, v in inv["samples"]["categories"].items()},
"total_files": inv["samples"]["total_files"],
},
"packs": [
{
"name": p["name"],
"audio_count": len(p["contents"].get("audio", [])),
"midi_count": len(p["contents"].get("midi", [])),
}
for p in inv["packs"]
],
"vector_store": {
"total": inv["vector_store"]["total"],
"types": inv["vector_store"]["types"],
},
}
print(json.dumps(summary, indent=2, ensure_ascii=False))

330
src/selector/__init__.py Normal file
View File

@@ -0,0 +1,330 @@
"""Sample Selector — queries the forensic sample index by musical criteria.
Loads data/sample_index.json and provides scored, ranked queries:
- Role matching (exact)
- Key compatibility (exact, relative major/minor, dominant/subdominant)
- BPM tolerance (±5%, half/double time)
- Character similarity (grouped characters)
- Tonal/atonal filtering
Usage:
selector = SampleSelector()
results = selector.select(role="kick", bpm=95, limit=5)
results = selector.select(role="bass", key="Am", bpm=92, character="deep")
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Optional
from dataclasses import dataclass, field
# ---------------------------------------------------------------------------
# Key Compatibility
# ---------------------------------------------------------------------------
CIRCLE_OF_FIFTHS = ["C", "G", "D", "A", "E", "B", "F#", "C#", "G#", "D#", "A#", "F"]
# Relative major/minor pairs (each minor → its relative major).
# Sharp spellings only: query keys pass through _normalize_key() before lookup,
# so flat-spelled keys (Bbm, Ebm, Abm, ...) resolve to these entries.
RELATIVE_MAJOR = {
"Am": "C", "Em": "G", "Bm": "D", "F#m": "A", "C#m": "E",
"G#m": "B", "D#m": "F#", "A#m": "C#", "Fm": "G#", "Cm": "D#",
"Gm": "A#", "Dm": "F",
}
# Build reverse: major → relative minor
RELATIVE_MINOR = {v: k for k, v in RELATIVE_MAJOR.items()}
# Dominant (V) and subdominant (IV) relationships
DOMINANT = {"C": "G", "G": "D", "D": "A", "A": "E", "E": "B", "B": "F#",
"F#": "C#", "C#": "G#", "G#": "D#", "D#": "A#", "A#": "F", "F": "C"}
SUBDOMINANT = {v: k for k, v in DOMINANT.items()}
# Character similarity groups
CHARACTER_GROUPS = [
{"warm", "soft", "lush"},
{"boomy", "deep", "dark"},
{"sharp", "crisp", "bright"},
{"aggressive", "tight"},
{"ethereal", "neutral"},
{"impact", "short"},
{"hollow", "full"},
]
# All roles the classifier produces
KNOWN_ROLES = {
"kick", "snare", "hihat", "bass", "lead", "pad", "pluck",
"vocal", "arp", "guitar", "keys", "synth", "brass",
"perc", "drumloop", "fx", "fill", "oneshot",
}
# Roles that are typically atonal (key doesn't matter)
ATONAL_ROLES = {"kick", "snare", "hihat", "perc", "fx", "fill", "oneshot"}
def _normalize_key(key: str) -> str:
"""Normalize keys to sharp spellings (Eb→D#, Bb→A#, Db→C#, Gb→F#, Ab→G#),
preserving a minor "m" suffix (Ebm→D#m)."""
enharmonics = {"Eb": "D#", "Bb": "A#", "Db": "C#", "Gb": "F#", "Ab": "G#", "Cb": "B"}
if key.endswith("m"):
root = key[:-1]
return enharmonics.get(root, root) + "m"
return enharmonics.get(key, key)
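# Examples: _normalize_key("Eb") → "D#"; _normalize_key("Ebm") → "D#m";
# _normalize_key("Am") → "Am" (already sharp-spelled).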
def _key_compatibility(query_key: str, sample_key: str) -> float:
"""Score how compatible a sample's key is with the query key.
Returns:
1.0 = exact match
0.9 = same root, different mode (C ↔ Cm)
0.8 = relative major/minor (Am ↔ C)
0.7 = dominant/subdominant (C ↔ G or C ↔ F)
0.5 = compatible (nearby in circle of fifths)
0.0 = atonal or no match
"""
if query_key == "X" or sample_key == "X":
return 0.0 # Atonal, no key compatibility
q = _normalize_key(query_key)
s = _normalize_key(sample_key)
# Exact match
if q == s:
return 1.0
# Separate root and mode
q_root = q.rstrip("m")
q_minor = q.endswith("m")
s_root = s.rstrip("m")
s_minor = s.endswith("m")
# Same root, different mode (C ↔ Cm)
if q_root == s_root:
return 0.9
# Relative major/minor (Am ↔ C)
if q_minor and not s_minor:
rel = RELATIVE_MAJOR.get(q, "")
if s_root == _normalize_key(rel):
return 0.8
if not q_minor and s_minor:
rel = RELATIVE_MINOR.get(q, "")
if s_root == _normalize_key(rel.rstrip("m")):
return 0.8
# Dominant/subdominant
q_root_norm = _normalize_key(q_root)
s_root_norm = _normalize_key(s_root)
if DOMINANT.get(q_root_norm) == s_root_norm or SUBDOMINANT.get(q_root_norm) == s_root_norm:
return 0.7
# Circle of fifths proximity
try:
q_idx = CIRCLE_OF_FIFTHS.index(q_root_norm)
s_idx = CIRCLE_OF_FIFTHS.index(s_root_norm)
distance = min(abs(q_idx - s_idx), 12 - abs(q_idx - s_idx))
if distance <= 2:
return 0.5
except ValueError:
pass
return 0.3
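# Examples: _key_compatibility("C", "C") → 1.0; ("C", "Cm") → 0.9;
# ("Am", "C") → 0.8; ("C", "G") → 0.7; ("C", "D") → 0.5 (two fifths apart);
# ("C", "F#") → 0.3 (tritone); ("C", "X") → 0.0 (atonal sample).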
def _bpm_compatibility(query_bpm: float, sample_bpm: float) -> float:
"""Score BPM compatibility. Handles half/double time."""
if query_bpm <= 0 or sample_bpm <= 0:
return 0.5 # Unknown BPM, neutral score
ratio = sample_bpm / query_bpm
tolerance = 0.05 # ±5%
# Direct match
if abs(ratio - 1.0) <= tolerance:
return 1.0
# Half time
if abs(ratio - 0.5) <= tolerance:
return 0.8
# Double time
if abs(ratio - 2.0) <= tolerance:
return 0.8
# Near match (±10%)
if abs(ratio - 1.0) <= 0.10:
return 0.6
return 0.3
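# Examples: _bpm_compatibility(95, 95) → 1.0; (95, 47.5) → 0.8 (half time);
# (90, 180) → 0.8 (double time); (100, 108) → 0.6 (within ±10%); (100, 130) → 0.3.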
def _character_compatibility(query_char: Optional[str], sample_char: str) -> float:
"""Score character compatibility using similarity groups."""
if not query_char:
return 0.5 # No preference
if query_char == sample_char:
return 1.0
# Check if in same group
for group in CHARACTER_GROUPS:
if query_char in group and sample_char in group:
return 0.7
return 0.3
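# Examples: _character_compatibility("warm", "lush") → 0.7 (same group);
# ("warm", "sharp") → 0.3; (None, "boomy") → 0.5 (no preference).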
@dataclass
class SampleMatch:
"""A scored sample match from the selector."""
score: float
sample: dict
score_breakdown: dict = field(default_factory=dict)
class SampleSelector:
"""Query the forensic sample index with musical criteria."""
def __init__(self, index_path: Optional[str] = None):
if index_path is None:
project = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
index_path = os.path.join(project, "data", "sample_index.json")
self.index_path = index_path
self._samples: list[dict] = []
self._by_role: dict[str, list[dict]] = {}
self._loaded = False
def _load(self):
"""Lazy-load the index."""
if self._loaded:
return
with open(self.index_path, "r", encoding="utf-8") as f:
data = json.load(f)
self._samples = [s for s in data.get("samples", []) if "error" not in s]
# Index by role for fast lookup
self._by_role = {}
for s in self._samples:
role = s.get("role", "unknown")
if role not in self._by_role:
self._by_role[role] = []
self._by_role[role].append(s)
self._loaded = True
def select(
self,
role: str,
key: Optional[str] = None,
bpm: Optional[float] = None,
character: Optional[str] = None,
is_tonal: Optional[bool] = None,
limit: int = 10,
path_prefix: Optional[str] = None,
) -> list[SampleMatch]:
"""Select samples matching criteria, ranked by compatibility score.
Args:
role: Required. Production role (kick, bass, lead, etc.)
key: Musical key for compatibility (e.g. "Am", "C")
bpm: Target BPM for tempo matching
character: Timbre character preference (e.g. "warm", "boomy")
is_tonal: Filter by tonal/atonal status
limit: Maximum results to return
path_prefix: Filter by file path prefix
Returns:
List of SampleMatch objects sorted by score (descending)
"""
self._load()
if role not in KNOWN_ROLES:
# Try fuzzy match
role_lower = role.lower()
for known in KNOWN_ROLES:
if known in role_lower:
role = known
break
candidates = self._by_role.get(role, [])
if not candidates:
return []
# Score each candidate
matches: list[SampleMatch] = []
for s in candidates:
# Path prefix filter
if path_prefix:
if path_prefix.lower() not in s.get("original_path", "").lower():
continue
# Tonal filter
if is_tonal is not None:
sample_tonal = s.get("musical", {}).get("is_tonal", False)
if sample_tonal != is_tonal:
continue
breakdown = {}
total = 0.0
# Role score (always 1.0 since we filtered by role)
breakdown["role"] = 1.0
total += 1.0
# Key compatibility
if key and role not in ATONAL_ROLES:
sample_key = s.get("musical", {}).get("key", "X")
kc = _key_compatibility(key, sample_key)
breakdown["key"] = kc
total += kc * 2.0 # Weight key heavily
else:
breakdown["key"] = 0.5
# BPM compatibility
if bpm:
sample_bpm = s.get("perceptual", {}).get("tempo", 0)
bc = _bpm_compatibility(bpm, sample_bpm)
breakdown["bpm"] = bc
total += bc * 1.5
else:
breakdown["bpm"] = 0.5
# Character compatibility
cc = _character_compatibility(character, s.get("character", ""))
breakdown["character"] = cc
total += cc * 0.5
# Duration preference: shorter samples get slight bonus for flexibility
dur = s.get("signal", {}).get("duration", 0)
if dur > 0 and dur < 5.0:
total += 0.1 # Short bonus
breakdown["duration"] = dur
matches.append(SampleMatch(
score=round(total, 4),
sample=s,
score_breakdown=breakdown,
))
# Sort by score descending
matches.sort(key=lambda m: m.score, reverse=True)
return matches[:limit]
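# Score bounds for reference: role (1.0) + key (up to 2.0) + bpm (up to 1.5) +
# character (up to 0.5) + duration bonus (0.1) gives a theoretical maximum of 5.1.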
def select_one(self, role: str, **kwargs) -> Optional[dict]:
"""Select the single best matching sample."""
results = self.select(role=role, limit=1, **kwargs)
return results[0].sample if results else None
def get_roles(self) -> list[str]:
"""Get all available roles and their counts."""
self._load()
return sorted(self._by_role.keys())
def get_stats(self) -> dict[str, int]:
"""Get count per role."""
self._load()
return {role: len(samples) for role, samples in sorted(self._by_role.items())}
def random_sample(self, role: str, **kwargs) -> Optional[dict]:
"""Select a random sample from the top candidates for variation."""
import random
results = self.select(role=role, limit=5, **kwargs)
if not results:
return None
return random.choice(results).sample
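if __name__ == "__main__":
# Smoke test (assumes data/sample_index.json was already built by the analyzer).
selector = SampleSelector()
print(json.dumps(selector.get_stats(), indent=2))
for match in selector.select(role="kick", bpm=95, limit=3):
print(round(match.score, 2), match.sample.get("original_path", "?"))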