feat: reggaeton production system with intelligent sample selection and FLP generation

This commit is contained in:
renato97
2026-05-02 21:40:18 -03:00
commit 4d941f3f90
62 changed files with 8656 additions and 0 deletions

827
src/analyzer/__init__.py Normal file
View File

@@ -0,0 +1,827 @@
"""Deep forensic audio sample analyzer.
4-layer analysis pipeline:
Layer 1 - Signal: FFT, spectral centroid, bandwidth, rolloff, flatness, ZCR, RMS, crest factor
Layer 2 - Perceptual: MFCC (20), chromagram (12), onset envelope, tempo, LUFS
Layer 3 - Musical: Key estimation (Krumhansl-Schmuckler), F0 via aubio (C-native), tonal/atonal
Layer 4 - Timbre: Mel band stats, spectral contrast, tonnetz
Architecture: ProcessPoolExecutor with 16 workers for TRUE multi-core parallelism.
aubio for F0 (C-native, ~1ms per file vs pyin ~2s per file).
"""
from __future__ import annotations
import os
import json
import hashlib
from pathlib import Path
from typing import Optional
from concurrent.futures import ProcessPoolExecutor, as_completed
import numpy as np
import librosa
import soundfile as sf
import aubio
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
SAMPLE_RATE = 44100  # analysis rate; librosa.load resamples everything to this
HOP_LENGTH = 512     # STFT hop (~11.6 ms at 44.1 kHz), shared by all layers
N_FFT = 2048         # STFT window size for layer-1 spectral features
N_MFCC = 20          # number of MFCC coefficients in layer 2
N_CHROMA = 12        # pitch classes in the chromagram
MAX_WORKERS = 16 # 70% of 24 cores
AUDIO_EXT = {".wav", ".flac", ".mp3", ".aif", ".aiff"}
# Krumhansl-Schmuckler key profiles: perceptual pitch-class weights with the
# tonic at index 0; _estimate_key() correlates these against all 12 rotations
# of the chroma vector.
MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Character classification thresholds
# NOTE(review): this table is not referenced anywhere in the code visible in
# this file — classify_character() hard-codes the same thresholds (plus
# per-rule score weights) inline. Keep the two in sync, or drop one.
CHARACTERS = {
    "boomy": {"low_ratio_min": 0.6, "centroid_max": 400},
    "deep": {"low_ratio_min": 0.5, "centroid_max": 500, "fundamental_max": 150},
    "sharp": {"high_ratio_min": 0.4, "centroid_min": 3000, "attack_max": 0.005},
    "crisp": {"high_ratio_min": 0.3, "centroid_min": 4000, "duration_max": 0.2},
    "warm": {"centroid_min": 300, "centroid_max": 2000, "mid_ratio_min": 0.4},
    "bright": {"centroid_min": 3000, "high_ratio_min": 0.3},
    "dark": {"centroid_max": 800, "low_ratio_min": 0.4},
    "ethereal": {"centroid_min": 1500, "centroid_max": 5000, "rms_std_max": 0.03},
    "short": {"duration_max": 0.15},
    "impact": {"attack_max": 0.005, "peak_rms_ratio_min": 5.0},
    "full": {"duration_min": 1.0, "bandwidth_min": 4000},
    "hollow": {"mid_ratio_max": 0.2, "low_ratio_min": 0.3, "high_ratio_min": 0.3},
    "tight": {"attack_max": 0.003, "duration_max": 0.3, "centroid_min": 1000},
    "lush": {"spectral_flatness_min": 0.1, "mid_ratio_min": 0.3, "duration_min": 0.5},
    "aggressive": {"peak_rms_ratio_min": 4.0, "centroid_min": 2000},
    "soft": {"peak_rms_ratio_max": 3.0, "attack_min": 0.01},
}
# ---------------------------------------------------------------------------
# Layer 1: Signal Analysis
# ---------------------------------------------------------------------------
def analyze_signal(y: np.ndarray, sr: int) -> dict:
    """Layer 1: time-domain and spectral signal features.

    Args:
        y: mono audio buffer. analyze_file() peak-normalizes it to 1.0 first,
           so all level-based features here are relative, not absolute.
        sr: sample rate in Hz.

    Returns:
        Flat dict of rounded scalars: duration, RMS stats, crest factor,
        peak/RMS ratio, ZCR, spectral centroid/bandwidth/rolloff/flatness,
        the low/mid/high band-energy split (<300 Hz / 300-3000 Hz / >=3000 Hz)
        and attack_time.
    """
    duration = len(y) / sr
    rms = librosa.feature.rms(y=y, hop_length=HOP_LENGTH)[0]
    rms_mean = float(np.mean(rms))
    rms_std = float(np.std(rms))
    peak = float(np.max(np.abs(y)))
    # 1e-10 guards the divisions below against digital silence.
    crest_factor = peak / (rms_mean + 1e-10)
    peak_rms_ratio = float(np.max(rms) / (np.mean(rms) + 1e-10))
    zcr = librosa.feature.zero_crossing_rate(y, hop_length=HOP_LENGTH)[0]
    zcr_mean = float(np.mean(zcr))
    S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
    S_power = S ** 2
    # NOTE(review): librosa documents the S parameter of these spectral_*
    # functions as a *magnitude* spectrogram (spectral_flatness even applies
    # power=2.0 internally); passing the power spectrogram skews the stats
    # toward the strongest bins. Downstream thresholds were presumably tuned
    # against this behaviour — confirm before changing.
    spectral_centroid = librosa.feature.spectral_centroid(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_flatness = librosa.feature.spectral_flatness(S=S_power)[0]
    # Coarse three-band energy split used by the role/character classifiers.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)
    low_mask = freqs < 300
    mid_mask = (freqs >= 300) & (freqs < 3000)
    high_mask = freqs >= 3000
    band_energy = np.mean(S_power, axis=1)
    total_energy = np.sum(band_energy) + 1e-10
    low_ratio = float(np.sum(band_energy[low_mask]) / total_energy)
    mid_ratio = float(np.sum(band_energy[mid_mask]) / total_energy)
    high_ratio = float(np.sum(band_energy[high_mask]) / total_energy)
    # "Attack" here is the time of the loudest RMS frame (time-to-peak),
    # not a classic 10-90% attack-transient measurement.
    rms_peak_idx = int(np.argmax(rms))
    attack_time = float(rms_peak_idx * HOP_LENGTH / sr)
    return {
        "duration": round(duration, 4),
        "rms_mean": round(rms_mean, 6),
        "rms_std": round(rms_std, 6),
        "peak_amplitude": round(peak, 6),
        "crest_factor": round(crest_factor, 2),
        "peak_rms_ratio": round(peak_rms_ratio, 2),
        "zcr_mean": round(zcr_mean, 4),
        "spectral_centroid_mean": round(float(np.mean(spectral_centroid)), 2),
        "spectral_centroid_std": round(float(np.std(spectral_centroid)), 2),
        "spectral_centroid_max": round(float(np.max(spectral_centroid)), 2),
        "spectral_bandwidth_mean": round(float(np.mean(spectral_bandwidth)), 2),
        "spectral_rolloff_mean": round(float(np.mean(spectral_rolloff)), 2),
        "spectral_flatness_mean": round(float(np.mean(spectral_flatness)), 6),
        "low_energy_ratio": round(low_ratio, 4),
        "mid_energy_ratio": round(mid_ratio, 4),
        "high_energy_ratio": round(high_ratio, 4),
        "attack_time": round(attack_time, 4),
    }
# ---------------------------------------------------------------------------
# Layer 2: Perceptual Analysis
# ---------------------------------------------------------------------------
def analyze_perceptual(y: np.ndarray, sr: int) -> dict:
    """Layer 2: MFCCs, chromagram, onsets, tempo and approximate loudness.

    Returns per-coefficient MFCC means/stds (N_MFCC of each), the mean CQT
    chroma vector (12 pitch classes, consumed by analyze_musical() for key
    finding), onset count and density, a global tempo estimate, and the
    simplified LUFS value from _compute_lufs().
    """
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, hop_length=HOP_LENGTH)
    mfcc_means = [round(float(np.mean(mfcc[i])), 4) for i in range(N_MFCC)]
    mfcc_stds = [round(float(np.std(mfcc[i])), 4) for i in range(N_MFCC)]
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=HOP_LENGTH)
    chroma_mean = np.mean(chroma, axis=1)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH)
    onset_count = len(onset_times)
    tempo = 0.0
    if len(onset_env) > 0:
        # NOTE(review): librosa.beat.tempo was deprecated in favour of
        # librosa.feature.rhythm.tempo in librosa >= 0.10 — confirm the
        # pinned librosa version before upgrading.
        tempo_vals = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
        if len(tempo_vals) > 0:
            tempo = float(tempo_vals[0])
    lufs = _compute_lufs(y, sr)
    return {
        "mfcc_means": mfcc_means,
        "mfcc_stds": mfcc_stds,
        "chroma_mean": [round(float(v), 4) for v in chroma_mean],
        "onset_count": onset_count,
        # onsets per second; the 0.01 floor avoids division by zero on
        # (near-)empty audio
        "onset_density": round(onset_count / max(len(y) / sr, 0.01), 2),
        "tempo": round(tempo, 2),
        "lufs": round(lufs, 2),
    }
def _compute_lufs(y: np.ndarray, sr: int) -> float:
    """Approximate integrated loudness (LUFS) in the spirit of ITU-R BS.1770.

    K-weighting is approximated by:
      1. a 2nd-order Butterworth high-pass (60 Hz; the spec's RLB stage uses
         ~38 Hz — 60 Hz kept to preserve the original rumble suppression), and
      2. a +4 dB high-shelf biquad around 1681 Hz (RBJ cookbook coefficients).
         Fix: the previous code used a 1st-order *high-pass* at 1.5 kHz here,
         which removed all low/mid content instead of shelving the highs and
         grossly underestimated the loudness of bass-heavy material.

    The weighted signal is RMS-measured over 400 ms blocks with a 100 ms hop
    and averaged (no absolute/relative gating). Returns a value clamped to
    >= -70.0; -70.0 on silence or any failure.
    """
    try:
        from scipy.signal import butter, sosfilt
        # Stage 2: high-pass to remove DC / sub-sonic rumble.
        sos_hp = butter(2, 60, btype='high', fs=sr, output='sos')
        y_filtered = sosfilt(sos_hp, y)
        # Stage 1: +4 dB high-shelf at ~1681 Hz (RBJ "highShelf", slope S=1).
        A = 10.0 ** (4.0 / 40.0)
        w0 = 2.0 * np.pi * 1681.0 / sr
        cos_w0, sin_w0 = np.cos(w0), np.sin(w0)
        alpha = sin_w0 / 2.0 * np.sqrt(2.0)
        sqrtA = np.sqrt(A)
        b0 = A * ((A + 1) + (A - 1) * cos_w0 + 2 * sqrtA * alpha)
        b1 = -2 * A * ((A - 1) + (A + 1) * cos_w0)
        b2 = A * ((A + 1) + (A - 1) * cos_w0 - 2 * sqrtA * alpha)
        a0 = (A + 1) - (A - 1) * cos_w0 + 2 * sqrtA * alpha
        a1 = 2 * ((A - 1) - (A + 1) * cos_w0)
        a2 = (A + 1) - (A - 1) * cos_w0 - 2 * sqrtA * alpha
        sos_shelf = np.array([[b0 / a0, b1 / a0, b2 / a0, 1.0, a1 / a0, a2 / a0]])
        y_filtered = sosfilt(sos_shelf, y_filtered)
        # 400 ms measurement blocks, 100 ms hop (BS.1770 momentary blocks).
        block_size = int(0.4 * sr)
        hop = int(0.1 * sr)
        if len(y_filtered) < block_size:
            block_size = len(y_filtered)
            hop = max(1, block_size // 4)
        blocks = []
        for i in range(0, len(y_filtered) - block_size + 1, hop):
            block = y_filtered[i:i + block_size]
            rms = np.sqrt(np.mean(block ** 2))
            if rms > 1e-10:
                blocks.append(rms)
        if not blocks:
            return -70.0
        mean_rms = np.mean(blocks)
        # -0.691 is the BS.1770 calibration offset; 1e-20 avoids log(0).
        lufs = -0.691 + 10 * np.log10(mean_rms ** 2 + 1e-20)
        return max(lufs, -70.0)
    except Exception:
        # Best-effort metric: any numeric/filter failure degrades to silence.
        return -70.0
# ---------------------------------------------------------------------------
# F0 Detection via aubio (C-native, ~1ms per file)
# ---------------------------------------------------------------------------
def _fast_f0(y: np.ndarray, sr: int) -> float:
    """Estimate the fundamental frequency (Hz) with aubio's YIN detector.

    aubio is C-native (~1 ms per file vs librosa.pyin's ~2 s). The signal is
    fed in hop-sized chunks; the aubio pitch object buffers win_s samples
    internally. Returns the median of all voiced (non-zero) frame estimates,
    or 0.0 when nothing is voiced or aubio fails for any reason.
    """
    try:
        # aubio pitch detector
        win_s = N_FFT
        hop_s = HOP_LENGTH
        pitch_o = aubio.pitch("yin", win_s, hop_s, sr)
        pitch_o.set_unit("Hz")
        pitch_o.set_tolerance(0.8)  # confidence threshold
        pitches = []
        # Fix: iterate every full hop-sized chunk. The previous bound of
        # len(y) - win_s + 1 silently dropped the final (win_s - hop_s)
        # samples even though aubio only consumes hop_s samples per call;
        # the in-loop short-chunk check was also unreachable.
        for i in range(0, len(y) - hop_s + 1, hop_s):
            chunk = y[i:i + hop_s].astype(np.float32)
            freq = pitch_o(chunk)
            if freq[0] > 0:
                pitches.append(float(freq[0]))
        if pitches:
            return float(np.median(pitches))
        return 0.0
    except Exception:
        # Missing/failed aubio must not break the worker: report "unvoiced".
        return 0.0
# ---------------------------------------------------------------------------
# Layer 3: Musical Analysis
# ---------------------------------------------------------------------------
def analyze_musical(signal_features: dict, perceptual_features: dict, y: np.ndarray, sr: int) -> dict:
    """Layer 3: key estimation, tonality, one-shot/loop flags, fundamental.

    Uses the layer-2 mean chroma for Krumhansl-Schmuckler key finding, then
    runs the (comparatively expensive) aubio F0 detector only for samples
    that look tonal.
    """
    chroma_vec = np.array(perceptual_features["chroma_mean"])
    key_name, key_corr, mode = _estimate_key(chroma_vec)
    # Tonal = the chroma profile is both uneven and peaked; flat profiles
    # indicate noise/percussion.
    tonal = float(np.std(chroma_vec)) > 0.05 and float(np.max(chroma_vec)) > 0.15
    dur = signal_features["duration"]
    n_onsets = perceptual_features["onset_count"]
    fundamental = 0.0
    note_name = "X"
    if tonal:
        fundamental = _fast_f0(y, sr)
        if fundamental > 0:
            # Hz -> MIDI (A4 = 440 Hz = note 69) -> scientific pitch name.
            midi = int(round(12 * np.log2(fundamental / 440.0) + 69))
            note_name = _midi_to_note_name(midi)
    return {
        "key": key_name,
        "key_correlation": round(key_corr, 4),
        "mode": mode,
        "is_tonal": tonal,
        "is_oneshot": dur < 2.0 and n_onsets <= 2,
        "is_loop": dur > 1.5 and n_onsets >= 4,
        "fundamental_freq": round(fundamental, 2),
        "fundamental_note": note_name,
    }
def _estimate_key(chroma_profile: np.ndarray) -> tuple:
    """Krumhansl-Schmuckler key-finding.

    Correlates the normalised chroma vector against all 24 rotations of the
    major/minor profiles and keeps the strongest match. Returns
    (key_name, correlation, mode): ("X", corr, "atonal") for flat profiles
    or weak matches (corr < 0.3); minor keys get an "m" suffix (e.g. "Am").
    """
    if np.max(chroma_profile) < 0.01:
        return "X", 0.0, "atonal"
    normalised = chroma_profile / (np.sum(chroma_profile) + 1e-10)

    def _corr(rotated: np.ndarray, profile: np.ndarray) -> float:
        # NaN-safe Pearson correlation (NaN occurs for constant vectors).
        c = float(np.corrcoef(rotated, profile)[0, 1])
        return 0.0 if np.isnan(c) else c

    best_key, best_corr, best_mode = "C", -1.0, "atonal"
    for tonic in range(12):
        shifted = np.roll(normalised, -tonic)
        # Major is tried first; minor only wins on a strictly higher
        # correlation — this preserves the original tie-breaking.
        for profile, mode in ((MAJOR_PROFILE, "major"), (MINOR_PROFILE, "minor")):
            score = _corr(shifted, profile)
            if score > best_corr:
                best_key, best_corr, best_mode = NOTE_NAMES[tonic], score, mode
    if best_corr < 0.3:
        return "X", best_corr, "atonal"
    if best_mode == "minor":
        return f"{best_key}m", best_corr, "minor"
    return best_key, best_corr, "major"
def _midi_to_note_name(midi: int) -> str:
    """Convert a MIDI note number to scientific pitch, e.g. 69 -> "A4".

    Returns "X" for values outside the valid MIDI range 0-127.
    """
    if not 0 <= midi <= 127:
        return "X"
    # Octave convention: MIDI 60 is C4.
    return f"{NOTE_NAMES[midi % 12]}{midi // 12 - 1}"
# ---------------------------------------------------------------------------
# Layer 4: Timbre Fingerprint
# ---------------------------------------------------------------------------
def analyze_timbre(y: np.ndarray, sr: int) -> dict:
    """Layer 4: timbre fingerprint from mel-band stats, contrast and tonnetz."""
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, hop_length=HOP_LENGTH),
        ref=np.max,
    )
    # Collapse the 128 mel bins into 8 coarse bands of 16 bins each and keep
    # mean/std/max per band as a compact spectral-shape signature.
    n_bands = 8
    width = 128 // n_bands
    band_stats = [
        {
            "mean": round(float(np.mean(chunk)), 2),
            "std": round(float(np.std(chunk)), 2),
            "max": round(float(np.max(chunk)), 2),
        }
        for chunk in (mel_db[b * width:(b + 1) * width, :] for b in range(n_bands))
    ]
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=HOP_LENGTH)
    contrast_mean = [round(float(np.mean(row)), 4) for row in contrast[:7]]
    try:
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr, hop_length=HOP_LENGTH)
        tonnetz_mean = [round(float(np.mean(row)), 4) for row in tonnetz[:6]]
    except Exception:
        # tonnetz can fail on degenerate/silent input — degrade to zeros.
        tonnetz_mean = [0.0] * 6
    return {
        "mel_band_stats": band_stats,
        "spectral_contrast": contrast_mean,
        "tonnetz": tonnetz_mean,
    }
# ---------------------------------------------------------------------------
# Classification — 3-layer priority: filename → folder → spectral heuristic
# ---------------------------------------------------------------------------
def classify_role(signal: dict, perceptual: dict, musical: dict, folder_hint: str = "") -> str:
    """Classify sample into a production role.

    Priority order:
    1. FILENAME keywords (most reliable — producers name their files correctly)
    2. FOLDER structure (less reliable — "SentimientoLatino" has everything mixed)
    3. SPECTRAL heuristics (fallback for unnamed/unknown samples)

    Args:
        signal / perceptual / musical: feature dicts from layers 1-3.
        folder_hint: "<grandparent> <parent> <stem>" string built by
            analyze_file() — despite the local name below, it contains folder
            names as well as the file stem.

    Returns:
        Role string such as "kick", "snare", "hihat", "perc", "bass", "lead",
        "pad", "pluck", "arp", "keys", "guitar", "brass", "synth", "vocal",
        "fx", "fill", "drumloop" or "oneshot".

    All keyword matching below is plain substring matching on the lowercased
    hint; the ORDER of each table encodes priority, so do not reorder entries
    casually.
    """
    filename = folder_hint.lower()  # contains parent + current folder names
    # ====================================================================
    # LAYER 1: Filename keyword extraction (HIGHEST PRIORITY)
    # Matches specific patterns like "Lead", "Pad", "Bass" in filenames
    # ====================================================================
    # Ordered by specificity — more specific keywords first
    filename_map = [
        # (keyword_pattern, role, require_not) — avoid false positives
        (["reese"], "bass", []),
        (["808"], "bass", []),
        (["kick"], "kick", ["kickdown", "kick drum"]),
        (["snare"], "snare", []),
        (["hi-hat", "hihat", "hats", "hat "], "hihat", []),
        (["shaker"], "perc", []),
        (["tambourine", "tambor"], "perc", []),
        (["conga", "bongo", "rim"], "perc", []),
        (["timbal"], "perc", []),
        (["vocal chop", "v.chop", "vox chop"], "vocal", []),
        (["vocal", "vox", "vocals"], "vocal", []),
        (["pluck"], "pluck", []),
        (["bell"], "pluck", []),
        (["stab"], "oneshot", []),
        (["lead"], "lead", []),
        (["arp", "arpeggio"], "arp", []),
        (["pad reverse"], "pad", []),
        (["pad", "pads"], "pad", []),
        (["chord", "chords"], "pad", []),
        (["rhodes", "piano", "keys", "key "], "keys", []),
        (["guitar"], "guitar", []),
        (["string"], "pad", []),
        (["brass"], "brass", []),
        (["synth"], "synth", []),
        (["texture"], "pad", []),
        (["riser", "sweep", "impact", "explosion"], "fx", []),
        (["loop"], "drumloop", ["vocal loop", "melody loop"]),
        (["fill"], "fill", []),
        (["drum"], "drumloop", []),
    ]
    for keywords, role, excludes in filename_map:
        for kw in keywords:
            if kw in filename:
                # Check exclusions
                excluded = False
                for ex in excludes:
                    if ex in filename:
                        excluded = True
                        break
                if not excluded:
                    return role
    # ====================================================================
    # LAYER 2: Midilatino / SS_RNBL structured filename parsing
    # These packs have naming conventions we can extract roles from
    # ====================================================================
    # Midilatino pattern: "Midilatino_Song_Key_BPM_StemType.wav"
    # e.g. "Midilatino_Holanda_F_Min_108BPM_Lead.wav"
    # e.g. "Midilatino_Cookie_E_Min_89BPM_Pluck.wav"
    parts = filename.replace(".wav", "").replace(".flac", "").replace(".mp3", "").split("_")
    if len(parts) >= 2:
        # Check last part for stem type
        # NOTE(review): this runs for ANY underscore-separated hint with >= 2
        # parts, not only Midilatino files — substring hits in the last two
        # parts can therefore fire on unrelated packs.
        last_parts = " ".join(parts[-2:]).lower()
        stem_map = {
            "drums": "drumloop", "drum": "drumloop",
            "bass": "bass", "reese": "bass",
            "lead": "lead", "pluck": "pluck", "pluck fx": "fx",
            "pad": "pad", "pad reverse": "pad",
            "arp": "arp", "vocal": "vocal", "vocals": "vocal",
            "vocal chop": "vocal", "vox": "vocal",
            "guitar": "guitar", "rhodes": "keys", "rhode": "keys",
            "piano": "keys", "keys": "keys",
            "synth": "synth", "texture": "pad", "texture 2": "pad",
            "bell chords": "pad", "accent": "oneshot", "accent keys": "keys",
            "harp": "pluck", "shaker": "perc",
        }
        for stem_kw, stem_role in stem_map.items():
            if stem_kw in last_parts:
                return stem_role
    # SS_RNBL pattern: "SS_RNBL_Song_Stem_Type.wav"
    # e.g. "SS_RNBL_Amor_One_Shot_Bass_C_.wav"
    if "ss_rnbl" in filename or "ss rnbl" in filename:
        ss_map = {
            "kick": "kick", "snare": "snare", "hats": "hihat", "hat": "hihat",
            "perc": "perc", "bass": "bass", "lead": "lead", "pad": "pad",
            "fx": "fx", "top": "drumloop", "drum": "drumloop",
            "v.chop": "vocal", "phrases": "vocal",
            "one shot": "oneshot", "music": "drumloop",
            "double": "drumloop", "add": "drumloop",
            "gustas": "drumloop",  # "Gustas" are full loop sections
        }
        for kw, role in ss_map.items():
            if kw in filename:
                return role
    # ====================================================================
    # LAYER 3: Folder-based hints (MEDIUM PRIORITY)
    # Only for folders that are explicitly categorized
    # ====================================================================
    folder_map = {
        "kick": "kick", "kicks": "kick", "8. kicks": "kick",
        "snare": "snare", "snares": "snare", "9. snare": "snare",
        "hi-hat": "hihat", "hihat": "hihat", "hi-hats": "hihat",
        "bass": "bass",
        "perc": "perc", "percs": "perc", "10. percs": "perc",
        "fx": "fx", "5. fx": "fx",
        "drum loops": "drumloop", "4. drum loops": "drumloop", "drumloops": "drumloop",
        "vocal": "vocal", "vocals": "vocal", "11. vocals": "vocal",
        "fill": "fill", "fills": "fill", "7. fill": "fill",
        "3. one shots": "oneshot",
    }
    for key, role in folder_map.items():
        if key in folder_hint.lower():
            return role
    # ====================================================================
    # LAYER 4: Spectral heuristics (LOWEST PRIORITY — fallback only)
    # Only used when filename and folder give no signal
    # ====================================================================
    centroid = signal["spectral_centroid_mean"]
    low_r = signal["low_energy_ratio"]
    high_r = signal["high_energy_ratio"]
    dur = signal["duration"]
    onsets = perceptual["onset_count"]
    is_tonal = musical["is_tonal"]
    attack = signal["attack_time"]
    rms_std = signal["rms_std"]
    # Percussive one-shots
    if centroid < 600 and low_r > 0.5 and dur < 1.0 and attack < 0.01 and onsets <= 3:
        return "kick"
    if centroid > 5000 and high_r > 0.4 and dur < 0.3:
        return "hihat"
    if 1000 < centroid < 5000 and attack < 0.005 and onsets <= 2:
        return "snare"
    if dur < 0.5 and onsets <= 2 and 500 < centroid < 5000:
        return "perc"
    # Tonal classification (for long tonal samples that didn't match filename)
    if is_tonal:
        # Sub-bass / bass
        if centroid < 200 and low_r > 0.7:
            return "bass"
        # Pad: sustained, low variance, long
        if rms_std < 0.05 and dur > 1.0 and centroid < 4000:
            return "pad"
        # Pluck: short, tonal
        if dur < 0.8 and onsets <= 3:
            return "pluck"
        # Lead: prominent, mid-high frequency
        if 500 < centroid < 6000:
            return "lead"
        # Keys: mid frequency, moderate dynamics
        if 200 < centroid < 2000 and rms_std < 0.1:
            return "keys"
        # Generic tonal loop
        if dur > 2.0 and onsets > 4:
            return "drumloop"
        return "synth"
    # Atonal loops
    if dur > 2.0 and onsets >= 4:
        return "drumloop"
    # Short atonal
    if dur < 2.0 and onsets <= 1:
        return "oneshot"
    return "fx"
def classify_character(signal: dict, perceptual: dict, musical: dict) -> str:
    """Pick the dominant sonic-character label for a sample.

    A fixed, ordered set of threshold rules is evaluated over the layer-1
    features; every matching rule contributes a weighted score and the
    highest-scoring label wins. Returns "neutral" when no rule matches.
    (The thresholds mirror the module-level CHARACTERS table.)
    """
    centroid = signal["spectral_centroid_mean"]
    low = signal["low_energy_ratio"]
    high = signal["high_energy_ratio"]
    mid = signal["mid_energy_ratio"]
    duration = signal["duration"]
    attack = signal["attack_time"]
    dynamics = signal["peak_rms_ratio"]
    flatness = signal["spectral_flatness_mean"]
    rms_var = signal["rms_std"]
    bandwidth = signal["spectral_bandwidth_mean"]
    # (label, matched?, score) — keep this order: dict insertion order breaks
    # score ties in max() below, exactly as the original if-chain did.
    rules = [
        ("boomy", low >= 0.6 and centroid <= 400, low * 2),
        ("deep", low >= 0.5 and centroid <= 500, low * 1.5),
        ("sharp", high >= 0.4 and centroid >= 3000 and attack <= 0.005, high * 2),
        ("crisp", high >= 0.3 and centroid >= 4000 and duration <= 0.2, high * 1.5),
        ("warm", 300 <= centroid <= 2000 and mid >= 0.4, mid * 1.5),
        ("bright", centroid >= 3000 and high >= 0.3, high * 1.5),
        ("dark", centroid <= 800 and low >= 0.4, low * 1.5),
        ("ethereal", 1500 <= centroid <= 5000 and rms_var <= 0.03, 1.0),
        ("short", duration <= 0.15, 1.0),
        ("impact", attack <= 0.005 and dynamics >= 5.0, dynamics / 5.0),
        ("full", duration >= 1.0 and bandwidth >= 4000, 1.0),
        ("hollow", mid <= 0.2 and low >= 0.3 and high >= 0.3, 1.0),
        ("tight", attack <= 0.003 and duration <= 0.3 and centroid >= 1000, 1.0),
        ("lush", flatness >= 0.1 and mid >= 0.3 and duration >= 0.5, flatness * 5),
        ("aggressive", dynamics >= 4.0 and centroid >= 2000, dynamics / 4.0),
        ("soft", dynamics <= 3.0 and attack >= 0.01, 1.0),
    ]
    scores = {label: score for label, hit, score in rules if hit}
    return max(scores, key=scores.get) if scores else "neutral"
# ---------------------------------------------------------------------------
# Full Analysis Pipeline (single file)
# ---------------------------------------------------------------------------
def analyze_file(filepath: str) -> Optional[dict]:
    """Full 4-layer analysis of a single audio file.

    This function must stay picklable (module-level, no closures): it is the
    worker submitted to ProcessPoolExecutor by batch_analyze().

    Returns:
        None for unreadable/too-short files (< 512 samples),
        {"original_path", "error"} on any exception (so one bad file never
        kills a worker process), otherwise the full per-sample record used
        in the index.
    """
    try:
        # Cap decoding at 30 s: plenty for one-shots/loops, bounds CPU+RAM.
        y, sr = librosa.load(filepath, sr=SAMPLE_RATE, mono=True, duration=30.0)
        if len(y) < 512:
            return None
        # Peak-normalize so features are level-independent; the source file's
        # absolute loudness is discarded from this point on.
        peak = np.max(np.abs(y))
        if peak > 1e-6:
            y = y / peak
        path = Path(filepath)
        # Pass BOTH the full filename and folder structure to classifier
        # (grandparent + parent folder names + file stem).
        classify_hint = f"{path.parent.parent.name} {path.parent.name} {path.stem}"
        signal = analyze_signal(y, sr)
        perceptual = analyze_perceptual(y, sr)
        musical = analyze_musical(signal, perceptual, y, sr)
        timbre = analyze_timbre(y, sr)
        role = classify_role(signal, perceptual, musical, classify_hint)
        character = classify_character(signal, perceptual, musical)
        new_name = _generate_name(role, musical, perceptual, character, filepath)
        file_hash = _hash_file(filepath)
        return {
            "original_path": filepath,
            "original_name": path.name,
            "new_name": new_name,
            "file_hash": file_hash,
            "file_size": os.path.getsize(filepath),
            "role": role,
            "character": character,
            "musical": musical,
            # chroma_mean is dropped here on purpose: it is only needed to
            # derive the key/tonality above, not in the saved index.
            "perceptual": {
                "mfcc_means": perceptual["mfcc_means"],
                "mfcc_stds": perceptual["mfcc_stds"],
                "onset_count": perceptual["onset_count"],
                "onset_density": perceptual["onset_density"],
                "tempo": perceptual["tempo"],
                "lufs": perceptual["lufs"],
            },
            "timbre": timbre,
        }
    except Exception as e:
        return {"original_path": filepath, "error": str(e)}
def _generate_name(role: str, musical: dict, perceptual: dict, character: str, filepath: str) -> str:
    """Build the canonical sample filename: role_key_bpm_character_id.ext.

    Key preference: the detected fundamental note when the sample is tonal,
    falling back to the estimated key; "X" when neither is known. A 6-char
    md5 prefix of the original path keeps names unique and stable across
    runs. BPM is zero-padded to 3 digits (000 when unknown).
    """
    if musical["is_tonal"]:
        key = musical["fundamental_note"]
    else:
        key = "X"
    if key == "X" and musical["key"] != "X":
        key = musical["key"]
    tempo = perceptual["tempo"]
    bpm = int(tempo) if tempo > 0 else 0
    uid = hashlib.md5(filepath.encode()).hexdigest()[:6]
    suffix = Path(filepath).suffix
    return f"{role}_{key}_{bpm:03d}_{character}_{uid}{suffix}"
def _hash_file(filepath: str) -> str:
    """Cheap content fingerprint for duplicate detection.

    md5 of the first 64 KiB, plus the last 64 KiB when the file exceeds
    128 KiB — avoids reading large samples end-to-end. Files between 64 and
    128 KiB are fingerprinted by their first chunk only.
    """
    chunk = 65536
    digest = hashlib.md5()
    total = os.path.getsize(filepath)
    with open(filepath, "rb") as fh:
        digest.update(fh.read(chunk))
        if total > 2 * chunk:
            fh.seek(total - chunk)
            digest.update(fh.read(chunk))
    return digest.hexdigest()
# ---------------------------------------------------------------------------
# File Collection
# ---------------------------------------------------------------------------
def collect_audio_files(*directories: str) -> list[str]:
    """Recursively gather every audio file under the given directories.

    Non-existent directories are silently skipped; matching is by extension
    (case-insensitive, against AUDIO_EXT). Returns a sorted list of path
    strings so downstream runs are deterministic.
    """
    found: list[str] = []
    for directory in directories:
        root = Path(directory)
        if not root.exists():
            continue
        found.extend(
            str(entry)
            for entry in root.rglob("*")
            if entry.is_file() and entry.suffix.lower() in AUDIO_EXT
        )
    return sorted(found)
# ---------------------------------------------------------------------------
# Batch Analysis (TRUE multiprocessing)
# ---------------------------------------------------------------------------
def batch_analyze(files: list[str], workers: int = MAX_WORKERS, checkpoint_path: Optional[str] = None) -> list[dict]:
    """Analyze all files using ProcessPoolExecutor for real multi-core parallelism.

    Each process runs independently — no GIL contention, no shared memory.

    Args:
        files: paths of audio files to analyze.
        workers: number of worker processes.
        checkpoint_path: optional JSONL file. Paths already present there are
            skipped on startup, and progress is re-written every 50
            completions — so up to 49 fresh results can be lost on a crash.

    Returns:
        List of successful analysis records. Order follows completion
        (as_completed), not input order. Failures are printed, not returned.
    """
    results = []
    errors = []
    done = 0
    total = len(files)
    # Resume from checkpoint
    completed_paths = set()
    if checkpoint_path and os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    entry = json.loads(line.strip())
                    completed_paths.add(entry["original_path"])
                    results.append(entry)
                except (json.JSONDecodeError, KeyError):
                    # Tolerate a truncated/corrupt trailing line from a crash.
                    pass
        done = len(results)
        print(f"Resumed from checkpoint: {done}/{total}")
    remaining = [f for f in files if f not in completed_paths]
    if not remaining:
        print("All files already analyzed.")
        return results
    print(f"Analyzing {len(remaining)} files with {workers} PROCESSES (true parallel)...")
    with ProcessPoolExecutor(max_workers=workers) as executor:
        # Map each future back to its input path for error reporting.
        futures = {executor.submit(analyze_file, f): f for f in remaining}
        for future in as_completed(futures):
            filepath = futures[future]
            done += 1
            try:
                result = future.result()
                # analyze_file contract: None = unusable file, dict with
                # "error" = analysis failure, otherwise a full record.
                if result is None:
                    errors.append(filepath)
                    continue
                if "error" in result:
                    errors.append(f"{filepath}: {result['error']}")
                    continue
                results.append(result)
                if checkpoint_path and done % 50 == 0:
                    _save_checkpoint(results, checkpoint_path)
                if done % 25 == 0 or done == total:
                    print(f" [{done}/{total}] {result.get('new_name', '?')}")
            except Exception as e:
                # Worker crashed (e.g. killed process): record and continue.
                errors.append(f"{filepath}: {e}")
    if errors:
        print(f"\nErrors ({len(errors)}):")
        for e in errors[:10]:
            print(f" - {e}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")
    return results
def _save_checkpoint(results: list[dict], path: str):
    """Persist analysis results as JSON Lines (one record per line).

    Rewrites the whole file each time; non-ASCII (e.g. Spanish filenames)
    is kept verbatim via ensure_ascii=False.
    """
    with open(path, "w", encoding="utf-8") as fh:
        fh.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in results)
def save_index(results: list[dict], output_path: str):
    """Write the master sample-index JSON with aggregate metadata.

    role/key/character histograms count successful analyses only, while
    total_samples and errors also count failed entries. The full result
    list is embedded under "samples".
    """
    roles: dict = {}
    keys: dict = {}
    characters: dict = {}
    error_count = 0
    for entry in results:
        if "error" in entry:
            error_count += 1
            continue
        for bucket, value in (
            (roles, entry.get("role", "unknown")),
            (keys, entry.get("musical", {}).get("key", "X")),
            (characters, entry.get("character", "unknown")),
        ):
            bucket[value] = bucket.get(value, 0) + 1
    index = {
        "metadata": {
            "total_samples": len(results),
            "errors": error_count,
            "roles": roles,
            "keys": keys,
            "characters": characters,
        },
        "samples": results,
    }
    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(index, fh, ensure_ascii=False, indent=2)
    print(f"Index saved to {output_path}")
    print(f" Total: {len(results)} | Roles: {roles} | Keys: {len(keys)} | Characters: {len(characters)}")
# ---------------------------------------------------------------------------
# Rename Engine
# ---------------------------------------------------------------------------
def plan_renames(results: list[dict], output_dir: str) -> list[dict]:
    """Plan (but do not execute) the copy of every analysed sample.

    Each sample maps to <output_dir>/<role>/<new_name>; duplicate generated
    names receive a numeric suffix (_2, _3, ...). Entries with errors or no
    generated name are skipped. Returns the plan entries consumed by
    execute_renames(): {old_path, new_path, new_name, role, original_name}.
    """
    destination = Path(output_dir)
    plan: list[dict] = []
    taken: set = set()
    for entry in results:
        if "error" in entry or "new_name" not in entry:
            continue
        candidate = entry["new_name"]
        if candidate in taken:
            base = Path(candidate).stem
            suffix = Path(candidate).suffix
            # Find the first free numeric suffix, starting at _2.
            n = 2
            while f"{base}_{n}{suffix}" in taken:
                n += 1
            candidate = f"{base}_{n}{suffix}"
        taken.add(candidate)
        plan.append({
            "old_path": str(Path(entry["original_path"])),
            "new_path": str(destination / entry["role"] / candidate),
            "new_name": candidate,
            "role": entry["role"],
            "original_name": entry["original_name"],
        })
    return plan
def execute_renames(renames: list[dict], dry_run: bool = True) -> dict:
    """Apply a rename plan by copying files into their new locations.

    With dry_run=True (the default) nothing is written and every entry is
    counted as skipped; missing sources are skipped too. Uses a
    metadata-preserving copy (copy2), not a move, so originals stay put.
    Returns {"planned", "executed", "skipped", "errors"}.
    """
    summary = {"planned": len(renames), "executed": 0, "skipped": 0, "errors": []}
    for entry in renames:
        source = Path(entry["old_path"])
        target = Path(entry["new_path"])
        if not source.exists() or dry_run:
            summary["skipped"] += 1
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        try:
            import shutil
            shutil.copy2(str(source), str(target))
            summary["executed"] += 1
        except Exception as exc:
            summary["errors"].append(f"{source.name} -> {target.name}: {exc}")
    return summary

View File

@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- DRUMLOOPS ---
drumloops = [s for s in samples if s.get("role") == "drumloop"]
print(f"DRUMLOOPS ({len(drumloops)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in drumloops[:50]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- ONESHOTS ---
oneshots = [s for s in samples if s.get("role") == "oneshot"]
print(f"\nONESHOTS ({len(oneshots)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in oneshots[:40]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- Summary: folder source of drumloops ---
print(f"\n\nDRUMLOOP ORIGINS:")
from collections import Counter
origins = Counter()
for s in drumloops:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
# Find the category folder
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins[parts[i+1]] += 1
break
for k, v in origins.most_common():
print(f" {k:40s} {v:4d}")
# --- Summary: folder source of oneshots ---
print(f"\nONESHOT ORIGINS:")
origins2 = Counter()
for s in oneshots:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins2[parts[i+1]] += 1
break
for k, v in origins2.most_common():
print(f" {k:40s} {v:4d}")

View File

@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os, re
from collections import Counter
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- Analyze filename patterns in misclassified ---
print("=" * 70)
print(" PATRONES DE NOMBRE EN 'DRUMLOOPS'")
print("=" * 70)
# Extract Midilatino stems
ml_stems = Counter()
for s in samples:
if s.get("role") != "drumloop":
continue
name = s.get("original_name", "")
# Midilatino pattern: Midilatino_Name_Key_Min_BPM_Stem.wav
if "Midilatino" in name or "midilatino" in name:
# Extract the stem type (last part before .wav)
parts = name.replace(".wav", "").replace(".flac", "").replace(".mp3", "")
# Try to find stem keywords
for kw in ["Drums", "Bass", "Lead", "Pad", "Pluck", "Arp", "Vocal",
"Vox", "Guitar", "Rhodes", "Piano", "Synth", "Reese",
"Texture", "Chords", "Reverse", "Fx", "Accent", "Harp",
"Keys", "Bell", "Loop", "Stem", "Snare", "Kick", "Hat",
"Perc", "Shaker", "Hi", "808"]:
if kw.lower() in parts.lower():
ml_stems[kw] += 1
break
else:
# No stem keyword found - it's the full mix
ml_stems["FULL_MIX"] += 1
print("\nMidilatino stem types in 'drumloop':")
for k, v in ml_stems.most_common():
print(f" {k:15s} {v:4d}")
# --- SS_RNBL patterns ---
print("\n\nSentimientoLatino SS_RNBL patterns:")
ss_stems = Counter()
for s in samples:
name = s.get("original_name", "")
if "SS_RNBL" in name:
# Extract type: SS_RNBL_Song_Stem_Type.wav
parts = name.replace(".wav", "").split("_")
if len(parts) >= 4:
stem_type = parts[3] if parts[3] not in ("One", "Shot") else "_".join(parts[3:5])
ss_stems[stem_type] += 1
for k, v in ss_stems.most_common():
print(f" {k:20s} {v:4d}")
# --- All filename keywords ---
print("\n\nAll filename role keywords across library:")
role_keywords = Counter()
for s in samples:
name = s.get("original_name", "").lower()
for kw in ["kick", "snare", "hi-hat", "hihat", "hat", "bass", "808",
"lead", "pad", "pluck", "arp", "vocal", "vox", "fx",
"perc", "drum", "loop", "fill", "guitar", "piano", "rhodes",
"synth", "bell", "brass", "string", "reese", "texture",
"chord", "shaker", "tambourine", "conga", "rim"]:
if kw in name:
role_keywords[kw] += 1
for k, v in role_keywords.most_common(25):
print(f" {k:15s} {v:4d}")

143
src/analyzer/run_batch.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Batch analyzer - STANDALONE for double-click execution.
Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).
IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows
for ProcessPoolExecutor. Without it, child processes re-import this file
and create infinite process spawning.
"""
from __future__ import annotations
import sys
import os
import time
import json
import warnings
import traceback
import multiprocessing
# CRITICAL: Windows multiprocessing guard - MUST be at top level
multiprocessing.freeze_support()
# Silence all library warnings so the console output stays readable.
warnings.filterwarnings("ignore")
# Project root: three directory levels above this file (src/analyzer/run_batch.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so relative paths (data/, librerias/) resolve
if PROJECT not in sys.path:
    # Make `from src.analyzer import ...` work when launched by double-click.
    sys.path.insert(0, PROJECT)
from src.analyzer import (
collect_audio_files,
batch_analyze,
save_index,
plan_renames,
)
def main():
    """Run the full batch pipeline: collect -> analyze -> index -> rename plan.

    Prints progress for each of the four steps and finishes with summary
    histograms (roles, characters, keys) over the successful analyses.
    """
    banner = "=" * 60
    print(banner)
    print(" ANALIZADOR FORENSE DE SAMPLES v2.0")
    print(" ProcessPoolExecutor + aubio F0 (C-native)")
    print(" 4 capas: Signal + Perceptual + Musical + Timbre")
    print(" 16 procesos independientes = 16 cores en paralelo")
    print(banner)

    # Both spellings of the library folder are scanned ("libreria"/"librerias").
    lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
    lib2 = os.path.join(PROJECT, "librerias", "reggaeton")
    print("\n[1/4] Colectando archivos de audio...")
    files = collect_audio_files(lib1, lib2)
    print(f" Encontrados: {len(files)} archivos")
    if not files:
        print("ERROR: No se encontraron archivos de audio.")
        return

    data_dir = os.path.join(PROJECT, "data")
    os.makedirs(data_dir, exist_ok=True)
    checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")
    # A tiny checkpoint file is assumed to be a leftover from the failed
    # thread-based run; drop it so the analysis starts clean.
    if os.path.exists(checkpoint) and os.path.getsize(checkpoint) < 1000:
        os.remove(checkpoint)
        print(" (Removed broken checkpoint)")

    print("\n[2/4] Analizando con 16 PROCESOS (70% CPU)...")
    print(" Cada proceso en su propio core, sin GIL")
    print(f" Checkpoint: {checkpoint}")
    print(" (Si se corta, re-ejecuta y continua desde donde quedo)")
    print()
    t0 = time.time()
    results = batch_analyze(files, workers=16, checkpoint_path=checkpoint)
    elapsed = time.time() - t0

    # Partition results into successes and failures in a single pass.
    valid, errors = [], []
    for item in results:
        (errors if "error" in item else valid).append(item)
    print(f"\n Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
    print(f" Exitosos: {len(valid)} | Errores: {len(errors)}")
    if errors:
        err_path = os.path.join(data_dir, "analysis_errors.json")
        with open(err_path, "w", encoding="utf-8") as fh:
            json.dump(errors, fh, ensure_ascii=False, indent=2)
        print(f" Errores guardados en: {err_path}")

    print("\n[3/4] Guardando indice...")
    index_path = os.path.join(data_dir, "sample_index.json")
    save_index(results, index_path)

    print("\n[4/4] Plan de renombrado...")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
    renames = plan_renames(results, output_dir)
    rename_path = os.path.join(data_dir, "rename_plan.json")
    with open(rename_path, "w", encoding="utf-8") as fh:
        json.dump(renames, fh, ensure_ascii=False, indent=2)
    print(f" {len(renames)} archivos para renombrar")
    print(f" Plan guardado en: {rename_path}")

    # Summary: tally role / character / key frequencies over the successes.
    print("\n" + banner)
    print(" RESUMEN")
    print(banner)
    roles = {}
    chars = {}
    keys = {}
    for item in valid:
        for table, value in (
            (roles, item.get("role", "?")),
            (chars, item.get("character", "?")),
            (keys, item.get("musical", {}).get("key", "X")),
        ):
            table[value] = table.get(value, 0) + 1
    print("\n Roles:")
    for role, count in sorted(roles.items(), key=lambda kv: -kv[1]):
        print(f" {role:12s} {count:4d} {'#' * min(count, 60)}")
    print("\n Caracteres:")
    for char, count in sorted(chars.items(), key=lambda kv: -kv[1]):
        print(f" {char:12s} {count:4d} {'#' * min(count, 50)}")
    print("\n Tonalidades (top 10):")
    for key, count in sorted(keys.items(), key=lambda kv: -kv[1])[:10]:
        print(f" {key:5s} {count:4d}")

    print("\n Proximo paso: ejecuta 2_RENOMBRAR.bat")
    print(banner)
# Script entry point. The __main__ guard is mandatory here: ProcessPoolExecutor
# on Windows re-imports this module in every child process, and without the
# guard each child would re-run main() and spawn more children.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Show the full traceback, then wait for Enter so a double-click
        # launch doesn't close the console before the error can be read.
        print(f"\nFATAL ERROR: {e}")
        traceback.print_exc()
        input("Presiona Enter para cerrar...")

View File

@@ -0,0 +1,88 @@
"""
Rename executor - Copies files to analyzed_samples/ with standardized names.
Reads from data/rename_plan.json generated by the batch analyzer.
"""
from __future__ import annotations
import sys
import os
import json
import shutil
import warnings
warnings.filterwarnings("ignore")  # keep console output free of library warnings
# Project root: three directory levels above this file.
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so relative paths resolve
if PROJECT not in sys.path:
    sys.path.insert(0, PROJECT)  # make the `src` package importable when double-clicked
from src.analyzer import plan_renames, execute_renames
def main():
    """Interactive rename step: load or build the plan, confirm, copy, log.

    Prefers an existing data/rename_plan.json; otherwise derives one from
    data/sample_index.json. Asks for confirmation before copying anything
    and writes a JSON log of the outcome.
    """
    rename_path = os.path.join(PROJECT, "data", "rename_plan.json")
    index_path = os.path.join(PROJECT, "data", "sample_index.json")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")

    # Load an existing plan if present, else regenerate it from the index.
    if os.path.exists(rename_path):
        print("Cargando plan de renombrado existente...")
        with open(rename_path, "r", encoding="utf-8") as fh:
            renames = json.load(fh)
    elif os.path.exists(index_path):
        print("Generando plan desde indice...")
        with open(index_path, "r", encoding="utf-8") as fh:
            index = json.load(fh)
        renames = plan_renames(index["samples"], output_dir)
        with open(rename_path, "w", encoding="utf-8") as fh:
            json.dump(renames, fh, ensure_ascii=False, indent=2)
    else:
        print("ERROR: No existe data/sample_index.json ni data/rename_plan.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return

    print(f"\n{len(renames)} archivos para renombrar")
    print(f"Destino: {output_dir}")
    print()

    # Preview the first few renames so the user knows what will happen.
    print("Ejemplos:")
    for entry in renames[:15]:
        print(f" {entry['original_name']:50s} -> {entry['role']:10s}\\{entry['new_name']}")
    remaining = len(renames) - 15
    if remaining > 0:
        print(f" ... y {remaining} mas")
    print()

    # Require explicit confirmation before touching the filesystem.
    if input("Ejecutar renombrado? (s/n): ").strip().lower() != "s":
        print("Cancelado.")
        return

    print("\nCopiando archivos...")
    stats = execute_renames(renames, dry_run=False)
    n_errors = len(stats.get("errors", []))
    print(f"\nResultado: {stats['executed']} copiados, {stats['skipped']} omitidos, {n_errors} errores")
    if stats.get("errors"):
        print("Errores:")
        for err in stats["errors"][:10]:
            print(f" {err}")

    # Persist a log of the executed plan and its outcome.
    log_path = os.path.join(PROJECT, "data", "rename_log.json")
    with open(log_path, "w", encoding="utf-8") as fh:
        json.dump({"stats": stats, "renames": renames}, fh, ensure_ascii=False, indent=2)
    print(f"\nLog guardado en: {log_path}")
# Script entry point: report any crash and keep the console window open
# so a double-click launch doesn't vanish before the error can be read.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"\nFATAL ERROR: {e}")
        import traceback  # imported lazily; only needed on the failure path
        traceback.print_exc()
        input("Presiona Enter para cerrar...")

117
src/analyzer/show_stats.py Normal file
View File

@@ -0,0 +1,117 @@
"""
Show statistics from the analysis index.
"""
import sys
import os
import json
# Project root: three directory levels above this file (src/analyzer/show_stats.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so data/ resolves
def main():
    """Print library statistics from the analysis index (read-only report).

    Loads data/sample_index.json and prints histograms of roles, sonic
    characters and keys, tempo/loudness ranges, tonal-vs-atonal and
    one-shot-vs-loop counts, plus a few example samples per role.
    Writes nothing to disk.
    """
    index_path = os.path.join(PROJECT, "data", "sample_index.json")
    if not os.path.exists(index_path):
        print("ERROR: No existe data/sample_index.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return
    with open(index_path, "r", encoding="utf-8") as f:
        index = json.load(f)
    samples = index["samples"]
    valid = [s for s in samples if "error" not in s]
    # Robustness fix: an index where every sample errored would otherwise
    # crash the percentage computations below with ZeroDivisionError.
    if not valid:
        print("ERROR: No existe data/sample_index.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return
    print("=" * 60)
    print(f" ESTADISTICAS DE LA BIBLIOTECA ({len(valid)} samples)")
    print("=" * 60)
    # Roles histogram
    roles = {}
    for s in valid:
        r = s.get("role", "?")
        roles[r] = roles.get(r, 0) + 1
    print("\n Roles:")
    max_count = max(roles.values()) if roles else 1
    for role, count in sorted(roles.items(), key=lambda x: -x[1]):
        bar_len = int(40 * count / max_count)
        # BUG FIX: the bar glyph was an empty string ('' * bar_len), so the
        # histogram bars never rendered (likely a lost '█' in the encoding).
        # Use '#' for consistency with run_batch's summary bars.
        print(f" {role:12s} {count:4d} {'#' * bar_len}")
    # Characters histogram
    chars = {}
    for s in valid:
        c = s.get("character", "?")
        chars[c] = chars.get(c, 0) + 1
    print("\n Caracteres sonoros:")
    max_count = max(chars.values()) if chars else 1
    for char, count in sorted(chars.items(), key=lambda x: -x[1]):
        bar_len = int(40 * count / max_count)
        print(f" {char:12s} {count:4d} {'#' * bar_len}")
    # Keys (top 15)
    keys = {}
    for s in valid:
        k = s.get("musical", {}).get("key", "X")
        keys[k] = keys.get(k, 0) + 1
    print("\n Tonalidades:")
    for key, count in sorted(keys.items(), key=lambda x: -x[1])[:15]:
        print(f" {key:5s} {count:4d}")
    # Tempo distribution; zeros are skipped (treated as "no tempo detected")
    tempos = [s.get("perceptual", {}).get("tempo", 0) for s in valid]
    tempos_nonzero = [t for t in tempos if t > 0]
    if tempos_nonzero:
        print("\n Tempo:")
        print(f" Rango: {min(tempos_nonzero):.0f} - {max(tempos_nonzero):.0f} BPM")
        print(f" Promedio: {sum(tempos_nonzero) / len(tempos_nonzero):.0f} BPM")
    # LUFS distribution; values at or below -70 LUFS are excluded
    lufs = [s.get("perceptual", {}).get("lufs", 0) for s in valid]
    lufs_valid = [v for v in lufs if v > -70]
    if lufs_valid:
        print("\n Loudness (LUFS):")
        print(f" Rango: {min(lufs_valid):.1f} a {max(lufs_valid):.1f} LUFS")
        print(f" Promedio: {sum(lufs_valid) / len(lufs_valid):.1f} LUFS")
    # Tonal vs atonal counts (len(valid) > 0 is guaranteed by the guard above)
    tonal = sum(1 for s in valid if s.get("musical", {}).get("is_tonal", False))
    atonal = len(valid) - tonal
    print("\n Tonalidad:")
    print(f" Tonal: {tonal} ({100 * tonal / len(valid):.0f}%)")
    print(f" Atonal: {atonal} ({100 * atonal / len(valid):.0f}%)")
    # One-shot vs loop counts
    oneshot = sum(1 for s in valid if s.get("musical", {}).get("is_oneshot", False))
    loops = sum(1 for s in valid if s.get("musical", {}).get("is_loop", False))
    print("\n Tipo:")
    print(f" One-shots: {oneshot}")
    print(f" Loops: {loops}")
    print(f" Otros: {len(valid) - oneshot - loops}")
    print("\n" + "=" * 60)
    # Up to five example samples per role for quick reference
    print("\n EJEMPLOS POR ROL:")
    by_role = {}
    for s in valid:
        by_role.setdefault(s.get("role", "?"), []).append(s)
    for role in sorted(by_role.keys()):
        print(f"\n [{role}] ({len(by_role[role])} total)")
        for s in by_role[role][:5]:
            key = s.get("musical", {}).get("key", "X")
            char = s.get("character", "?")
            bpm = s.get("perceptual", {}).get("tempo", 0)
            print(f" {s.get('new_name', '?'):50s} key={key:5s} bpm={bpm:5.0f} char={char}")
# Script entry point: print the statistics report when run directly.
if __name__ == "__main__":
    main()