feat: pattern-based generators from real track analysis, RPP structure fixes, randomization

- Reverse-engineer drum patterns from 2 real reggaeton tracks with librosa
- Create patterns.py with extracted frequency data (kick/snare/hihat positions)
- Rewrite rhythm.py with pattern-bank generators (dembow, dense, trapico, offbeat)
- Rewrite melodic.py with section-aware generators and humanization
- Add weighted random sample selection in SampleSelector (top-5 pool)
- Add generate_structure() with randomized templates and energy variance
- Fix RPP structure: TEMPO arity (3→4 args), string quoting for empty strings
- Rewrite quick_drumloop_test.py with correct REAPER ground truth format
- Add scripts/analyze_examples.py for reverse engineering audio tracks
- Add --seed argument for reproducible generation
- 72 tests passing
This commit is contained in:
renato97
2026-05-03 16:08:07 -03:00
parent 32dafd94e0
commit 3444006411
10 changed files with 1664 additions and 285 deletions

227
scripts/analyze_examples.py Normal file
View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python
"""Reverse-engineer drum patterns from example reggaetón tracks."""
from __future__ import annotations
import librosa
import numpy as np
from pathlib import Path
ROOT = Path(__file__).parent.parent
def _band_onset_times(y: np.ndarray, sr: int, fmin: float, fmax: float) -> np.ndarray:
    """Detect onset times (seconds) from a band-limited mel onset envelope.

    Restricting the mel spectrogram to a frequency band lets one onset
    detector isolate different drum voices (lows = kick, mids = snare,
    highs = hihat).
    """
    env = librosa.onset.onset_strength(
        y=y, sr=sr,
        feature=librosa.feature.melspectrogram,
        fmin=fmin, fmax=fmax,
    )
    return librosa.onset.onset_detect(
        onset_envelope=env, sr=sr,
        units="time", backtrack=False,
    )


def _print_bar_grids(
    onsets: np.ndarray,
    symbol: str,
    first_beat: float,
    bar_duration: float,
    sixteenth: float,
    n_bars: int,
) -> dict[int, int]:
    """Print one 16th-note grid line per bar and return a {position: count} histogram.

    Onsets are snapped to the nearest 16th-note slot within each bar;
    anything that rounds outside the 0-15 grid is dropped.
    """
    counts: dict[int, int] = {}
    for bar in range(n_bars):
        bar_start = first_beat + bar * bar_duration
        in_bar = onsets[(onsets >= bar_start) & (onsets < bar_start + bar_duration)]
        pattern = ["."] * 16
        for t in in_bar:
            pos = round((t - bar_start) / sixteenth)
            if 0 <= pos < 16:
                pattern[pos] = symbol
                counts[pos] = counts.get(pos, 0) + 1
        line = " ".join(pattern)
        print(f" Bar {bar+1:2d}: {line}")
    return counts


def _energy_label(avg_energy: float) -> str:
    """Map a bar-averaged RMS level (dB relative to track peak) to a coarse section label."""
    if avg_energy < -30:
        return "SILENCE/BREAK"
    if avg_energy < -20:
        return "INTRO/FILTER"
    if avg_energy < -12:
        return "VERSE/BRIDGE"
    if avg_energy < -6:
        return "BUILD/PRE-CHORUS"
    return "CHORUS/DROP"


def analyze_track(path: str, track_name: str) -> dict:
    """Analyze one track: print drum patterns, sections and filter sweeps.

    Parameters
    ----------
    path:
        Audio file path (anything ``librosa.load`` accepts).
    track_name:
        Human-readable label used in the printed report.

    Returns
    -------
    dict
        ``tempo``, ``duration``, ``n_bars``, per-instrument 16th-note
        histograms (``kick_pattern`` / ``snare_pattern`` / ``hihat_pattern``)
        and coarse ``sections`` labels.

    Raises
    ------
    ValueError
        If no beats can be detected (the beat grid below would be unusable).
    """
    y, sr = librosa.load(path, sr=44100, mono=True)
    duration = len(y) / sr

    # Tempo and beats.  Newer librosa versions may return tempo as an array.
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    if isinstance(tempo, np.ndarray):
        tempo = float(tempo[0]) if tempo.ndim > 0 else float(tempo)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    if len(beat_times) == 0 or tempo <= 0:
        # Everything below divides by tempo or indexes beat_times[0].
        raise ValueError(f"no beats detected in {path!r}")
    bar_duration = 4 * 60.0 / tempo  # assumes 4/4
    sixteenth = 60.0 / tempo / 4

    print(f"\n{'='*60}")
    print(f" {track_name}")
    print(f" Duration: {duration:.1f}s | Tempo: {tempo:.1f} BPM")
    print(f" Bar: {bar_duration:.3f}s | 16th: {sixteenth:.4f}s")
    print(f"{'='*60}\n")

    first_beat = beat_times[0]
    # Analyze first 16 bars
    n_bars = min(16, int(len(beat_times) / 4))

    # ---- KICK DETECTION (low frequency onsets) ----
    print("KICK PATTERN (first 16 bars, 16th note grid):")
    print(" Grid: 1 & 2 & 3 & 4 & 1e &a 2e &a 3e &a 4e &a\n")
    kick_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 20, 300), "K",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Kick frequency: {dict(sorted(kick_pattern_counts.items()))}")

    # ---- SNARE DETECTION (mid-high frequency onsets) ----
    print("\nSNARE PATTERN (first 16 bars, 16th note grid):\n")
    snare_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 800, 8000), "S",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Snare frequency: {dict(sorted(snare_pattern_counts.items()))}")

    # ---- HIHAT DETECTION (very high frequency) ----
    print("\nHIHAT PATTERN (first 16 bars, 16th note grid):\n")
    hihat_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 5000, 16000), "H",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Hihat frequency: {dict(sorted(hihat_pattern_counts.items()))}")

    # ---- SECTION ANALYSIS (full track) ----
    hop = 2048
    rms = librosa.feature.rms(y=y, hop_length=hop, frame_length=4096)[0]
    rms_times = librosa.times_like(rms, sr=sr, hop_length=hop)
    rms_db = librosa.amplitude_to_db(rms, ref=np.max)

    # Energy per bar; -60 dB stands in for bars with no RMS frames.
    total_bars = int(len(beat_times) / 4)
    bar_energies = []
    for b in range(total_bars):
        start = beat_times[b * 4]
        end = beat_times[min((b + 1) * 4, len(beat_times) - 1)]
        mask = (rms_times >= start) & (rms_times <= end)
        bar_energies.append(float(np.mean(rms_db[mask])) if np.any(mask) else -60.0)

    # Detect sections by energy clustering: a >6 dB jump between smoothed
    # bar energies marks a section boundary.
    from scipy.ndimage import uniform_filter1d

    smooth = uniform_filter1d(np.array(bar_energies), size=2)
    diff = np.diff(smooth)
    boundaries = [0]
    for idx, d in enumerate(diff):
        if abs(d) > 6:
            boundaries.append(idx + 1)
    boundaries.append(total_bars)

    print(f"\n\nDETECTED SECTIONS ({len(boundaries)-1} sections):\n")
    section_labels = []
    for s in range(len(boundaries) - 1):
        start_bar = boundaries[s]
        end_bar = boundaries[s + 1]
        start_time = beat_times[start_bar * 4] if start_bar * 4 < len(beat_times) else 0
        n_section_bars = end_bar - start_bar
        avg_energy = np.mean(bar_energies[start_bar:end_bar])
        label = _energy_label(avg_energy)
        section_labels.append(label)
        print(f" {start_time:6.1f}s | Bars {start_bar+1:3d}-{end_bar:3d} ({n_section_bars:2d} bars) | {avg_energy:+6.1f} dB | {label}")

    # ---- SPECTRAL ANALYSIS (filter sweeps) ----
    # Large smoothed-centroid moves are read as filter sweeps: drops as a
    # high-pass engaging, rises as the filter opening back up.
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    sc_times = librosa.times_like(spectral_centroid, sr=sr)
    sc_smooth = uniform_filter1d(spectral_centroid, size=50)
    sc_diff = np.diff(sc_smooth)
    big_drops = np.where(sc_diff < -500)[0]
    big_rises = np.where(sc_diff > 500)[0]
    if len(big_drops) > 0:
        print(f"\n\nFILTER SWEEPS (spectral centroid drops):\n")
        for d in big_drops[:10]:
            t = sc_times[d]
            print(f" {t:.1f}s - centroid dropped {sc_diff[d]:.0f} Hz (HPF engaging)")
    if len(big_rises) > 0:
        print(f"\nFILTER OPENS (spectral centroid rises):\n")
        for r in big_rises[:10]:
            t = sc_times[r]
            print(f" {t:.1f}s - centroid rose {sc_diff[r]:.0f} Hz (filter opening)")

    return {
        "tempo": tempo,
        "duration": duration,
        "n_bars": total_bars,
        "kick_pattern": kick_pattern_counts,
        "snare_pattern": snare_pattern_counts,
        "hihat_pattern": hihat_pattern_counts,
        "sections": section_labels,
    }
if __name__ == "__main__":
    # Analyze the two bundled example tracks; all results are printed by
    # analyze_track itself, so the returned dict is intentionally unused.
    for i in (1, 2):
        track = f"ejemplo{i}.mp3"
        analyze_track(str(ROOT / "ejemplos" / track), track)