feat: pattern-based generators from real track analysis, RPP structure fixes, randomization

- Reverse-engineer drum patterns from 2 real reggaeton tracks with librosa
- Create patterns.py with extracted frequency data (kick/snare/hihat positions)
- Rewrite rhythm.py with pattern-bank generators (dembow, dense, trapico, offbeat)
- Rewrite melodic.py with section-aware generators and humanization
- Add weighted random sample selection in SampleSelector (top-5 pool)
- Add generate_structure() with randomized templates and energy variance
- Fix RPP structure: TEMPO arity (3→4 args), string quoting for empty strings
- Rewrite quick_drumloop_test.py with correct REAPER ground truth format
- Add scripts/analyze_examples.py for reverse engineering audio tracks
- Add --seed argument for reproducible generation
- 72 tests passing
This commit is contained in:
renato97
2026-05-03 16:08:07 -03:00
parent 32dafd94e0
commit 3444006411
10 changed files with 1664 additions and 285 deletions

227
scripts/analyze_examples.py Normal file
View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python
"""Reverse-engineer drum patterns from example reggaetón tracks."""
from __future__ import annotations
import librosa
import numpy as np
from pathlib import Path
ROOT = Path(__file__).parent.parent
def _band_onset_times(y: np.ndarray, sr: int, fmin: float, fmax: float) -> np.ndarray:
    """Detect onset times (seconds) from a band-limited mel onset envelope.

    Restricting the mel spectrogram to a frequency band lets one onset
    detector isolate different drum voices (lows = kick, mids = snare,
    highs = hihat).
    """
    env = librosa.onset.onset_strength(
        y=y, sr=sr,
        feature=librosa.feature.melspectrogram,
        fmin=fmin, fmax=fmax,
    )
    return librosa.onset.onset_detect(
        onset_envelope=env, sr=sr,
        units="time", backtrack=False,
    )


def _print_bar_grids(
    onsets: np.ndarray,
    symbol: str,
    first_beat: float,
    bar_duration: float,
    sixteenth: float,
    n_bars: int,
) -> dict[int, int]:
    """Print one 16th-note grid line per bar and return a {position: count} histogram.

    Onsets are snapped to the nearest 16th-note slot within each bar;
    anything that rounds outside the 0-15 grid is dropped.
    """
    counts: dict[int, int] = {}
    for bar in range(n_bars):
        bar_start = first_beat + bar * bar_duration
        in_bar = onsets[(onsets >= bar_start) & (onsets < bar_start + bar_duration)]
        pattern = ["."] * 16
        for t in in_bar:
            pos = round((t - bar_start) / sixteenth)
            if 0 <= pos < 16:
                pattern[pos] = symbol
                counts[pos] = counts.get(pos, 0) + 1
        line = " ".join(pattern)
        print(f" Bar {bar+1:2d}: {line}")
    return counts


def _energy_label(avg_energy: float) -> str:
    """Map a bar-averaged RMS level (dB relative to track peak) to a coarse section label."""
    if avg_energy < -30:
        return "SILENCE/BREAK"
    if avg_energy < -20:
        return "INTRO/FILTER"
    if avg_energy < -12:
        return "VERSE/BRIDGE"
    if avg_energy < -6:
        return "BUILD/PRE-CHORUS"
    return "CHORUS/DROP"


def analyze_track(path: str, track_name: str) -> dict:
    """Analyze one track: print drum patterns, sections and filter sweeps.

    Parameters
    ----------
    path:
        Audio file path (anything ``librosa.load`` accepts).
    track_name:
        Human-readable label used in the printed report.

    Returns
    -------
    dict
        ``tempo``, ``duration``, ``n_bars``, per-instrument 16th-note
        histograms (``kick_pattern`` / ``snare_pattern`` / ``hihat_pattern``)
        and coarse ``sections`` labels.

    Raises
    ------
    ValueError
        If no beats can be detected (the beat grid below would be unusable).
    """
    y, sr = librosa.load(path, sr=44100, mono=True)
    duration = len(y) / sr

    # Tempo and beats.  Newer librosa versions may return tempo as an array.
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    if isinstance(tempo, np.ndarray):
        tempo = float(tempo[0]) if tempo.ndim > 0 else float(tempo)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    if len(beat_times) == 0 or tempo <= 0:
        # Everything below divides by tempo or indexes beat_times[0].
        raise ValueError(f"no beats detected in {path!r}")
    bar_duration = 4 * 60.0 / tempo  # assumes 4/4
    sixteenth = 60.0 / tempo / 4

    print(f"\n{'='*60}")
    print(f" {track_name}")
    print(f" Duration: {duration:.1f}s | Tempo: {tempo:.1f} BPM")
    print(f" Bar: {bar_duration:.3f}s | 16th: {sixteenth:.4f}s")
    print(f"{'='*60}\n")

    first_beat = beat_times[0]
    # Analyze first 16 bars
    n_bars = min(16, int(len(beat_times) / 4))

    # ---- KICK DETECTION (low frequency onsets) ----
    print("KICK PATTERN (first 16 bars, 16th note grid):")
    print(" Grid: 1 & 2 & 3 & 4 & 1e &a 2e &a 3e &a 4e &a\n")
    kick_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 20, 300), "K",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Kick frequency: {dict(sorted(kick_pattern_counts.items()))}")

    # ---- SNARE DETECTION (mid-high frequency onsets) ----
    print("\nSNARE PATTERN (first 16 bars, 16th note grid):\n")
    snare_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 800, 8000), "S",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Snare frequency: {dict(sorted(snare_pattern_counts.items()))}")

    # ---- HIHAT DETECTION (very high frequency) ----
    print("\nHIHAT PATTERN (first 16 bars, 16th note grid):\n")
    hihat_pattern_counts = _print_bar_grids(
        _band_onset_times(y, sr, 5000, 16000), "H",
        first_beat, bar_duration, sixteenth, n_bars,
    )
    print(f"\n Hihat frequency: {dict(sorted(hihat_pattern_counts.items()))}")

    # ---- SECTION ANALYSIS (full track) ----
    hop = 2048
    rms = librosa.feature.rms(y=y, hop_length=hop, frame_length=4096)[0]
    rms_times = librosa.times_like(rms, sr=sr, hop_length=hop)
    rms_db = librosa.amplitude_to_db(rms, ref=np.max)

    # Energy per bar; -60 dB stands in for bars with no RMS frames.
    total_bars = int(len(beat_times) / 4)
    bar_energies = []
    for b in range(total_bars):
        start = beat_times[b * 4]
        end = beat_times[min((b + 1) * 4, len(beat_times) - 1)]
        mask = (rms_times >= start) & (rms_times <= end)
        bar_energies.append(float(np.mean(rms_db[mask])) if np.any(mask) else -60.0)

    # Detect sections by energy clustering: a >6 dB jump between smoothed
    # bar energies marks a section boundary.
    from scipy.ndimage import uniform_filter1d

    smooth = uniform_filter1d(np.array(bar_energies), size=2)
    diff = np.diff(smooth)
    boundaries = [0]
    for idx, d in enumerate(diff):
        if abs(d) > 6:
            boundaries.append(idx + 1)
    boundaries.append(total_bars)

    print(f"\n\nDETECTED SECTIONS ({len(boundaries)-1} sections):\n")
    section_labels = []
    for s in range(len(boundaries) - 1):
        start_bar = boundaries[s]
        end_bar = boundaries[s + 1]
        start_time = beat_times[start_bar * 4] if start_bar * 4 < len(beat_times) else 0
        n_section_bars = end_bar - start_bar
        avg_energy = np.mean(bar_energies[start_bar:end_bar])
        label = _energy_label(avg_energy)
        section_labels.append(label)
        print(f" {start_time:6.1f}s | Bars {start_bar+1:3d}-{end_bar:3d} ({n_section_bars:2d} bars) | {avg_energy:+6.1f} dB | {label}")

    # ---- SPECTRAL ANALYSIS (filter sweeps) ----
    # Large smoothed-centroid moves are read as filter sweeps: drops as a
    # high-pass engaging, rises as the filter opening back up.
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    sc_times = librosa.times_like(spectral_centroid, sr=sr)
    sc_smooth = uniform_filter1d(spectral_centroid, size=50)
    sc_diff = np.diff(sc_smooth)
    big_drops = np.where(sc_diff < -500)[0]
    big_rises = np.where(sc_diff > 500)[0]
    if len(big_drops) > 0:
        print(f"\n\nFILTER SWEEPS (spectral centroid drops):\n")
        for d in big_drops[:10]:
            t = sc_times[d]
            print(f" {t:.1f}s - centroid dropped {sc_diff[d]:.0f} Hz (HPF engaging)")
    if len(big_rises) > 0:
        print(f"\nFILTER OPENS (spectral centroid rises):\n")
        for r in big_rises[:10]:
            t = sc_times[r]
            print(f" {t:.1f}s - centroid rose {sc_diff[r]:.0f} Hz (filter opening)")

    return {
        "tempo": tempo,
        "duration": duration,
        "n_bars": total_bars,
        "kick_pattern": kick_pattern_counts,
        "snare_pattern": snare_pattern_counts,
        "hihat_pattern": hihat_pattern_counts,
        "sections": section_labels,
    }
if __name__ == "__main__":
    # Analyze the two bundled example tracks; all results are printed by
    # analyze_track itself, so the returned dict is intentionally unused.
    for i in (1, 2):
        track = f"ejemplo{i}.mp3"
        analyze_track(str(ROOT / "ejemplos" / track), track)