feat: pattern-based generators from real track analysis, RPP structure fixes, randomization
- Reverse-engineer drum patterns from 2 real reggaeton tracks with librosa
- Create patterns.py with extracted frequency data (kick/snare/hihat positions)
- Rewrite rhythm.py with pattern-bank generators (dembow, dense, trapico, offbeat)
- Rewrite melodic.py with section-aware generators and humanization
- Add weighted random sample selection in SampleSelector (top-5 pool)
- Add generate_structure() with randomized templates and energy variance
- Fix RPP structure: TEMPO arity (3→4 args), string quoting for empty strings
- Rewrite quick_drumloop_test.py with correct REAPER ground truth format
- Add scripts/analyze_examples.py for reverse engineering audio tracks
- Add --seed argument for reproducible generation
- 72 tests passing
This commit is contained in:
227
scripts/analyze_examples.py
Normal file
227
scripts/analyze_examples.py
Normal file
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python
|
||||
"""Reverse-engineer drum patterns from example reggaetón tracks."""
|
||||
from __future__ import annotations
|
||||
import librosa
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).parent.parent
|
||||
|
||||
|
||||
def analyze_track(path: str, track_name: str) -> dict:
    """Analyze one reggaetón track and print a rhythm/structure report.

    Loads the audio, estimates tempo and beats, detects kick/snare/hihat
    onsets in three frequency bands, prints a per-bar 16th-note grid for
    each instrument, segments the track into sections by per-bar RMS
    energy, and reports large spectral-centroid moves (likely filter
    sweeps).

    Args:
        path: Path to the audio file (anything ``librosa.load`` accepts).
        track_name: Human-readable label used in the printed report.

    Returns:
        dict with keys ``tempo``, ``duration``, ``n_bars``,
        ``kick_pattern``, ``snare_pattern``, ``hihat_pattern``
        (each pattern maps 16th-note position -> hit count over the first
        16 bars) and ``sections`` (list of coarse section labels).
    """
    y, sr = librosa.load(path, sr=44100, mono=True)
    duration = len(y) / sr

    # ---- Tempo and beat grid (4/4 assumed) ----
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    if isinstance(tempo, np.ndarray):
        # librosa >= 0.10 may return tempo as a 0-d or 1-element array.
        tempo = float(tempo[0]) if tempo.ndim > 0 else float(tempo)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)

    if len(beat_times) == 0:
        # No beats detected (e.g. silence / very short file): the grid
        # analysis below would crash on beat_times[0], so bail out early.
        print(f"\nWARNING: no beats detected in {track_name}; skipping analysis.")
        return {
            "tempo": tempo,
            "duration": duration,
            "n_bars": 0,
            "kick_pattern": {},
            "snare_pattern": {},
            "hihat_pattern": {},
            "sections": [],
        }

    bar_duration = 4 * 60.0 / tempo
    sixteenth = 60.0 / tempo / 4

    print(f"\n{'='*60}")
    print(f" {track_name}")
    print(f" Duration: {duration:.1f}s | Tempo: {tempo:.1f} BPM")
    print(f" Bar: {bar_duration:.3f}s | 16th: {sixteenth:.4f}s")
    print(f"{'='*60}\n")

    def _band_onsets(fmin: float, fmax: float) -> np.ndarray:
        """Return onset times (s) from a mel onset envelope limited to [fmin, fmax] Hz."""
        env = librosa.onset.onset_strength(
            y=y, sr=sr,
            feature=librosa.feature.melspectrogram,
            fmin=fmin, fmax=fmax,
        )
        return librosa.onset.onset_detect(
            onset_envelope=env, sr=sr,
            units="time", backtrack=False,
        )

    # Band choices: kick lives in the lows, snare in the mids/highs,
    # hihat in the very-high band.
    kick_onsets = _band_onsets(20, 300)
    snare_onsets = _band_onsets(800, 8000)
    hihat_onsets = _band_onsets(5000, 16000)

    first_beat = beat_times[0]
    # Analyze at most the first 16 bars on the 16th-note grid.
    n_bars = min(16, int(len(beat_times) / 4))

    def _drum_grid(onsets: np.ndarray, symbol: str) -> dict[int, int]:
        """Print each bar's 16-slot grid (``symbol`` at hits) and return per-position hit counts."""
        counts: dict[int, int] = {}
        for bar in range(n_bars):
            bar_start = first_beat + bar * bar_duration
            bar_end = bar_start + bar_duration
            hits = onsets[(onsets >= bar_start) & (onsets < bar_end)]
            cells = ["."] * 16
            for t in hits:
                # Snap the onset to the nearest 16th-note slot in this bar.
                pos = round((t - bar_start) / sixteenth)
                if 0 <= pos < 16:
                    cells[pos] = symbol
                    counts[pos] = counts.get(pos, 0) + 1
            line = " ".join(cells)
            print(f" Bar {bar+1:2d}: {line}")
        return counts

    print("KICK PATTERN (first 16 bars, 16th note grid):")
    print(" Grid: 1 & 2 & 3 & 4 & 1e &a 2e &a 3e &a 4e &a\n")
    kick_pattern_counts = _drum_grid(kick_onsets, "K")
    print(f"\n Kick frequency: {dict(sorted(kick_pattern_counts.items()))}")

    print("\nSNARE PATTERN (first 16 bars, 16th note grid):\n")
    snare_pattern_counts = _drum_grid(snare_onsets, "S")
    print(f"\n Snare frequency: {dict(sorted(snare_pattern_counts.items()))}")

    print("\nHIHAT PATTERN (first 16 bars, 16th note grid):\n")
    hihat_pattern_counts = _drum_grid(hihat_onsets, "H")
    print(f"\n Hihat frequency: {dict(sorted(hihat_pattern_counts.items()))}")

    # ---- SECTION ANALYSIS (full track) ----
    # Mean RMS level (dB) per bar, used to segment the arrangement.
    hop = 2048
    rms = librosa.feature.rms(y=y, hop_length=hop, frame_length=4096)[0]
    rms_times = librosa.times_like(rms, sr=sr, hop_length=hop)
    rms_db = librosa.amplitude_to_db(rms, ref=np.max)

    total_bars = int(len(beat_times) / 4)
    bar_energies = []
    for b in range(total_bars):
        start = beat_times[b * 4]
        end = beat_times[min((b + 1) * 4, len(beat_times) - 1)]
        mask = (rms_times >= start) & (rms_times <= end)
        # -60 dB floor for bars with no RMS frames (degenerate edge case).
        bar_energies.append(float(np.mean(rms_db[mask])) if np.any(mask) else -60.0)

    # Smooth bar energies, then cut a section wherever the level jumps >6 dB.
    from scipy.ndimage import uniform_filter1d
    smooth = uniform_filter1d(np.array(bar_energies), size=2)
    diff = np.diff(smooth)
    boundaries = [0]
    for idx, d in enumerate(diff):
        if abs(d) > 6:
            boundaries.append(idx + 1)
    boundaries.append(total_bars)

    print(f"\n\nDETECTED SECTIONS ({len(boundaries)-1} sections):\n")
    section_labels = []
    for s in range(len(boundaries) - 1):
        start_bar = boundaries[s]
        end_bar = boundaries[s + 1]
        start_time = beat_times[start_bar * 4] if start_bar * 4 < len(beat_times) else 0
        n_section_bars = end_bar - start_bar
        avg_energy = np.mean(bar_energies[start_bar:end_bar])

        # Coarse classification by average level; thresholds are heuristic.
        if avg_energy < -30:
            label = "SILENCE/BREAK"
        elif avg_energy < -20:
            label = "INTRO/FILTER"
        elif avg_energy < -12:
            label = "VERSE/BRIDGE"
        elif avg_energy < -6:
            label = "BUILD/PRE-CHORUS"
        else:
            label = "CHORUS/DROP"

        section_labels.append(label)
        print(f" {start_time:6.1f}s | Bars {start_bar+1:3d}-{end_bar:3d} ({n_section_bars:2d} bars) | {avg_energy:+6.1f} dB | {label}")

    # ---- SPECTRAL ANALYSIS (filter sweeps) ----
    # Big smoothed-centroid moves suggest a high-pass filter closing
    # (drop) or a filter opening (rise).
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    sc_times = librosa.times_like(spectral_centroid, sr=sr)
    sc_smooth = uniform_filter1d(spectral_centroid, size=50)
    sc_diff = np.diff(sc_smooth)
    big_drops = np.where(sc_diff < -500)[0]
    big_rises = np.where(sc_diff > 500)[0]

    if len(big_drops) > 0:
        print("\n\nFILTER SWEEPS (spectral centroid drops):\n")
        for d in big_drops[:10]:
            t = sc_times[d]
            print(f" {t:.1f}s - centroid dropped {sc_diff[d]:.0f} Hz (HPF engaging)")

    if len(big_rises) > 0:
        print("\nFILTER OPENS (spectral centroid rises):\n")
        for r in big_rises[:10]:
            t = sc_times[r]
            print(f" {t:.1f}s - centroid rose {sc_diff[r]:.0f} Hz (filter opening)")

    return {
        "tempo": tempo,
        "duration": duration,
        "n_bars": total_bars,
        "kick_pattern": kick_pattern_counts,
        "snare_pattern": snare_pattern_counts,
        "hihat_pattern": hihat_pattern_counts,
        "sections": section_labels,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Analyze the two reference tracks shipped in ejemplos/.
    # (analyze_track prints its full report; the returned summary dict
    # is not needed here, so it is deliberately discarded.)
    for i in (1, 2):
        track_path = ROOT / "ejemplos" / f"ejemplo{i}.mp3"
        analyze_track(str(track_path), f"ejemplo{i}.mp3")
|
||||
Reference in New Issue
Block a user