feat: reggaeton production system with intelligent sample selection and FLP generation

This commit is contained in:
renato97
2026-05-02 21:40:18 -03:00
commit 4d941f3f90
62 changed files with 8656 additions and 0 deletions

827
src/analyzer/__init__.py Normal file
View File

@@ -0,0 +1,827 @@
"""Deep forensic audio sample analyzer.
4-layer analysis pipeline:
Layer 1 - Signal: FFT, spectral centroid, bandwidth, rolloff, flatness, ZCR, RMS, crest factor
Layer 2 - Perceptual: MFCC (20), chromagram (12), onset envelope, tempo, LUFS
Layer 3 - Musical: Key estimation (Krumhansl-Schmuckler), F0 via aubio (C-native), tonal/atonal
Layer 4 - Timbre: Mel band stats, spectral contrast, tonnetz
Architecture: ProcessPoolExecutor with 16 workers for TRUE multi-core parallelism.
aubio for F0 (C-native, ~1ms per file vs pyin ~2s per file).
"""
from __future__ import annotations
import os
import json
import hashlib
from pathlib import Path
from typing import Optional
from concurrent.futures import ProcessPoolExecutor, as_completed
import numpy as np
import librosa
import soundfile as sf
import aubio
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
SAMPLE_RATE = 44100  # analysis rate; librosa.load resamples everything to this
HOP_LENGTH = 512     # STFT hop (~11.6 ms at 44.1 kHz), shared by all layers
N_FFT = 2048         # STFT window size for layer-1 spectral features
N_MFCC = 20          # number of MFCC coefficients in layer 2
N_CHROMA = 12        # pitch classes in the chromagram
MAX_WORKERS = 16 # 70% of 24 cores
AUDIO_EXT = {".wav", ".flac", ".mp3", ".aif", ".aiff"}
# Krumhansl-Schmuckler key profiles: perceptual pitch-class weights with the
# tonic at index 0; _estimate_key() correlates these against all 12 rotations
# of the chroma vector.
MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Character classification thresholds
# NOTE(review): this table is not referenced anywhere in the code visible in
# this file — classify_character() hard-codes the same thresholds (plus
# per-rule score weights) inline. Keep the two in sync, or drop one.
CHARACTERS = {
    "boomy": {"low_ratio_min": 0.6, "centroid_max": 400},
    "deep": {"low_ratio_min": 0.5, "centroid_max": 500, "fundamental_max": 150},
    "sharp": {"high_ratio_min": 0.4, "centroid_min": 3000, "attack_max": 0.005},
    "crisp": {"high_ratio_min": 0.3, "centroid_min": 4000, "duration_max": 0.2},
    "warm": {"centroid_min": 300, "centroid_max": 2000, "mid_ratio_min": 0.4},
    "bright": {"centroid_min": 3000, "high_ratio_min": 0.3},
    "dark": {"centroid_max": 800, "low_ratio_min": 0.4},
    "ethereal": {"centroid_min": 1500, "centroid_max": 5000, "rms_std_max": 0.03},
    "short": {"duration_max": 0.15},
    "impact": {"attack_max": 0.005, "peak_rms_ratio_min": 5.0},
    "full": {"duration_min": 1.0, "bandwidth_min": 4000},
    "hollow": {"mid_ratio_max": 0.2, "low_ratio_min": 0.3, "high_ratio_min": 0.3},
    "tight": {"attack_max": 0.003, "duration_max": 0.3, "centroid_min": 1000},
    "lush": {"spectral_flatness_min": 0.1, "mid_ratio_min": 0.3, "duration_min": 0.5},
    "aggressive": {"peak_rms_ratio_min": 4.0, "centroid_min": 2000},
    "soft": {"peak_rms_ratio_max": 3.0, "attack_min": 0.01},
}
# ---------------------------------------------------------------------------
# Layer 1: Signal Analysis
# ---------------------------------------------------------------------------
def analyze_signal(y: np.ndarray, sr: int) -> dict:
    """Layer 1: time-domain and spectral signal features.

    Args:
        y: mono audio buffer. analyze_file() peak-normalizes it to 1.0 first,
           so all level-based features here are relative, not absolute.
        sr: sample rate in Hz.

    Returns:
        Flat dict of rounded scalars: duration, RMS stats, crest factor,
        peak/RMS ratio, ZCR, spectral centroid/bandwidth/rolloff/flatness,
        the low/mid/high band-energy split (<300 Hz / 300-3000 Hz / >=3000 Hz)
        and attack_time.
    """
    duration = len(y) / sr
    rms = librosa.feature.rms(y=y, hop_length=HOP_LENGTH)[0]
    rms_mean = float(np.mean(rms))
    rms_std = float(np.std(rms))
    peak = float(np.max(np.abs(y)))
    # 1e-10 guards the divisions below against digital silence.
    crest_factor = peak / (rms_mean + 1e-10)
    peak_rms_ratio = float(np.max(rms) / (np.mean(rms) + 1e-10))
    zcr = librosa.feature.zero_crossing_rate(y, hop_length=HOP_LENGTH)[0]
    zcr_mean = float(np.mean(zcr))
    S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
    S_power = S ** 2
    # NOTE(review): librosa documents the S parameter of these spectral_*
    # functions as a *magnitude* spectrogram (spectral_flatness even applies
    # power=2.0 internally); passing the power spectrogram skews the stats
    # toward the strongest bins. Downstream thresholds were presumably tuned
    # against this behaviour — confirm before changing.
    spectral_centroid = librosa.feature.spectral_centroid(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(S=S_power, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_flatness = librosa.feature.spectral_flatness(S=S_power)[0]
    # Coarse three-band energy split used by the role/character classifiers.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)
    low_mask = freqs < 300
    mid_mask = (freqs >= 300) & (freqs < 3000)
    high_mask = freqs >= 3000
    band_energy = np.mean(S_power, axis=1)
    total_energy = np.sum(band_energy) + 1e-10
    low_ratio = float(np.sum(band_energy[low_mask]) / total_energy)
    mid_ratio = float(np.sum(band_energy[mid_mask]) / total_energy)
    high_ratio = float(np.sum(band_energy[high_mask]) / total_energy)
    # "Attack" here is the time of the loudest RMS frame (time-to-peak),
    # not a classic 10-90% attack-transient measurement.
    rms_peak_idx = int(np.argmax(rms))
    attack_time = float(rms_peak_idx * HOP_LENGTH / sr)
    return {
        "duration": round(duration, 4),
        "rms_mean": round(rms_mean, 6),
        "rms_std": round(rms_std, 6),
        "peak_amplitude": round(peak, 6),
        "crest_factor": round(crest_factor, 2),
        "peak_rms_ratio": round(peak_rms_ratio, 2),
        "zcr_mean": round(zcr_mean, 4),
        "spectral_centroid_mean": round(float(np.mean(spectral_centroid)), 2),
        "spectral_centroid_std": round(float(np.std(spectral_centroid)), 2),
        "spectral_centroid_max": round(float(np.max(spectral_centroid)), 2),
        "spectral_bandwidth_mean": round(float(np.mean(spectral_bandwidth)), 2),
        "spectral_rolloff_mean": round(float(np.mean(spectral_rolloff)), 2),
        "spectral_flatness_mean": round(float(np.mean(spectral_flatness)), 6),
        "low_energy_ratio": round(low_ratio, 4),
        "mid_energy_ratio": round(mid_ratio, 4),
        "high_energy_ratio": round(high_ratio, 4),
        "attack_time": round(attack_time, 4),
    }
# ---------------------------------------------------------------------------
# Layer 2: Perceptual Analysis
# ---------------------------------------------------------------------------
def analyze_perceptual(y: np.ndarray, sr: int) -> dict:
    """Layer 2: MFCCs, chromagram, onsets, tempo and approximate loudness.

    Returns per-coefficient MFCC means/stds (N_MFCC of each), the mean CQT
    chroma vector (12 pitch classes, consumed by analyze_musical() for key
    finding), onset count and density, a global tempo estimate, and the
    simplified LUFS value from _compute_lufs().
    """
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, hop_length=HOP_LENGTH)
    mfcc_means = [round(float(np.mean(mfcc[i])), 4) for i in range(N_MFCC)]
    mfcc_stds = [round(float(np.std(mfcc[i])), 4) for i in range(N_MFCC)]
    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=HOP_LENGTH)
    chroma_mean = np.mean(chroma, axis=1)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH)
    onset_count = len(onset_times)
    tempo = 0.0
    if len(onset_env) > 0:
        # NOTE(review): librosa.beat.tempo was deprecated in favour of
        # librosa.feature.rhythm.tempo in librosa >= 0.10 — confirm the
        # pinned librosa version before upgrading.
        tempo_vals = librosa.beat.tempo(onset_envelope=onset_env, sr=sr, hop_length=HOP_LENGTH)
        if len(tempo_vals) > 0:
            tempo = float(tempo_vals[0])
    lufs = _compute_lufs(y, sr)
    return {
        "mfcc_means": mfcc_means,
        "mfcc_stds": mfcc_stds,
        "chroma_mean": [round(float(v), 4) for v in chroma_mean],
        "onset_count": onset_count,
        # onsets per second; the 0.01 floor avoids division by zero on
        # (near-)empty audio
        "onset_density": round(onset_count / max(len(y) / sr, 0.01), 2),
        "tempo": round(tempo, 2),
        "lufs": round(lufs, 2),
    }
def _compute_lufs(y: np.ndarray, sr: int) -> float:
    """Approximate integrated loudness (LUFS) in the spirit of ITU-R BS.1770.

    K-weighting is approximated by:
      1. a 2nd-order Butterworth high-pass (60 Hz; the spec's RLB stage uses
         ~38 Hz — 60 Hz kept to preserve the original rumble suppression), and
      2. a +4 dB high-shelf biquad around 1681 Hz (RBJ cookbook coefficients).
         Fix: the previous code used a 1st-order *high-pass* at 1.5 kHz here,
         which removed all low/mid content instead of shelving the highs and
         grossly underestimated the loudness of bass-heavy material.

    The weighted signal is RMS-measured over 400 ms blocks with a 100 ms hop
    and averaged (no absolute/relative gating). Returns a value clamped to
    >= -70.0; -70.0 on silence or any failure.
    """
    try:
        from scipy.signal import butter, sosfilt
        # Stage 2: high-pass to remove DC / sub-sonic rumble.
        sos_hp = butter(2, 60, btype='high', fs=sr, output='sos')
        y_filtered = sosfilt(sos_hp, y)
        # Stage 1: +4 dB high-shelf at ~1681 Hz (RBJ "highShelf", slope S=1).
        A = 10.0 ** (4.0 / 40.0)
        w0 = 2.0 * np.pi * 1681.0 / sr
        cos_w0, sin_w0 = np.cos(w0), np.sin(w0)
        alpha = sin_w0 / 2.0 * np.sqrt(2.0)
        sqrtA = np.sqrt(A)
        b0 = A * ((A + 1) + (A - 1) * cos_w0 + 2 * sqrtA * alpha)
        b1 = -2 * A * ((A - 1) + (A + 1) * cos_w0)
        b2 = A * ((A + 1) + (A - 1) * cos_w0 - 2 * sqrtA * alpha)
        a0 = (A + 1) - (A - 1) * cos_w0 + 2 * sqrtA * alpha
        a1 = 2 * ((A - 1) - (A + 1) * cos_w0)
        a2 = (A + 1) - (A - 1) * cos_w0 - 2 * sqrtA * alpha
        sos_shelf = np.array([[b0 / a0, b1 / a0, b2 / a0, 1.0, a1 / a0, a2 / a0]])
        y_filtered = sosfilt(sos_shelf, y_filtered)
        # 400 ms measurement blocks, 100 ms hop (BS.1770 momentary blocks).
        block_size = int(0.4 * sr)
        hop = int(0.1 * sr)
        if len(y_filtered) < block_size:
            block_size = len(y_filtered)
            hop = max(1, block_size // 4)
        blocks = []
        for i in range(0, len(y_filtered) - block_size + 1, hop):
            block = y_filtered[i:i + block_size]
            rms = np.sqrt(np.mean(block ** 2))
            if rms > 1e-10:
                blocks.append(rms)
        if not blocks:
            return -70.0
        mean_rms = np.mean(blocks)
        # -0.691 is the BS.1770 calibration offset; 1e-20 avoids log(0).
        lufs = -0.691 + 10 * np.log10(mean_rms ** 2 + 1e-20)
        return max(lufs, -70.0)
    except Exception:
        # Best-effort metric: any numeric/filter failure degrades to silence.
        return -70.0
# ---------------------------------------------------------------------------
# F0 Detection via aubio (C-native, ~1ms per file)
# ---------------------------------------------------------------------------
def _fast_f0(y: np.ndarray, sr: int) -> float:
    """Estimate the fundamental frequency (Hz) with aubio's YIN detector.

    aubio is C-native (~1 ms per file vs librosa.pyin's ~2 s). The signal is
    fed in hop-sized chunks; the aubio pitch object buffers win_s samples
    internally. Returns the median of all voiced (non-zero) frame estimates,
    or 0.0 when nothing is voiced or aubio fails for any reason.
    """
    try:
        # aubio pitch detector
        win_s = N_FFT
        hop_s = HOP_LENGTH
        pitch_o = aubio.pitch("yin", win_s, hop_s, sr)
        pitch_o.set_unit("Hz")
        pitch_o.set_tolerance(0.8)  # confidence threshold
        pitches = []
        # Fix: iterate every full hop-sized chunk. The previous bound of
        # len(y) - win_s + 1 silently dropped the final (win_s - hop_s)
        # samples even though aubio only consumes hop_s samples per call;
        # the in-loop short-chunk check was also unreachable.
        for i in range(0, len(y) - hop_s + 1, hop_s):
            chunk = y[i:i + hop_s].astype(np.float32)
            freq = pitch_o(chunk)
            if freq[0] > 0:
                pitches.append(float(freq[0]))
        if pitches:
            return float(np.median(pitches))
        return 0.0
    except Exception:
        # Missing/failed aubio must not break the worker: report "unvoiced".
        return 0.0
# ---------------------------------------------------------------------------
# Layer 3: Musical Analysis
# ---------------------------------------------------------------------------
def analyze_musical(signal_features: dict, perceptual_features: dict, y: np.ndarray, sr: int) -> dict:
    """Layer 3: key estimation, tonality, one-shot/loop flags, fundamental.

    Uses the layer-2 mean chroma for Krumhansl-Schmuckler key finding, then
    runs the (comparatively expensive) aubio F0 detector only for samples
    that look tonal.
    """
    chroma_vec = np.array(perceptual_features["chroma_mean"])
    key_name, key_corr, mode = _estimate_key(chroma_vec)
    # Tonal = the chroma profile is both uneven and peaked; flat profiles
    # indicate noise/percussion.
    tonal = float(np.std(chroma_vec)) > 0.05 and float(np.max(chroma_vec)) > 0.15
    dur = signal_features["duration"]
    n_onsets = perceptual_features["onset_count"]
    fundamental = 0.0
    note_name = "X"
    if tonal:
        fundamental = _fast_f0(y, sr)
        if fundamental > 0:
            # Hz -> MIDI (A4 = 440 Hz = note 69) -> scientific pitch name.
            midi = int(round(12 * np.log2(fundamental / 440.0) + 69))
            note_name = _midi_to_note_name(midi)
    return {
        "key": key_name,
        "key_correlation": round(key_corr, 4),
        "mode": mode,
        "is_tonal": tonal,
        "is_oneshot": dur < 2.0 and n_onsets <= 2,
        "is_loop": dur > 1.5 and n_onsets >= 4,
        "fundamental_freq": round(fundamental, 2),
        "fundamental_note": note_name,
    }
def _estimate_key(chroma_profile: np.ndarray) -> tuple:
    """Krumhansl-Schmuckler key-finding.

    Correlates the normalised chroma vector against all 24 rotations of the
    major/minor profiles and keeps the strongest match. Returns
    (key_name, correlation, mode): ("X", corr, "atonal") for flat profiles
    or weak matches (corr < 0.3); minor keys get an "m" suffix (e.g. "Am").
    """
    if np.max(chroma_profile) < 0.01:
        return "X", 0.0, "atonal"
    normalised = chroma_profile / (np.sum(chroma_profile) + 1e-10)

    def _corr(rotated: np.ndarray, profile: np.ndarray) -> float:
        # NaN-safe Pearson correlation (NaN occurs for constant vectors).
        c = float(np.corrcoef(rotated, profile)[0, 1])
        return 0.0 if np.isnan(c) else c

    best_key, best_corr, best_mode = "C", -1.0, "atonal"
    for tonic in range(12):
        shifted = np.roll(normalised, -tonic)
        # Major is tried first; minor only wins on a strictly higher
        # correlation — this preserves the original tie-breaking.
        for profile, mode in ((MAJOR_PROFILE, "major"), (MINOR_PROFILE, "minor")):
            score = _corr(shifted, profile)
            if score > best_corr:
                best_key, best_corr, best_mode = NOTE_NAMES[tonic], score, mode
    if best_corr < 0.3:
        return "X", best_corr, "atonal"
    if best_mode == "minor":
        return f"{best_key}m", best_corr, "minor"
    return best_key, best_corr, "major"
def _midi_to_note_name(midi: int) -> str:
    """Convert a MIDI note number to scientific pitch, e.g. 69 -> "A4".

    Returns "X" for values outside the valid MIDI range 0-127.
    """
    if not 0 <= midi <= 127:
        return "X"
    # Octave convention: MIDI 60 is C4.
    return f"{NOTE_NAMES[midi % 12]}{midi // 12 - 1}"
# ---------------------------------------------------------------------------
# Layer 4: Timbre Fingerprint
# ---------------------------------------------------------------------------
def analyze_timbre(y: np.ndarray, sr: int) -> dict:
    """Layer 4: timbre fingerprint from mel-band stats, contrast and tonnetz."""
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, hop_length=HOP_LENGTH),
        ref=np.max,
    )
    # Collapse the 128 mel bins into 8 coarse bands of 16 bins each and keep
    # mean/std/max per band as a compact spectral-shape signature.
    n_bands = 8
    width = 128 // n_bands
    band_stats = [
        {
            "mean": round(float(np.mean(chunk)), 2),
            "std": round(float(np.std(chunk)), 2),
            "max": round(float(np.max(chunk)), 2),
        }
        for chunk in (mel_db[b * width:(b + 1) * width, :] for b in range(n_bands))
    ]
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=HOP_LENGTH)
    contrast_mean = [round(float(np.mean(row)), 4) for row in contrast[:7]]
    try:
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr, hop_length=HOP_LENGTH)
        tonnetz_mean = [round(float(np.mean(row)), 4) for row in tonnetz[:6]]
    except Exception:
        # tonnetz can fail on degenerate/silent input — degrade to zeros.
        tonnetz_mean = [0.0] * 6
    return {
        "mel_band_stats": band_stats,
        "spectral_contrast": contrast_mean,
        "tonnetz": tonnetz_mean,
    }
# ---------------------------------------------------------------------------
# Classification — 3-layer priority: filename → folder → spectral heuristic
# ---------------------------------------------------------------------------
def classify_role(signal: dict, perceptual: dict, musical: dict, folder_hint: str = "") -> str:
    """Classify sample into a production role.

    Priority order:
    1. FILENAME keywords (most reliable — producers name their files correctly)
    2. FOLDER structure (less reliable — "SentimientoLatino" has everything mixed)
    3. SPECTRAL heuristics (fallback for unnamed/unknown samples)

    Args:
        signal / perceptual / musical: feature dicts from layers 1-3.
        folder_hint: "<grandparent> <parent> <stem>" string built by
            analyze_file() — despite the local name below, it contains folder
            names as well as the file stem.

    Returns:
        Role string such as "kick", "snare", "hihat", "perc", "bass", "lead",
        "pad", "pluck", "arp", "keys", "guitar", "brass", "synth", "vocal",
        "fx", "fill", "drumloop" or "oneshot".

    All keyword matching below is plain substring matching on the lowercased
    hint; the ORDER of each table encodes priority, so do not reorder entries
    casually.
    """
    filename = folder_hint.lower()  # contains parent + current folder names
    # ====================================================================
    # LAYER 1: Filename keyword extraction (HIGHEST PRIORITY)
    # Matches specific patterns like "Lead", "Pad", "Bass" in filenames
    # ====================================================================
    # Ordered by specificity — more specific keywords first
    filename_map = [
        # (keyword_pattern, role, require_not) — avoid false positives
        (["reese"], "bass", []),
        (["808"], "bass", []),
        (["kick"], "kick", ["kickdown", "kick drum"]),
        (["snare"], "snare", []),
        (["hi-hat", "hihat", "hats", "hat "], "hihat", []),
        (["shaker"], "perc", []),
        (["tambourine", "tambor"], "perc", []),
        (["conga", "bongo", "rim"], "perc", []),
        (["timbal"], "perc", []),
        (["vocal chop", "v.chop", "vox chop"], "vocal", []),
        (["vocal", "vox", "vocals"], "vocal", []),
        (["pluck"], "pluck", []),
        (["bell"], "pluck", []),
        (["stab"], "oneshot", []),
        (["lead"], "lead", []),
        (["arp", "arpeggio"], "arp", []),
        (["pad reverse"], "pad", []),
        (["pad", "pads"], "pad", []),
        (["chord", "chords"], "pad", []),
        (["rhodes", "piano", "keys", "key "], "keys", []),
        (["guitar"], "guitar", []),
        (["string"], "pad", []),
        (["brass"], "brass", []),
        (["synth"], "synth", []),
        (["texture"], "pad", []),
        (["riser", "sweep", "impact", "explosion"], "fx", []),
        (["loop"], "drumloop", ["vocal loop", "melody loop"]),
        (["fill"], "fill", []),
        (["drum"], "drumloop", []),
    ]
    for keywords, role, excludes in filename_map:
        for kw in keywords:
            if kw in filename:
                # Check exclusions
                excluded = False
                for ex in excludes:
                    if ex in filename:
                        excluded = True
                        break
                if not excluded:
                    return role
    # ====================================================================
    # LAYER 2: Midilatino / SS_RNBL structured filename parsing
    # These packs have naming conventions we can extract roles from
    # ====================================================================
    # Midilatino pattern: "Midilatino_Song_Key_BPM_StemType.wav"
    # e.g. "Midilatino_Holanda_F_Min_108BPM_Lead.wav"
    # e.g. "Midilatino_Cookie_E_Min_89BPM_Pluck.wav"
    parts = filename.replace(".wav", "").replace(".flac", "").replace(".mp3", "").split("_")
    if len(parts) >= 2:
        # Check last part for stem type
        # NOTE(review): this runs for ANY underscore-separated hint with >= 2
        # parts, not only Midilatino files — substring hits in the last two
        # parts can therefore fire on unrelated packs.
        last_parts = " ".join(parts[-2:]).lower()
        stem_map = {
            "drums": "drumloop", "drum": "drumloop",
            "bass": "bass", "reese": "bass",
            "lead": "lead", "pluck": "pluck", "pluck fx": "fx",
            "pad": "pad", "pad reverse": "pad",
            "arp": "arp", "vocal": "vocal", "vocals": "vocal",
            "vocal chop": "vocal", "vox": "vocal",
            "guitar": "guitar", "rhodes": "keys", "rhode": "keys",
            "piano": "keys", "keys": "keys",
            "synth": "synth", "texture": "pad", "texture 2": "pad",
            "bell chords": "pad", "accent": "oneshot", "accent keys": "keys",
            "harp": "pluck", "shaker": "perc",
        }
        for stem_kw, stem_role in stem_map.items():
            if stem_kw in last_parts:
                return stem_role
    # SS_RNBL pattern: "SS_RNBL_Song_Stem_Type.wav"
    # e.g. "SS_RNBL_Amor_One_Shot_Bass_C_.wav"
    if "ss_rnbl" in filename or "ss rnbl" in filename:
        ss_map = {
            "kick": "kick", "snare": "snare", "hats": "hihat", "hat": "hihat",
            "perc": "perc", "bass": "bass", "lead": "lead", "pad": "pad",
            "fx": "fx", "top": "drumloop", "drum": "drumloop",
            "v.chop": "vocal", "phrases": "vocal",
            "one shot": "oneshot", "music": "drumloop",
            "double": "drumloop", "add": "drumloop",
            "gustas": "drumloop",  # "Gustas" are full loop sections
        }
        for kw, role in ss_map.items():
            if kw in filename:
                return role
    # ====================================================================
    # LAYER 3: Folder-based hints (MEDIUM PRIORITY)
    # Only for folders that are explicitly categorized
    # ====================================================================
    folder_map = {
        "kick": "kick", "kicks": "kick", "8. kicks": "kick",
        "snare": "snare", "snares": "snare", "9. snare": "snare",
        "hi-hat": "hihat", "hihat": "hihat", "hi-hats": "hihat",
        "bass": "bass",
        "perc": "perc", "percs": "perc", "10. percs": "perc",
        "fx": "fx", "5. fx": "fx",
        "drum loops": "drumloop", "4. drum loops": "drumloop", "drumloops": "drumloop",
        "vocal": "vocal", "vocals": "vocal", "11. vocals": "vocal",
        "fill": "fill", "fills": "fill", "7. fill": "fill",
        "3. one shots": "oneshot",
    }
    for key, role in folder_map.items():
        if key in folder_hint.lower():
            return role
    # ====================================================================
    # LAYER 4: Spectral heuristics (LOWEST PRIORITY — fallback only)
    # Only used when filename and folder give no signal
    # ====================================================================
    centroid = signal["spectral_centroid_mean"]
    low_r = signal["low_energy_ratio"]
    high_r = signal["high_energy_ratio"]
    dur = signal["duration"]
    onsets = perceptual["onset_count"]
    is_tonal = musical["is_tonal"]
    attack = signal["attack_time"]
    rms_std = signal["rms_std"]
    # Percussive one-shots
    if centroid < 600 and low_r > 0.5 and dur < 1.0 and attack < 0.01 and onsets <= 3:
        return "kick"
    if centroid > 5000 and high_r > 0.4 and dur < 0.3:
        return "hihat"
    if 1000 < centroid < 5000 and attack < 0.005 and onsets <= 2:
        return "snare"
    if dur < 0.5 and onsets <= 2 and 500 < centroid < 5000:
        return "perc"
    # Tonal classification (for long tonal samples that didn't match filename)
    if is_tonal:
        # Sub-bass / bass
        if centroid < 200 and low_r > 0.7:
            return "bass"
        # Pad: sustained, low variance, long
        if rms_std < 0.05 and dur > 1.0 and centroid < 4000:
            return "pad"
        # Pluck: short, tonal
        if dur < 0.8 and onsets <= 3:
            return "pluck"
        # Lead: prominent, mid-high frequency
        if 500 < centroid < 6000:
            return "lead"
        # Keys: mid frequency, moderate dynamics
        if 200 < centroid < 2000 and rms_std < 0.1:
            return "keys"
        # Generic tonal loop
        if dur > 2.0 and onsets > 4:
            return "drumloop"
        return "synth"
    # Atonal loops
    if dur > 2.0 and onsets >= 4:
        return "drumloop"
    # Short atonal
    if dur < 2.0 and onsets <= 1:
        return "oneshot"
    return "fx"
def classify_character(signal: dict, perceptual: dict, musical: dict) -> str:
    """Pick the dominant sonic-character label for a sample.

    A fixed, ordered set of threshold rules is evaluated over the layer-1
    features; every matching rule contributes a weighted score and the
    highest-scoring label wins. Returns "neutral" when no rule matches.
    (The thresholds mirror the module-level CHARACTERS table.)
    """
    centroid = signal["spectral_centroid_mean"]
    low = signal["low_energy_ratio"]
    high = signal["high_energy_ratio"]
    mid = signal["mid_energy_ratio"]
    duration = signal["duration"]
    attack = signal["attack_time"]
    dynamics = signal["peak_rms_ratio"]
    flatness = signal["spectral_flatness_mean"]
    rms_var = signal["rms_std"]
    bandwidth = signal["spectral_bandwidth_mean"]
    # (label, matched?, score) — keep this order: dict insertion order breaks
    # score ties in max() below, exactly as the original if-chain did.
    rules = [
        ("boomy", low >= 0.6 and centroid <= 400, low * 2),
        ("deep", low >= 0.5 and centroid <= 500, low * 1.5),
        ("sharp", high >= 0.4 and centroid >= 3000 and attack <= 0.005, high * 2),
        ("crisp", high >= 0.3 and centroid >= 4000 and duration <= 0.2, high * 1.5),
        ("warm", 300 <= centroid <= 2000 and mid >= 0.4, mid * 1.5),
        ("bright", centroid >= 3000 and high >= 0.3, high * 1.5),
        ("dark", centroid <= 800 and low >= 0.4, low * 1.5),
        ("ethereal", 1500 <= centroid <= 5000 and rms_var <= 0.03, 1.0),
        ("short", duration <= 0.15, 1.0),
        ("impact", attack <= 0.005 and dynamics >= 5.0, dynamics / 5.0),
        ("full", duration >= 1.0 and bandwidth >= 4000, 1.0),
        ("hollow", mid <= 0.2 and low >= 0.3 and high >= 0.3, 1.0),
        ("tight", attack <= 0.003 and duration <= 0.3 and centroid >= 1000, 1.0),
        ("lush", flatness >= 0.1 and mid >= 0.3 and duration >= 0.5, flatness * 5),
        ("aggressive", dynamics >= 4.0 and centroid >= 2000, dynamics / 4.0),
        ("soft", dynamics <= 3.0 and attack >= 0.01, 1.0),
    ]
    scores = {label: score for label, hit, score in rules if hit}
    return max(scores, key=scores.get) if scores else "neutral"
# ---------------------------------------------------------------------------
# Full Analysis Pipeline (single file)
# ---------------------------------------------------------------------------
def analyze_file(filepath: str) -> Optional[dict]:
    """Full 4-layer analysis of a single audio file.

    This function must stay picklable (module-level, no closures): it is the
    worker submitted to ProcessPoolExecutor by batch_analyze().

    Returns:
        None for unreadable/too-short files (< 512 samples),
        {"original_path", "error"} on any exception (so one bad file never
        kills a worker process), otherwise the full per-sample record used
        in the index.
    """
    try:
        # Cap decoding at 30 s: plenty for one-shots/loops, bounds CPU+RAM.
        y, sr = librosa.load(filepath, sr=SAMPLE_RATE, mono=True, duration=30.0)
        if len(y) < 512:
            return None
        # Peak-normalize so features are level-independent; the source file's
        # absolute loudness is discarded from this point on.
        peak = np.max(np.abs(y))
        if peak > 1e-6:
            y = y / peak
        path = Path(filepath)
        # Pass BOTH the full filename and folder structure to classifier
        # (grandparent + parent folder names + file stem).
        classify_hint = f"{path.parent.parent.name} {path.parent.name} {path.stem}"
        signal = analyze_signal(y, sr)
        perceptual = analyze_perceptual(y, sr)
        musical = analyze_musical(signal, perceptual, y, sr)
        timbre = analyze_timbre(y, sr)
        role = classify_role(signal, perceptual, musical, classify_hint)
        character = classify_character(signal, perceptual, musical)
        new_name = _generate_name(role, musical, perceptual, character, filepath)
        file_hash = _hash_file(filepath)
        return {
            "original_path": filepath,
            "original_name": path.name,
            "new_name": new_name,
            "file_hash": file_hash,
            "file_size": os.path.getsize(filepath),
            "role": role,
            "character": character,
            "musical": musical,
            # chroma_mean is dropped here on purpose: it is only needed to
            # derive the key/tonality above, not in the saved index.
            "perceptual": {
                "mfcc_means": perceptual["mfcc_means"],
                "mfcc_stds": perceptual["mfcc_stds"],
                "onset_count": perceptual["onset_count"],
                "onset_density": perceptual["onset_density"],
                "tempo": perceptual["tempo"],
                "lufs": perceptual["lufs"],
            },
            "timbre": timbre,
        }
    except Exception as e:
        return {"original_path": filepath, "error": str(e)}
def _generate_name(role: str, musical: dict, perceptual: dict, character: str, filepath: str) -> str:
    """Build the canonical sample filename: role_key_bpm_character_id.ext.

    Key preference: the detected fundamental note when the sample is tonal,
    falling back to the estimated key; "X" when neither is known. A 6-char
    md5 prefix of the original path keeps names unique and stable across
    runs. BPM is zero-padded to 3 digits (000 when unknown).
    """
    if musical["is_tonal"]:
        key = musical["fundamental_note"]
    else:
        key = "X"
    if key == "X" and musical["key"] != "X":
        key = musical["key"]
    tempo = perceptual["tempo"]
    bpm = int(tempo) if tempo > 0 else 0
    uid = hashlib.md5(filepath.encode()).hexdigest()[:6]
    suffix = Path(filepath).suffix
    return f"{role}_{key}_{bpm:03d}_{character}_{uid}{suffix}"
def _hash_file(filepath: str) -> str:
    """Cheap content fingerprint for duplicate detection.

    md5 of the first 64 KiB, plus the last 64 KiB when the file exceeds
    128 KiB — avoids reading large samples end-to-end. Files between 64 and
    128 KiB are fingerprinted by their first chunk only.
    """
    chunk = 65536
    digest = hashlib.md5()
    total = os.path.getsize(filepath)
    with open(filepath, "rb") as fh:
        digest.update(fh.read(chunk))
        if total > 2 * chunk:
            fh.seek(total - chunk)
            digest.update(fh.read(chunk))
    return digest.hexdigest()
# ---------------------------------------------------------------------------
# File Collection
# ---------------------------------------------------------------------------
def collect_audio_files(*directories: str) -> list[str]:
    """Recursively gather every audio file under the given directories.

    Non-existent directories are silently skipped; matching is by extension
    (case-insensitive, against AUDIO_EXT). Returns a sorted list of path
    strings so downstream runs are deterministic.
    """
    found: list[str] = []
    for directory in directories:
        root = Path(directory)
        if not root.exists():
            continue
        found.extend(
            str(entry)
            for entry in root.rglob("*")
            if entry.is_file() and entry.suffix.lower() in AUDIO_EXT
        )
    return sorted(found)
# ---------------------------------------------------------------------------
# Batch Analysis (TRUE multiprocessing)
# ---------------------------------------------------------------------------
def batch_analyze(files: list[str], workers: int = MAX_WORKERS, checkpoint_path: Optional[str] = None) -> list[dict]:
    """Analyze all files using ProcessPoolExecutor for real multi-core parallelism.

    Each process runs independently — no GIL contention, no shared memory.

    Args:
        files: paths of audio files to analyze.
        workers: number of worker processes.
        checkpoint_path: optional JSONL file. Paths already present there are
            skipped on startup, and progress is re-written every 50
            completions — so up to 49 fresh results can be lost on a crash.

    Returns:
        List of successful analysis records. Order follows completion
        (as_completed), not input order. Failures are printed, not returned.
    """
    results = []
    errors = []
    done = 0
    total = len(files)
    # Resume from checkpoint
    completed_paths = set()
    if checkpoint_path and os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    entry = json.loads(line.strip())
                    completed_paths.add(entry["original_path"])
                    results.append(entry)
                except (json.JSONDecodeError, KeyError):
                    # Tolerate a truncated/corrupt trailing line from a crash.
                    pass
        done = len(results)
        print(f"Resumed from checkpoint: {done}/{total}")
    remaining = [f for f in files if f not in completed_paths]
    if not remaining:
        print("All files already analyzed.")
        return results
    print(f"Analyzing {len(remaining)} files with {workers} PROCESSES (true parallel)...")
    with ProcessPoolExecutor(max_workers=workers) as executor:
        # Map each future back to its input path for error reporting.
        futures = {executor.submit(analyze_file, f): f for f in remaining}
        for future in as_completed(futures):
            filepath = futures[future]
            done += 1
            try:
                result = future.result()
                # analyze_file contract: None = unusable file, dict with
                # "error" = analysis failure, otherwise a full record.
                if result is None:
                    errors.append(filepath)
                    continue
                if "error" in result:
                    errors.append(f"{filepath}: {result['error']}")
                    continue
                results.append(result)
                if checkpoint_path and done % 50 == 0:
                    _save_checkpoint(results, checkpoint_path)
                if done % 25 == 0 or done == total:
                    print(f" [{done}/{total}] {result.get('new_name', '?')}")
            except Exception as e:
                # Worker crashed (e.g. killed process): record and continue.
                errors.append(f"{filepath}: {e}")
    if errors:
        print(f"\nErrors ({len(errors)}):")
        for e in errors[:10]:
            print(f" - {e}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")
    return results
def _save_checkpoint(results: list[dict], path: str):
    """Persist analysis results as JSON Lines (one record per line).

    Rewrites the whole file each time; non-ASCII (e.g. Spanish filenames)
    is kept verbatim via ensure_ascii=False.
    """
    with open(path, "w", encoding="utf-8") as fh:
        fh.writelines(json.dumps(entry, ensure_ascii=False) + "\n" for entry in results)
def save_index(results: list[dict], output_path: str):
    """Write the master sample-index JSON with aggregate metadata.

    role/key/character histograms count successful analyses only, while
    total_samples and errors also count failed entries. The full result
    list is embedded under "samples".
    """
    roles: dict = {}
    keys: dict = {}
    characters: dict = {}
    error_count = 0
    for entry in results:
        if "error" in entry:
            error_count += 1
            continue
        for bucket, value in (
            (roles, entry.get("role", "unknown")),
            (keys, entry.get("musical", {}).get("key", "X")),
            (characters, entry.get("character", "unknown")),
        ):
            bucket[value] = bucket.get(value, 0) + 1
    index = {
        "metadata": {
            "total_samples": len(results),
            "errors": error_count,
            "roles": roles,
            "keys": keys,
            "characters": characters,
        },
        "samples": results,
    }
    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(index, fh, ensure_ascii=False, indent=2)
    print(f"Index saved to {output_path}")
    print(f" Total: {len(results)} | Roles: {roles} | Keys: {len(keys)} | Characters: {len(characters)}")
# ---------------------------------------------------------------------------
# Rename Engine
# ---------------------------------------------------------------------------
def plan_renames(results: list[dict], output_dir: str) -> list[dict]:
    """Plan (but do not execute) the copy of every analysed sample.

    Each sample maps to <output_dir>/<role>/<new_name>; duplicate generated
    names receive a numeric suffix (_2, _3, ...). Entries with errors or no
    generated name are skipped. Returns the plan entries consumed by
    execute_renames(): {old_path, new_path, new_name, role, original_name}.
    """
    destination = Path(output_dir)
    plan: list[dict] = []
    taken: set = set()
    for entry in results:
        if "error" in entry or "new_name" not in entry:
            continue
        candidate = entry["new_name"]
        if candidate in taken:
            base = Path(candidate).stem
            suffix = Path(candidate).suffix
            # Find the first free numeric suffix, starting at _2.
            n = 2
            while f"{base}_{n}{suffix}" in taken:
                n += 1
            candidate = f"{base}_{n}{suffix}"
        taken.add(candidate)
        plan.append({
            "old_path": str(Path(entry["original_path"])),
            "new_path": str(destination / entry["role"] / candidate),
            "new_name": candidate,
            "role": entry["role"],
            "original_name": entry["original_name"],
        })
    return plan
def execute_renames(renames: list[dict], dry_run: bool = True) -> dict:
    """Apply a rename plan by copying files into their new locations.

    With dry_run=True (the default) nothing is written and every entry is
    counted as skipped; missing sources are skipped too. Uses a
    metadata-preserving copy (copy2), not a move, so originals stay put.
    Returns {"planned", "executed", "skipped", "errors"}.
    """
    summary = {"planned": len(renames), "executed": 0, "skipped": 0, "errors": []}
    for entry in renames:
        source = Path(entry["old_path"])
        target = Path(entry["new_path"])
        if not source.exists() or dry_run:
            summary["skipped"] += 1
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        try:
            import shutil
            shutil.copy2(str(source), str(target))
            summary["executed"] += 1
        except Exception as exc:
            summary["errors"].append(f"{source.name} -> {target.name}: {exc}")
    return summary

View File

@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- DRUMLOOPS ---
drumloops = [s for s in samples if s.get("role") == "drumloop"]
print(f"DRUMLOOPS ({len(drumloops)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in drumloops[:50]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- ONESHOTS ---
oneshots = [s for s in samples if s.get("role") == "oneshot"]
print(f"\nONESHOTS ({len(oneshots)} total)")
print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
print("-" * 120)
for s in oneshots[:40]:
orig = s.get("original_name", "?")[:54]
dur = s["signal"]["duration"]
onc = s["perceptual"]["onset_count"]
cent = s["signal"]["spectral_centroid_mean"]
low = s["signal"]["low_energy_ratio"]
mid = s["signal"]["mid_energy_ratio"]
high = s["signal"]["high_energy_ratio"]
ton = s["musical"]["is_tonal"]
key = s["musical"]["key"]
char = s["character"]
print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")
# --- Summary: folder source of drumloops ---
print(f"\n\nDRUMLOOP ORIGINS:")
from collections import Counter
origins = Counter()
for s in drumloops:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
# Find the category folder
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins[parts[i+1]] += 1
break
for k, v in origins.most_common():
print(f" {k:40s} {v:4d}")
# --- Summary: folder source of oneshots ---
print(f"\nONESHOT ORIGINS:")
origins2 = Counter()
for s in oneshots:
path = s.get("original_path", "")
parts = path.replace("\\", "/").split("/")
for i, p in enumerate(parts):
if "reggaeton" in p.lower() and i+1 < len(parts):
origins2[parts[i+1]] += 1
break
for k, v in origins2.most_common():
print(f" {k:40s} {v:4d}")

View File

@@ -0,0 +1,72 @@
"""Forensic analysis of misclassified samples."""
import json, os, re
from collections import Counter
PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
d = json.load(f)
samples = d["samples"]
# --- Analyze filename patterns in misclassified ---
print("=" * 70)
print(" PATRONES DE NOMBRE EN 'DRUMLOOPS'")
print("=" * 70)
# Extract Midilatino stems
ml_stems = Counter()
for s in samples:
if s.get("role") != "drumloop":
continue
name = s.get("original_name", "")
# Midilatino pattern: Midilatino_Name_Key_Min_BPM_Stem.wav
if "Midilatino" in name or "midilatino" in name:
# Extract the stem type (last part before .wav)
parts = name.replace(".wav", "").replace(".flac", "").replace(".mp3", "")
# Try to find stem keywords
for kw in ["Drums", "Bass", "Lead", "Pad", "Pluck", "Arp", "Vocal",
"Vox", "Guitar", "Rhodes", "Piano", "Synth", "Reese",
"Texture", "Chords", "Reverse", "Fx", "Accent", "Harp",
"Keys", "Bell", "Loop", "Stem", "Snare", "Kick", "Hat",
"Perc", "Shaker", "Hi", "808"]:
if kw.lower() in parts.lower():
ml_stems[kw] += 1
break
else:
# No stem keyword found - it's the full mix
ml_stems["FULL_MIX"] += 1
print("\nMidilatino stem types in 'drumloop':")
for k, v in ml_stems.most_common():
print(f" {k:15s} {v:4d}")
# --- SS_RNBL patterns ---
print("\n\nSentimientoLatino SS_RNBL patterns:")
ss_stems = Counter()
for s in samples:
name = s.get("original_name", "")
if "SS_RNBL" in name:
# Extract type: SS_RNBL_Song_Stem_Type.wav
parts = name.replace(".wav", "").split("_")
if len(parts) >= 4:
stem_type = parts[3] if parts[3] not in ("One", "Shot") else "_".join(parts[3:5])
ss_stems[stem_type] += 1
for k, v in ss_stems.most_common():
print(f" {k:20s} {v:4d}")
# --- All filename keywords ---
print("\n\nAll filename role keywords across library:")
role_keywords = Counter()
for s in samples:
name = s.get("original_name", "").lower()
for kw in ["kick", "snare", "hi-hat", "hihat", "hat", "bass", "808",
"lead", "pad", "pluck", "arp", "vocal", "vox", "fx",
"perc", "drum", "loop", "fill", "guitar", "piano", "rhodes",
"synth", "bell", "brass", "string", "reese", "texture",
"chord", "shaker", "tambourine", "conga", "rim"]:
if kw in name:
role_keywords[kw] += 1
for k, v in role_keywords.most_common(25):
print(f" {k:15s} {v:4d}")

143
src/analyzer/run_batch.py Normal file
View File

@@ -0,0 +1,143 @@
"""
Batch analyzer - STANDALONE for double-click execution.
Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).
IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows
for ProcessPoolExecutor. Without it, child processes re-import this file
and create infinite process spawning.
"""
from __future__ import annotations
import sys
import os
import time
import json
import warnings
import traceback
import multiprocessing
# CRITICAL: Windows multiprocessing guard - MUST be at top level
multiprocessing.freeze_support()
# Silence all library warnings so the console output stays readable.
warnings.filterwarnings("ignore")
# Project root: three directory levels above this file (src/analyzer/run_batch.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so relative paths (data/, librerias/) resolve
if PROJECT not in sys.path:
    # Make `from src.analyzer import ...` work when launched by double-click.
    sys.path.insert(0, PROJECT)
from src.analyzer import (
collect_audio_files,
batch_analyze,
save_index,
plan_renames,
)
def main():
    """Run the full batch pipeline: collect -> analyze -> index -> rename plan.

    Prints progress for each of the four steps and finishes with summary
    histograms (roles, characters, keys) over the successful analyses.
    """
    banner = "=" * 60
    print(banner)
    print(" ANALIZADOR FORENSE DE SAMPLES v2.0")
    print(" ProcessPoolExecutor + aubio F0 (C-native)")
    print(" 4 capas: Signal + Perceptual + Musical + Timbre")
    print(" 16 procesos independientes = 16 cores en paralelo")
    print(banner)

    # Both spellings of the library folder are scanned ("libreria"/"librerias").
    lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
    lib2 = os.path.join(PROJECT, "librerias", "reggaeton")
    print("\n[1/4] Colectando archivos de audio...")
    files = collect_audio_files(lib1, lib2)
    print(f" Encontrados: {len(files)} archivos")
    if not files:
        print("ERROR: No se encontraron archivos de audio.")
        return

    data_dir = os.path.join(PROJECT, "data")
    os.makedirs(data_dir, exist_ok=True)
    checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")
    # A tiny checkpoint file is assumed to be a leftover from the failed
    # thread-based run; drop it so the analysis starts clean.
    if os.path.exists(checkpoint) and os.path.getsize(checkpoint) < 1000:
        os.remove(checkpoint)
        print(" (Removed broken checkpoint)")

    print("\n[2/4] Analizando con 16 PROCESOS (70% CPU)...")
    print(" Cada proceso en su propio core, sin GIL")
    print(f" Checkpoint: {checkpoint}")
    print(" (Si se corta, re-ejecuta y continua desde donde quedo)")
    print()
    t0 = time.time()
    results = batch_analyze(files, workers=16, checkpoint_path=checkpoint)
    elapsed = time.time() - t0

    # Partition results into successes and failures in a single pass.
    valid, errors = [], []
    for item in results:
        (errors if "error" in item else valid).append(item)
    print(f"\n Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
    print(f" Exitosos: {len(valid)} | Errores: {len(errors)}")
    if errors:
        err_path = os.path.join(data_dir, "analysis_errors.json")
        with open(err_path, "w", encoding="utf-8") as fh:
            json.dump(errors, fh, ensure_ascii=False, indent=2)
        print(f" Errores guardados en: {err_path}")

    print("\n[3/4] Guardando indice...")
    index_path = os.path.join(data_dir, "sample_index.json")
    save_index(results, index_path)

    print("\n[4/4] Plan de renombrado...")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
    renames = plan_renames(results, output_dir)
    rename_path = os.path.join(data_dir, "rename_plan.json")
    with open(rename_path, "w", encoding="utf-8") as fh:
        json.dump(renames, fh, ensure_ascii=False, indent=2)
    print(f" {len(renames)} archivos para renombrar")
    print(f" Plan guardado en: {rename_path}")

    # Summary: tally role / character / key frequencies over the successes.
    print("\n" + banner)
    print(" RESUMEN")
    print(banner)
    roles = {}
    chars = {}
    keys = {}
    for item in valid:
        for table, value in (
            (roles, item.get("role", "?")),
            (chars, item.get("character", "?")),
            (keys, item.get("musical", {}).get("key", "X")),
        ):
            table[value] = table.get(value, 0) + 1
    print("\n Roles:")
    for role, count in sorted(roles.items(), key=lambda kv: -kv[1]):
        print(f" {role:12s} {count:4d} {'#' * min(count, 60)}")
    print("\n Caracteres:")
    for char, count in sorted(chars.items(), key=lambda kv: -kv[1]):
        print(f" {char:12s} {count:4d} {'#' * min(count, 50)}")
    print("\n Tonalidades (top 10):")
    for key, count in sorted(keys.items(), key=lambda kv: -kv[1])[:10]:
        print(f" {key:5s} {count:4d}")

    print("\n Proximo paso: ejecuta 2_RENOMBRAR.bat")
    print(banner)
# Script entry point. The __main__ guard is mandatory here: ProcessPoolExecutor
# on Windows re-imports this module in every child process, and without the
# guard each child would re-run main() and spawn more children.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Show the full traceback, then wait for Enter so a double-click
        # launch doesn't close the console before the error can be read.
        print(f"\nFATAL ERROR: {e}")
        traceback.print_exc()
        input("Presiona Enter para cerrar...")

View File

@@ -0,0 +1,88 @@
"""
Rename executor - Copies files to analyzed_samples/ with standardized names.
Reads from data/rename_plan.json generated by the batch analyzer.
"""
from __future__ import annotations
import sys
import os
import json
import shutil
import warnings
warnings.filterwarnings("ignore")  # keep console output free of library warnings
# Project root: three directory levels above this file.
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so relative paths resolve
if PROJECT not in sys.path:
    sys.path.insert(0, PROJECT)  # make the `src` package importable when double-clicked
from src.analyzer import plan_renames, execute_renames
def main():
    """Interactive rename step: load or build the plan, confirm, copy, log.

    Prefers an existing data/rename_plan.json; otherwise derives one from
    data/sample_index.json. Asks for confirmation before copying anything
    and writes a JSON log of the outcome.
    """
    rename_path = os.path.join(PROJECT, "data", "rename_plan.json")
    index_path = os.path.join(PROJECT, "data", "sample_index.json")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")

    # Load an existing plan if present, else regenerate it from the index.
    if os.path.exists(rename_path):
        print("Cargando plan de renombrado existente...")
        with open(rename_path, "r", encoding="utf-8") as fh:
            renames = json.load(fh)
    elif os.path.exists(index_path):
        print("Generando plan desde indice...")
        with open(index_path, "r", encoding="utf-8") as fh:
            index = json.load(fh)
        renames = plan_renames(index["samples"], output_dir)
        with open(rename_path, "w", encoding="utf-8") as fh:
            json.dump(renames, fh, ensure_ascii=False, indent=2)
    else:
        print("ERROR: No existe data/sample_index.json ni data/rename_plan.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return

    print(f"\n{len(renames)} archivos para renombrar")
    print(f"Destino: {output_dir}")
    print()

    # Preview the first few renames so the user knows what will happen.
    print("Ejemplos:")
    for entry in renames[:15]:
        print(f" {entry['original_name']:50s} -> {entry['role']:10s}\\{entry['new_name']}")
    remaining = len(renames) - 15
    if remaining > 0:
        print(f" ... y {remaining} mas")
    print()

    # Require explicit confirmation before touching the filesystem.
    if input("Ejecutar renombrado? (s/n): ").strip().lower() != "s":
        print("Cancelado.")
        return

    print("\nCopiando archivos...")
    stats = execute_renames(renames, dry_run=False)
    n_errors = len(stats.get("errors", []))
    print(f"\nResultado: {stats['executed']} copiados, {stats['skipped']} omitidos, {n_errors} errores")
    if stats.get("errors"):
        print("Errores:")
        for err in stats["errors"][:10]:
            print(f" {err}")

    # Persist a log of the executed plan and its outcome.
    log_path = os.path.join(PROJECT, "data", "rename_log.json")
    with open(log_path, "w", encoding="utf-8") as fh:
        json.dump({"stats": stats, "renames": renames}, fh, ensure_ascii=False, indent=2)
    print(f"\nLog guardado en: {log_path}")
# Script entry point: report any crash and keep the console window open
# so a double-click launch doesn't vanish before the error can be read.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"\nFATAL ERROR: {e}")
        import traceback  # imported lazily; only needed on the failure path
        traceback.print_exc()
        input("Presiona Enter para cerrar...")

117
src/analyzer/show_stats.py Normal file
View File

@@ -0,0 +1,117 @@
"""
Show statistics from the analysis index.
"""
import sys
import os
import json
# Project root: three directory levels above this file (src/analyzer/show_stats.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # run from the project root so data/ resolves
def main():
    """Print library statistics from the analysis index (read-only report).

    Loads data/sample_index.json and prints histograms of roles, sonic
    characters and keys, tempo/loudness ranges, tonal-vs-atonal and
    one-shot-vs-loop counts, plus a few example samples per role.
    Writes nothing to disk.
    """
    index_path = os.path.join(PROJECT, "data", "sample_index.json")
    if not os.path.exists(index_path):
        print("ERROR: No existe data/sample_index.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return
    with open(index_path, "r", encoding="utf-8") as f:
        index = json.load(f)
    samples = index["samples"]
    valid = [s for s in samples if "error" not in s]
    # Robustness fix: an index where every sample errored would otherwise
    # crash the percentage computations below with ZeroDivisionError.
    if not valid:
        print("ERROR: No existe data/sample_index.json")
        print(" Ejecuta primero 1_ANALIZAR.bat")
        return
    print("=" * 60)
    print(f" ESTADISTICAS DE LA BIBLIOTECA ({len(valid)} samples)")
    print("=" * 60)
    # Roles histogram
    roles = {}
    for s in valid:
        r = s.get("role", "?")
        roles[r] = roles.get(r, 0) + 1
    print("\n Roles:")
    max_count = max(roles.values()) if roles else 1
    for role, count in sorted(roles.items(), key=lambda x: -x[1]):
        bar_len = int(40 * count / max_count)
        # BUG FIX: the bar glyph was an empty string ('' * bar_len), so the
        # histogram bars never rendered (likely a lost '█' in the encoding).
        # Use '#' for consistency with run_batch's summary bars.
        print(f" {role:12s} {count:4d} {'#' * bar_len}")
    # Characters histogram
    chars = {}
    for s in valid:
        c = s.get("character", "?")
        chars[c] = chars.get(c, 0) + 1
    print("\n Caracteres sonoros:")
    max_count = max(chars.values()) if chars else 1
    for char, count in sorted(chars.items(), key=lambda x: -x[1]):
        bar_len = int(40 * count / max_count)
        print(f" {char:12s} {count:4d} {'#' * bar_len}")
    # Keys (top 15)
    keys = {}
    for s in valid:
        k = s.get("musical", {}).get("key", "X")
        keys[k] = keys.get(k, 0) + 1
    print("\n Tonalidades:")
    for key, count in sorted(keys.items(), key=lambda x: -x[1])[:15]:
        print(f" {key:5s} {count:4d}")
    # Tempo distribution; zeros are skipped (treated as "no tempo detected")
    tempos = [s.get("perceptual", {}).get("tempo", 0) for s in valid]
    tempos_nonzero = [t for t in tempos if t > 0]
    if tempos_nonzero:
        print("\n Tempo:")
        print(f" Rango: {min(tempos_nonzero):.0f} - {max(tempos_nonzero):.0f} BPM")
        print(f" Promedio: {sum(tempos_nonzero) / len(tempos_nonzero):.0f} BPM")
    # LUFS distribution; values at or below -70 LUFS are excluded
    lufs = [s.get("perceptual", {}).get("lufs", 0) for s in valid]
    lufs_valid = [v for v in lufs if v > -70]
    if lufs_valid:
        print("\n Loudness (LUFS):")
        print(f" Rango: {min(lufs_valid):.1f} a {max(lufs_valid):.1f} LUFS")
        print(f" Promedio: {sum(lufs_valid) / len(lufs_valid):.1f} LUFS")
    # Tonal vs atonal counts (len(valid) > 0 is guaranteed by the guard above)
    tonal = sum(1 for s in valid if s.get("musical", {}).get("is_tonal", False))
    atonal = len(valid) - tonal
    print("\n Tonalidad:")
    print(f" Tonal: {tonal} ({100 * tonal / len(valid):.0f}%)")
    print(f" Atonal: {atonal} ({100 * atonal / len(valid):.0f}%)")
    # One-shot vs loop counts
    oneshot = sum(1 for s in valid if s.get("musical", {}).get("is_oneshot", False))
    loops = sum(1 for s in valid if s.get("musical", {}).get("is_loop", False))
    print("\n Tipo:")
    print(f" One-shots: {oneshot}")
    print(f" Loops: {loops}")
    print(f" Otros: {len(valid) - oneshot - loops}")
    print("\n" + "=" * 60)
    # Up to five example samples per role for quick reference
    print("\n EJEMPLOS POR ROL:")
    by_role = {}
    for s in valid:
        by_role.setdefault(s.get("role", "?"), []).append(s)
    for role in sorted(by_role.keys()):
        print(f"\n [{role}] ({len(by_role[role])} total)")
        for s in by_role[role][:5]:
            key = s.get("musical", {}).get("key", "X")
            char = s.get("character", "?")
            bpm = s.get("perceptual", {}).get("tempo", 0)
            print(f" {s.get('new_name', '?'):50s} key={key:5s} bpm={bpm:5.0f} char={char}")
# Script entry point: print the statistics report when run directly.
if __name__ == "__main__":
    main()