feat: reggaeton production system with intelligent sample selection and FLP generation
This commit is contained in:
827
src/analyzer/__init__.py
Normal file
827
src/analyzer/__init__.py
Normal file
@@ -0,0 +1,827 @@
|
||||
"""Deep forensic audio sample analyzer.
|
||||
|
||||
4-layer analysis pipeline:
|
||||
Layer 1 - Signal: FFT, spectral centroid, bandwidth, rolloff, flatness, ZCR, RMS, crest factor
|
||||
Layer 2 - Perceptual: MFCC (20), chromagram (12), onset envelope, tempo, LUFS
|
||||
Layer 3 - Musical: Key estimation (Krumhansl-Schmuckler), F0 via aubio (C-native), tonal/atonal
|
||||
Layer 4 - Timbre: Mel band stats, spectral contrast, tonnetz
|
||||
|
||||
Architecture: ProcessPoolExecutor with 16 workers for TRUE multi-core parallelism.
|
||||
aubio for F0 (C-native, ~1ms per file vs pyin ~2s per file).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
|
||||
import numpy as np
|
||||
import librosa
|
||||
import soundfile as sf
|
||||
import aubio
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resampling target for all loaded audio (mono, CD-rate).
SAMPLE_RATE = 44100
# STFT hop size in samples, shared by every frame-based feature.
HOP_LENGTH = 512
# STFT window size in samples.
N_FFT = 2048
# Number of MFCC coefficients extracted in Layer 2.
N_MFCC = 20
# Number of chroma bins (one per pitch class).
N_CHROMA = 12
MAX_WORKERS = 16  # 70% of 24 cores

# File extensions treated as audio when scanning sample directories.
AUDIO_EXT = {".wav", ".flac", ".mp3", ".aif", ".aiff"}

# Krumhansl-Schmuckler key profiles
# (perceptual pitch-class weights for major/minor keys, rooted at C).
MAJOR_PROFILE = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
MINOR_PROFILE = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

# Character classification thresholds
# NOTE(review): classify_character() hard-codes the same thresholds inline
# rather than reading this table — keep both in sync when tuning.
CHARACTERS = {
    "boomy": {"low_ratio_min": 0.6, "centroid_max": 400},
    "deep": {"low_ratio_min": 0.5, "centroid_max": 500, "fundamental_max": 150},
    "sharp": {"high_ratio_min": 0.4, "centroid_min": 3000, "attack_max": 0.005},
    "crisp": {"high_ratio_min": 0.3, "centroid_min": 4000, "duration_max": 0.2},
    "warm": {"centroid_min": 300, "centroid_max": 2000, "mid_ratio_min": 0.4},
    "bright": {"centroid_min": 3000, "high_ratio_min": 0.3},
    "dark": {"centroid_max": 800, "low_ratio_min": 0.4},
    "ethereal": {"centroid_min": 1500, "centroid_max": 5000, "rms_std_max": 0.03},
    "short": {"duration_max": 0.15},
    "impact": {"attack_max": 0.005, "peak_rms_ratio_min": 5.0},
    "full": {"duration_min": 1.0, "bandwidth_min": 4000},
    "hollow": {"mid_ratio_max": 0.2, "low_ratio_min": 0.3, "high_ratio_min": 0.3},
    "tight": {"attack_max": 0.003, "duration_max": 0.3, "centroid_min": 1000},
    "lush": {"spectral_flatness_min": 0.1, "mid_ratio_min": 0.3, "duration_min": 0.5},
    "aggressive": {"peak_rms_ratio_min": 4.0, "centroid_min": 2000},
    "soft": {"peak_rms_ratio_max": 3.0, "attack_min": 0.01},
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer 1: Signal Analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_signal(y: np.ndarray, sr: int) -> dict:
    """Layer 1: Time-domain and spectral signal features.

    Args:
        y: Mono audio signal.
        sr: Sample rate in Hz.

    Returns:
        Dict of scalar features: duration, RMS stats, peak/crest, ZCR,
        spectral centroid/bandwidth/rolloff/flatness, low/mid/high band
        energy ratios, and attack time.
    """
    duration = len(y) / sr
    rms = librosa.feature.rms(y=y, hop_length=HOP_LENGTH)[0]
    rms_mean = float(np.mean(rms))
    rms_std = float(np.std(rms))
    peak = float(np.max(np.abs(y)))
    crest_factor = peak / (rms_mean + 1e-10)
    peak_rms_ratio = float(np.max(rms) / (np.mean(rms) + 1e-10))
    zcr = librosa.feature.zero_crossing_rate(y, hop_length=HOP_LENGTH)[0]
    zcr_mean = float(np.mean(zcr))

    S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH))
    S_power = S ** 2
    # BUG FIX: librosa's spectral_* feature functions expect a MAGNITUDE
    # spectrogram for `S` (spectral_flatness additionally raises S to
    # `power=2` internally).  The previous code passed the power
    # spectrogram, skewing centroid/bandwidth/rolloff weighting and
    # effectively computing flatness of S**4.
    spectral_centroid = librosa.feature.spectral_centroid(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr, hop_length=HOP_LENGTH)[0]
    spectral_flatness = librosa.feature.spectral_flatness(S=S)[0]

    # Band-energy split: energy (power) below 300 Hz / 300–3000 Hz / above.
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)
    low_mask = freqs < 300
    mid_mask = (freqs >= 300) & (freqs < 3000)
    high_mask = freqs >= 3000
    band_energy = np.mean(S_power, axis=1)
    total_energy = np.sum(band_energy) + 1e-10
    low_ratio = float(np.sum(band_energy[low_mask]) / total_energy)
    mid_ratio = float(np.sum(band_energy[mid_mask]) / total_energy)
    high_ratio = float(np.sum(band_energy[high_mask]) / total_energy)

    # "Attack" here is the time of the RMS peak, not a true envelope fit.
    rms_peak_idx = int(np.argmax(rms))
    attack_time = float(rms_peak_idx * HOP_LENGTH / sr)

    return {
        "duration": round(duration, 4),
        "rms_mean": round(rms_mean, 6),
        "rms_std": round(rms_std, 6),
        "peak_amplitude": round(peak, 6),
        "crest_factor": round(crest_factor, 2),
        "peak_rms_ratio": round(peak_rms_ratio, 2),
        "zcr_mean": round(zcr_mean, 4),
        "spectral_centroid_mean": round(float(np.mean(spectral_centroid)), 2),
        "spectral_centroid_std": round(float(np.std(spectral_centroid)), 2),
        "spectral_centroid_max": round(float(np.max(spectral_centroid)), 2),
        "spectral_bandwidth_mean": round(float(np.mean(spectral_bandwidth)), 2),
        "spectral_rolloff_mean": round(float(np.mean(spectral_rolloff)), 2),
        "spectral_flatness_mean": round(float(np.mean(spectral_flatness)), 6),
        "low_energy_ratio": round(low_ratio, 4),
        "mid_energy_ratio": round(mid_ratio, 4),
        "high_energy_ratio": round(high_ratio, 4),
        "attack_time": round(attack_time, 4),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer 2: Perceptual Analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_perceptual(y: np.ndarray, sr: int) -> dict:
    """Layer 2: MFCC statistics, chroma profile, onsets, tempo and LUFS."""
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, hop_length=HOP_LENGTH)
    coeff_means = [round(float(np.mean(row)), 4) for row in mfccs[:N_MFCC]]
    coeff_stds = [round(float(np.std(row)), 4) for row in mfccs[:N_MFCC]]

    chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=HOP_LENGTH)
    pitch_profile = np.mean(chroma, axis=1)

    # Onset statistics drive the one-shot vs. loop decision downstream.
    envelope = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH)
    detected = librosa.onset.onset_detect(onset_envelope=envelope, sr=sr, hop_length=HOP_LENGTH)
    n_onsets = len(librosa.frames_to_time(detected, sr=sr, hop_length=HOP_LENGTH))

    bpm = 0.0
    if len(envelope) > 0:
        estimates = librosa.beat.tempo(onset_envelope=envelope, sr=sr, hop_length=HOP_LENGTH)
        if len(estimates) > 0:
            bpm = float(estimates[0])

    return {
        "mfcc_means": coeff_means,
        "mfcc_stds": coeff_stds,
        "chroma_mean": [round(float(v), 4) for v in pitch_profile],
        "onset_count": n_onsets,
        "onset_density": round(n_onsets / max(len(y) / sr, 0.01), 2),
        "tempo": round(bpm, 2),
        "lufs": round(_compute_lufs(y, sr), 2),
    }
|
||||
|
||||
|
||||
def _compute_lufs(y: np.ndarray, sr: int) -> float:
|
||||
"""Simplified LUFS (integrated loudness) approximation."""
|
||||
try:
|
||||
from scipy.signal import butter, sosfilt
|
||||
sos_hp = butter(2, 60, btype='high', fs=sr, output='sos')
|
||||
y_filtered = sosfilt(sos_hp, y)
|
||||
sos_hs = butter(1, 1500, btype='high', fs=sr, output='sos')
|
||||
y_filtered = sosfilt(sos_hs, y_filtered)
|
||||
|
||||
block_size = int(0.4 * sr)
|
||||
hop = int(0.1 * sr)
|
||||
if len(y_filtered) < block_size:
|
||||
block_size = len(y_filtered)
|
||||
hop = max(1, block_size // 4)
|
||||
|
||||
blocks = []
|
||||
for i in range(0, len(y_filtered) - block_size + 1, hop):
|
||||
block = y_filtered[i:i + block_size]
|
||||
rms = np.sqrt(np.mean(block ** 2))
|
||||
if rms > 1e-10:
|
||||
blocks.append(rms)
|
||||
|
||||
if not blocks:
|
||||
return -70.0
|
||||
|
||||
mean_rms = np.mean(blocks)
|
||||
lufs = -0.691 + 10 * np.log10(mean_rms ** 2 + 1e-20)
|
||||
return max(lufs, -70.0)
|
||||
except Exception:
|
||||
return -70.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# F0 Detection via aubio (C-native, ~1ms per file)
|
||||
# ---------------------------------------------------------------------------
|
||||
def _fast_f0(y: np.ndarray, sr: int) -> float:
    """Median fundamental frequency in Hz via aubio's YIN tracker.

    aubio is C-native (~1 ms per file vs. ~2 s for librosa.pyin).
    Returns 0.0 when no voiced frames are found or on any failure.
    """
    try:
        detector = aubio.pitch("yin", N_FFT, HOP_LENGTH, sr)
        detector.set_unit("Hz")
        detector.set_tolerance(0.8)  # confidence threshold

        # Feed the detector hop-sized frames across the signal.
        voiced = []
        limit = len(y) - N_FFT + 1
        offset = 0
        while offset < limit:
            frame = y[offset:offset + HOP_LENGTH].astype(np.float32)
            if len(frame) < HOP_LENGTH:
                break
            estimate = detector(frame)[0]
            if estimate > 0:
                voiced.append(float(estimate))
            offset += HOP_LENGTH

        return float(np.median(voiced)) if voiced else 0.0
    except Exception:
        return 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer 3: Musical Analysis
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_musical(signal_features: dict, perceptual_features: dict, y: np.ndarray, sr: int) -> dict:
    """Layer 3: key estimate, tonality flag, F0 via aubio, one-shot vs loop."""
    profile = np.array(perceptual_features["chroma_mean"])

    key_name, key_correlation, mode = _estimate_key(profile)

    # Tonality heuristic: a clearly peaked, uneven pitch-class profile.
    tonal = float(np.std(profile)) > 0.05 and float(np.max(profile)) > 0.15

    dur = signal_features["duration"]
    n_onsets = perceptual_features["onset_count"]

    fundamental = 0.0
    note_label = "X"
    if tonal:
        fundamental = _fast_f0(y, sr)
        if fundamental > 0:
            # Hz -> nearest MIDI number (A4 = 440 Hz = MIDI 69).
            midi = int(round(12 * np.log2(fundamental / 440.0) + 69))
            note_label = _midi_to_note_name(midi)

    return {
        "key": key_name,
        "key_correlation": round(key_correlation, 4),
        "mode": mode,
        "is_tonal": tonal,
        "is_oneshot": dur < 2.0 and n_onsets <= 2,
        "is_loop": dur > 1.5 and n_onsets >= 4,
        "fundamental_freq": round(fundamental, 2),
        "fundamental_note": note_label,
    }
|
||||
|
||||
|
||||
def _estimate_key(chroma_profile: np.ndarray) -> tuple:
    """Krumhansl-Schmuckler key finding.

    Correlates the normalized chroma profile against all 24 rotated
    major/minor templates and returns (key, correlation, mode); weak
    correlations (< 0.3) are reported as atonal.
    """
    if np.max(chroma_profile) < 0.01:
        return "X", 0.0, "atonal"

    normalized = chroma_profile / (np.sum(chroma_profile) + 1e-10)
    best = ("C", -1.0, "atonal")

    for tonic in range(12):
        candidate = np.roll(normalized, -tonic)
        # Major is tested before minor so equal scores keep major.
        for template, mode_name in ((MAJOR_PROFILE, "major"), (MINOR_PROFILE, "minor")):
            corr = float(np.corrcoef(candidate, template)[0, 1])
            if np.isnan(corr):
                corr = 0.0
            if corr > best[1]:
                best = (NOTE_NAMES[tonic], corr, mode_name)

    key, corr, mode = best
    if corr < 0.3:
        # Nothing correlates well enough -> treat as atonal material.
        return "X", corr, "atonal"
    if mode == "minor":
        return f"{key}m", corr, "minor"
    return key, corr, "major"
|
||||
|
||||
|
||||
def _midi_to_note_name(midi: int) -> str:
    """MIDI number -> scientific pitch name (60 -> 'C4'); 'X' when out of range."""
    if not 0 <= midi <= 127:
        return "X"
    octave, pitch_class = divmod(midi, 12)
    return f"{NOTE_NAMES[pitch_class]}{octave - 1}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Layer 4: Timbre Fingerprint
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_timbre(y: np.ndarray, sr: int) -> dict:
    """Layer 4: mel-band statistics, spectral contrast and tonnetz fingerprint."""
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, hop_length=HOP_LENGTH),
        ref=np.max,
    )

    # Summarize the 128 mel bins as 8 coarse bands of 16 bins each.
    width = 128 // 8
    band_stats = [
        {
            "mean": round(float(np.mean(chunk)), 2),
            "std": round(float(np.std(chunk)), 2),
            "max": round(float(np.max(chunk)), 2),
        }
        for chunk in (mel_db[b * width:(b + 1) * width, :] for b in range(8))
    ]

    contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=HOP_LENGTH)
    contrast_mean = [round(float(np.mean(row)), 4) for row in contrast[:7]]

    try:
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr, hop_length=HOP_LENGTH)
        tonnetz_mean = [round(float(np.mean(row)), 4) for row in tonnetz[:6]]
    except Exception:
        # tonnetz can fail on degenerate input; fall back to a zero vector.
        tonnetz_mean = [0.0] * 6

    return {
        "mel_band_stats": band_stats,
        "spectral_contrast": contrast_mean,
        "tonnetz": tonnetz_mean,
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Classification — 3-layer priority: filename → folder → spectral heuristic
|
||||
# ---------------------------------------------------------------------------
|
||||
def classify_role(signal: dict, perceptual: dict, musical: dict, folder_hint: str = "") -> str:
    """Classify sample into a production role.

    Priority order:
    1. FILENAME keywords (most reliable — producers name their files correctly)
    2. FOLDER structure (less reliable — "SentimientoLatino" has everything mixed)
    3. SPECTRAL heuristics (fallback for unnamed/unknown samples)

    Returns a role string such as "kick", "bass", "pad", "drumloop", "fx".
    NOTE: the cascade below is strictly order-dependent — the first match
    wins at every layer, so reordering any table changes classifications.
    """
    # Despite the name, this holds the whole lower-cased hint string
    # (grandparent folder + parent folder + file stem), per analyze_file().
    filename = folder_hint.lower()  # contains parent + current folder names

    # ====================================================================
    # LAYER 1: Filename keyword extraction (HIGHEST PRIORITY)
    # Matches specific patterns like "Lead", "Pad", "Bass" in filenames
    # ====================================================================

    # Ordered by specificity — more specific keywords first
    filename_map = [
        # (keyword_pattern, role, require_not) — avoid false positives
        (["reese"], "bass", []),
        (["808"], "bass", []),
        (["kick"], "kick", ["kickdown", "kick drum"]),
        (["snare"], "snare", []),
        (["hi-hat", "hihat", "hats", "hat "], "hihat", []),
        (["shaker"], "perc", []),
        (["tambourine", "tambor"], "perc", []),
        (["conga", "bongo", "rim"], "perc", []),
        (["timbal"], "perc", []),
        (["vocal chop", "v.chop", "vox chop"], "vocal", []),
        (["vocal", "vox", "vocals"], "vocal", []),
        (["pluck"], "pluck", []),
        (["bell"], "pluck", []),
        (["stab"], "oneshot", []),
        (["lead"], "lead", []),
        (["arp", "arpeggio"], "arp", []),
        (["pad reverse"], "pad", []),
        (["pad", "pads"], "pad", []),
        (["chord", "chords"], "pad", []),
        (["rhodes", "piano", "keys", "key "], "keys", []),
        (["guitar"], "guitar", []),
        (["string"], "pad", []),
        (["brass"], "brass", []),
        (["synth"], "synth", []),
        (["texture"], "pad", []),
        (["riser", "sweep", "impact", "explosion"], "fx", []),
        (["loop"], "drumloop", ["vocal loop", "melody loop"]),
        (["fill"], "fill", []),
        (["drum"], "drumloop", []),
    ]

    # First keyword hit wins unless one of its exclusion strings also matches.
    for keywords, role, excludes in filename_map:
        for kw in keywords:
            if kw in filename:
                # Check exclusions
                excluded = False
                for ex in excludes:
                    if ex in filename:
                        excluded = True
                        break
                if not excluded:
                    return role

    # ====================================================================
    # LAYER 2: Midilatino / SS_RNBL structured filename parsing
    # These packs have naming conventions we can extract roles from
    # ====================================================================

    # Midilatino pattern: "Midilatino_Song_Key_BPM_StemType.wav"
    # e.g. "Midilatino_Holanda_F_Min_108BPM_Lead.wav"
    # e.g. "Midilatino_Cookie_E_Min_89BPM_Pluck.wav"
    parts = filename.replace(".wav", "").replace(".flac", "").replace(".mp3", "").split("_")
    if len(parts) >= 2:
        # Check last part for stem type
        # (last TWO underscore-separated tokens joined, to catch two-word stems).
        last_parts = " ".join(parts[-2:]).lower()
        stem_map = {
            "drums": "drumloop", "drum": "drumloop",
            "bass": "bass", "reese": "bass",
            "lead": "lead", "pluck": "pluck", "pluck fx": "fx",
            "pad": "pad", "pad reverse": "pad",
            "arp": "arp", "vocal": "vocal", "vocals": "vocal",
            "vocal chop": "vocal", "vox": "vocal",
            "guitar": "guitar", "rhodes": "keys", "rhode": "keys",
            "piano": "keys", "keys": "keys",
            "synth": "synth", "texture": "pad", "texture 2": "pad",
            "bell chords": "pad", "accent": "oneshot", "accent keys": "keys",
            "harp": "pluck", "shaker": "perc",
        }
        # Substring match in dict-insertion order; first hit wins.
        for stem_kw, stem_role in stem_map.items():
            if stem_kw in last_parts:
                return stem_role

    # SS_RNBL pattern: "SS_RNBL_Song_Stem_Type.wav"
    # e.g. "SS_RNBL_Amor_One_Shot_Bass_C_.wav"
    if "ss_rnbl" in filename or "ss rnbl" in filename:
        ss_map = {
            "kick": "kick", "snare": "snare", "hats": "hihat", "hat": "hihat",
            "perc": "perc", "bass": "bass", "lead": "lead", "pad": "pad",
            "fx": "fx", "top": "drumloop", "drum": "drumloop",
            "v.chop": "vocal", "phrases": "vocal",
            "one shot": "oneshot", "music": "drumloop",
            "double": "drumloop", "add": "drumloop",
            "gustas": "drumloop",  # "Gustas" are full loop sections
        }
        for kw, role in ss_map.items():
            if kw in filename:
                return role

    # ====================================================================
    # LAYER 3: Folder-based hints (MEDIUM PRIORITY)
    # Only for folders that are explicitly categorized
    # ====================================================================
    folder_map = {
        "kick": "kick", "kicks": "kick", "8. kicks": "kick",
        "snare": "snare", "snares": "snare", "9. snare": "snare",
        "hi-hat": "hihat", "hihat": "hihat", "hi-hats": "hihat",
        "bass": "bass",
        "perc": "perc", "percs": "perc", "10. percs": "perc",
        "fx": "fx", "5. fx": "fx",
        "drum loops": "drumloop", "4. drum loops": "drumloop", "drumloops": "drumloop",
        "vocal": "vocal", "vocals": "vocal", "11. vocals": "vocal",
        "fill": "fill", "fills": "fill", "7. fill": "fill",
        "3. one shots": "oneshot",
    }
    for key, role in folder_map.items():
        if key in folder_hint.lower():
            return role

    # ====================================================================
    # LAYER 4: Spectral heuristics (LOWEST PRIORITY — fallback only)
    # Only used when filename and folder give no signal
    # ====================================================================
    centroid = signal["spectral_centroid_mean"]
    low_r = signal["low_energy_ratio"]
    high_r = signal["high_energy_ratio"]
    dur = signal["duration"]
    onsets = perceptual["onset_count"]
    is_tonal = musical["is_tonal"]
    attack = signal["attack_time"]
    rms_std = signal["rms_std"]

    # Percussive one-shots
    if centroid < 600 and low_r > 0.5 and dur < 1.0 and attack < 0.01 and onsets <= 3:
        return "kick"
    if centroid > 5000 and high_r > 0.4 and dur < 0.3:
        return "hihat"
    if 1000 < centroid < 5000 and attack < 0.005 and onsets <= 2:
        return "snare"
    if dur < 0.5 and onsets <= 2 and 500 < centroid < 5000:
        return "perc"

    # Tonal classification (for long tonal samples that didn't match filename)
    if is_tonal:
        # Sub-bass / bass
        if centroid < 200 and low_r > 0.7:
            return "bass"
        # Pad: sustained, low variance, long
        if rms_std < 0.05 and dur > 1.0 and centroid < 4000:
            return "pad"
        # Pluck: short, tonal
        if dur < 0.8 and onsets <= 3:
            return "pluck"
        # Lead: prominent, mid-high frequency
        if 500 < centroid < 6000:
            return "lead"
        # Keys: mid frequency, moderate dynamics
        if 200 < centroid < 2000 and rms_std < 0.1:
            return "keys"
        # Generic tonal loop
        if dur > 2.0 and onsets > 4:
            return "drumloop"
        return "synth"

    # Atonal loops
    if dur > 2.0 and onsets >= 4:
        return "drumloop"

    # Short atonal
    if dur < 2.0 and onsets <= 1:
        return "oneshot"

    return "fx"
|
||||
|
||||
|
||||
def classify_character(signal: dict, perceptual: dict, musical: dict) -> str:
    """Classify the sonic character.

    Evaluates an ordered rule table of threshold conditions; the matching
    rule with the highest score wins, with earlier rules winning ties.
    Only `signal` features are consulted.  Returns "neutral" if no rule
    matches.
    """
    centroid = signal["spectral_centroid_mean"]
    low = signal["low_energy_ratio"]
    high = signal["high_energy_ratio"]
    mid = signal["mid_energy_ratio"]
    length = signal["duration"]
    onset = signal["attack_time"]
    dyn = signal["peak_rms_ratio"]
    flat = signal["spectral_flatness_mean"]
    wobble = signal["rms_std"]
    width = signal["spectral_bandwidth_mean"]

    # (label, condition, score) — insertion order matters: max() resolves
    # ties in favor of the earliest inserted key.
    rules = [
        ("boomy", low >= 0.6 and centroid <= 400, low * 2),
        ("deep", low >= 0.5 and centroid <= 500, low * 1.5),
        ("sharp", high >= 0.4 and centroid >= 3000 and onset <= 0.005, high * 2),
        ("crisp", high >= 0.3 and centroid >= 4000 and length <= 0.2, high * 1.5),
        ("warm", 300 <= centroid <= 2000 and mid >= 0.4, mid * 1.5),
        ("bright", centroid >= 3000 and high >= 0.3, high * 1.5),
        ("dark", centroid <= 800 and low >= 0.4, low * 1.5),
        ("ethereal", 1500 <= centroid <= 5000 and wobble <= 0.03, 1.0),
        ("short", length <= 0.15, 1.0),
        ("impact", onset <= 0.005 and dyn >= 5.0, dyn / 5.0),
        ("full", length >= 1.0 and width >= 4000, 1.0),
        ("hollow", mid <= 0.2 and low >= 0.3 and high >= 0.3, 1.0),
        ("tight", onset <= 0.003 and length <= 0.3 and centroid >= 1000, 1.0),
        ("lush", flat >= 0.1 and mid >= 0.3 and length >= 0.5, flat * 5),
        ("aggressive", dyn >= 4.0 and centroid >= 2000, dyn / 4.0),
        ("soft", dyn <= 3.0 and onset >= 0.01, 1.0),
    ]

    scores = {label: score for label, hit, score in rules if hit}
    return max(scores, key=scores.get) if scores else "neutral"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full Analysis Pipeline (single file)
|
||||
# ---------------------------------------------------------------------------
|
||||
def analyze_file(filepath: str) -> Optional[dict]:
    """Run the full 4-layer analysis on one audio file.

    Picklable top-level function so ProcessPoolExecutor can ship it to
    worker processes.  Returns None for unusably short files, or a dict
    containing an "error" key when analysis raised.
    """
    try:
        y, sr = librosa.load(filepath, sr=SAMPLE_RATE, mono=True, duration=30.0)
        if len(y) < 512:
            return None  # too short for even a single analysis frame

        # Peak-normalize so level thresholds behave consistently across packs.
        amplitude = np.max(np.abs(y))
        if amplitude > 1e-6:
            y = y / amplitude

        path = Path(filepath)
        # Pass BOTH the full filename and folder structure to classifier
        classify_hint = f"{path.parent.parent.name} {path.parent.name} {path.stem}"

        signal = analyze_signal(y, sr)
        perceptual = analyze_perceptual(y, sr)
        musical = analyze_musical(signal, perceptual, y, sr)
        timbre = analyze_timbre(y, sr)

        role = classify_role(signal, perceptual, musical, classify_hint)
        character = classify_character(signal, perceptual, musical)

        # Keep only the compact perceptual features (drop the chroma vector).
        perceptual_summary = {
            field: perceptual[field]
            for field in ("mfcc_means", "mfcc_stds", "onset_count",
                          "onset_density", "tempo", "lufs")
        }

        return {
            "original_path": filepath,
            "original_name": path.name,
            "new_name": _generate_name(role, musical, perceptual, character, filepath),
            "file_hash": _hash_file(filepath),
            "file_size": os.path.getsize(filepath),
            "role": role,
            "character": character,
            "musical": musical,
            "signal": signal,
            "perceptual": perceptual_summary,
            "timbre": timbre,
        }
    except Exception as e:
        # Surface the failure to the caller instead of crashing the worker.
        return {"original_path": filepath, "error": str(e)}
|
||||
|
||||
|
||||
def _generate_name(role: str, musical: dict, perceptual: dict, character: str, filepath: str) -> str:
|
||||
key = musical["fundamental_note"] if musical["is_tonal"] else "X"
|
||||
if key == "X" and musical["key"] != "X":
|
||||
key = musical["key"]
|
||||
bpm = int(perceptual["tempo"]) if perceptual["tempo"] > 0 else 0
|
||||
short_id = hashlib.md5(filepath.encode()).hexdigest()[:6]
|
||||
ext = Path(filepath).suffix
|
||||
return f"{role}_{key}_{bpm:03d}_{character}_{short_id}{ext}"
|
||||
|
||||
|
||||
def _hash_file(filepath: str) -> str:
|
||||
h = hashlib.md5()
|
||||
size = os.path.getsize(filepath)
|
||||
with open(filepath, "rb") as f:
|
||||
h.update(f.read(65536))
|
||||
if size > 131072:
|
||||
f.seek(size - 65536)
|
||||
h.update(f.read(65536))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File Collection
|
||||
# ---------------------------------------------------------------------------
|
||||
def collect_audio_files(*directories: str) -> list[str]:
    """Recursively gather audio file paths (by extension) from *directories*.

    Missing directories are silently skipped; the result is sorted for a
    deterministic processing order.
    """
    found: list[str] = []
    for root in map(Path, directories):
        if not root.exists():
            continue
        found.extend(
            str(entry)
            for entry in root.rglob("*")
            if entry.is_file() and entry.suffix.lower() in AUDIO_EXT
        )
    return sorted(found)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch Analysis (TRUE multiprocessing)
|
||||
# ---------------------------------------------------------------------------
|
||||
def batch_analyze(files: list[str], workers: int = MAX_WORKERS, checkpoint_path: Optional[str] = None) -> list[dict]:
    """Analyze all files using ProcessPoolExecutor for real multi-core parallelism.
    Each process runs independently — no GIL contention, no shared memory.

    Args:
        files: Audio file paths to analyze.
        workers: Number of worker processes.
        checkpoint_path: Optional JSONL file; previously completed entries
            are loaded from it on start and the whole result list is
            re-written every 50 completions.

    Returns:
        List of per-file result dicts; failed files are reported on stdout
        and omitted from the list.
    """
    results = []
    errors = []
    done = 0
    total = len(files)

    # Resume from checkpoint
    completed_paths = set()
    if checkpoint_path and os.path.exists(checkpoint_path):
        with open(checkpoint_path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    entry = json.loads(line.strip())
                    completed_paths.add(entry["original_path"])
                    results.append(entry)
                except (json.JSONDecodeError, KeyError):
                    # Tolerate a torn/partial last line from an interrupted run.
                    pass
        done = len(results)
        print(f"Resumed from checkpoint: {done}/{total}")

    remaining = [f for f in files if f not in completed_paths]
    if not remaining:
        print("All files already analyzed.")
        return results

    print(f"Analyzing {len(remaining)} files with {workers} PROCESSES (true parallel)...")

    with ProcessPoolExecutor(max_workers=workers) as executor:
        # Map each future back to its input path for error reporting.
        futures = {executor.submit(analyze_file, f): f for f in remaining}

        for future in as_completed(futures):
            filepath = futures[future]
            done += 1
            try:
                result = future.result()
                # analyze_file returns None for too-short files and an
                # {"error": ...} dict when analysis raised in the worker.
                if result is None:
                    errors.append(filepath)
                    continue
                if "error" in result:
                    errors.append(f"{filepath}: {result['error']}")
                    continue

                results.append(result)

                # Periodic checkpoint: rewrites the full JSONL file.
                if checkpoint_path and done % 50 == 0:
                    _save_checkpoint(results, checkpoint_path)

                # Lightweight progress indicator.
                if done % 25 == 0 or done == total:
                    print(f" [{done}/{total}] {result.get('new_name', '?')}")

            except Exception as e:
                # future.result() re-raises anything that escaped the worker.
                errors.append(f"{filepath}: {e}")

    if errors:
        print(f"\nErrors ({len(errors)}):")
        for e in errors[:10]:
            print(f" - {e}")
        if len(errors) > 10:
            print(f" ... and {len(errors) - 10} more")

    return results
|
||||
|
||||
|
||||
def _save_checkpoint(results: list[dict], path: str):
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
for r in results:
|
||||
f.write(json.dumps(r, ensure_ascii=False) + "\n")
|
||||
|
||||
|
||||
def save_index(results: list[dict], output_path: str):
    """Write the sample index JSON: summary metadata plus all sample records.

    Side effects: writes `output_path` and prints a short summary.
    """
    roles: dict = {}
    keys: dict = {}
    characters: dict = {}
    error_count = 0

    for entry in results:
        if "error" in entry:
            error_count += 1
            continue
        # Tally role / key / character distributions for the metadata header.
        for table, value in (
            (roles, entry.get("role", "unknown")),
            (keys, entry.get("musical", {}).get("key", "X")),
            (characters, entry.get("character", "unknown")),
        ):
            table[value] = table.get(value, 0) + 1

    index = {
        "metadata": {
            "total_samples": len(results),
            "errors": error_count,
            "roles": roles,
            "keys": keys,
            "characters": characters,
        },
        "samples": results,
    }

    with open(output_path, "w", encoding="utf-8") as fh:
        json.dump(index, fh, ensure_ascii=False, indent=2)

    print(f"Index saved to {output_path}")
    print(f" Total: {len(results)} | Roles: {roles} | Keys: {len(keys)} | Characters: {len(characters)}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rename Engine
|
||||
# ---------------------------------------------------------------------------
|
||||
def plan_renames(results: list[dict], output_dir: str) -> list[dict]:
    """Plan destination paths (output_dir/role/new_name) with unique names.

    Entries carrying an error or lacking a generated name are skipped.
    Duplicate target names get a numeric suffix (_2, _3, ...).
    """
    base = Path(output_dir)
    plan: list[dict] = []
    taken: set = set()

    for entry in results:
        if "error" in entry or "new_name" not in entry:
            continue

        source = Path(entry["original_path"])
        target_role = entry["role"]
        target_name = entry["new_name"]

        # De-duplicate by appending the smallest free numeric suffix.
        if target_name in taken:
            stem = Path(target_name).stem
            ext = Path(target_name).suffix
            n = 2
            while f"{stem}_{n}{ext}" in taken:
                n += 1
            target_name = f"{stem}_{n}{ext}"

        taken.add(target_name)
        plan.append({
            "old_path": str(source),
            "new_path": str(base / target_role / target_name),
            "new_name": target_name,
            "role": target_role,
            "original_name": entry["original_name"],
        })

    return plan
|
||||
|
||||
|
||||
def execute_renames(renames: list[dict], dry_run: bool = True) -> dict:
    """Copy files according to a plan produced by plan_renames().

    Each entry needs "old_path" and "new_path". Files are COPIED (shutil.copy2,
    metadata preserved), never moved, so the source library stays intact.
    Missing sources are counted as skipped; with dry_run=True every existing
    file is counted as skipped instead of copied.

    Returns stats: {"planned", "executed", "skipped", "errors": [str, ...]}.
    """
    # Hoisted out of the per-file loop (was re-imported on every iteration).
    import shutil

    stats = {"planned": len(renames), "executed": 0, "skipped": 0, "errors": []}

    for r in renames:
        old = Path(r["old_path"])
        new = Path(r["new_path"])

        # Skip sources that vanished, and everything when only simulating.
        if not old.exists() or dry_run:
            stats["skipped"] += 1
            continue

        try:
            # mkdir inside the try: a bad destination is recorded in
            # stats["errors"] instead of aborting the whole batch.
            new.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(old), str(new))
            stats["executed"] += 1
        except Exception as e:
            stats["errors"].append(f"{old.name} -> {new.name}: {e}")

    return stats
|
||||
72
src/analyzer/forensic_classify.py
Normal file
72
src/analyzer/forensic_classify.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Forensic analysis of misclassified samples."""
import json, os
from collections import Counter

PROJECT = r"C:\Users\Administrator\Documents\fl_control"


def _print_table(rows: list[dict], limit: int) -> None:
    """Print up to *limit* samples, one aligned forensic row each.

    Shared by the DRUMLOOPS and ONESHOTS sections (was duplicated verbatim).
    """
    print(f"{'Orig filename':<55s} {'Dur':>5s} {'Onset':>5s} {'Centr':>7s} {'Low':>5s} {'Mid':>5s} {'High':>5s} {'Tonal':>5s} {'Key':>5s} {'Char':>10s}")
    print("-" * 120)
    for s in rows[:limit]:
        orig = s.get("original_name", "?")[:54]
        dur = s["signal"]["duration"]
        onc = s["perceptual"]["onset_count"]
        cent = s["signal"]["spectral_centroid_mean"]
        low = s["signal"]["low_energy_ratio"]
        mid = s["signal"]["mid_energy_ratio"]
        high = s["signal"]["high_energy_ratio"]
        ton = s["musical"]["is_tonal"]
        key = s["musical"]["key"]
        char = s["character"]
        print(f"{orig:<55s} {dur:5.1f} {onc:5d} {cent:7.0f} {low:5.2f} {mid:5.2f} {high:5.2f} {str(ton):>5s} {key:>5s} {char:>10s}")


def _print_origins(rows: list[dict]) -> None:
    """Count and print samples per source folder.

    The "origin" is the folder immediately under the first path component
    containing "reggaeton" (was duplicated for drumloops and oneshots).
    """
    origins: Counter = Counter()
    for s in rows:
        parts = s.get("original_path", "").replace("\\", "/").split("/")
        # Find the category folder
        for i, p in enumerate(parts):
            if "reggaeton" in p.lower() and i + 1 < len(parts):
                origins[parts[i + 1]] += 1
                break
    for k, v in origins.most_common():
        print(f"  {k:40s} {v:4d}")


with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
    d = json.load(f)

samples = d["samples"]

# --- DRUMLOOPS ---
drumloops = [s for s in samples if s.get("role") == "drumloop"]
print(f"DRUMLOOPS ({len(drumloops)} total)")
_print_table(drumloops, 50)

# --- ONESHOTS ---
oneshots = [s for s in samples if s.get("role") == "oneshot"]
print(f"\nONESHOTS ({len(oneshots)} total)")
_print_table(oneshots, 40)

# --- Summary: folder source of drumloops ---
print(f"\n\nDRUMLOOP ORIGINS:")
_print_origins(drumloops)

# --- Summary: folder source of oneshots ---
print(f"\nONESHOT ORIGINS:")
_print_origins(oneshots)
|
||||
72
src/analyzer/forensic_filenames.py
Normal file
72
src/analyzer/forensic_filenames.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Forensic analysis of misclassified samples."""
import json, os, re
from collections import Counter

PROJECT = r"C:\Users\Administrator\Documents\fl_control"
with open(os.path.join(PROJECT, "data", "sample_index.json"), "r", encoding="utf-8") as f:
    d = json.load(f)

samples = d["samples"]

# --- Analyze filename patterns in misclassified ---
print("=" * 70)
print(" PATRONES DE NOMBRE EN 'DRUMLOOPS'")
print("=" * 70)

# Stem keywords checked in priority order: the first match wins.
_ML_KEYWORDS = ("Drums", "Bass", "Lead", "Pad", "Pluck", "Arp", "Vocal",
                "Vox", "Guitar", "Rhodes", "Piano", "Synth", "Reese",
                "Texture", "Chords", "Reverse", "Fx", "Accent", "Harp",
                "Keys", "Bell", "Loop", "Stem", "Snare", "Kick", "Hat",
                "Perc", "Shaker", "Hi", "808")


def _midilatino_stem(filename: str) -> str:
    """Classify a Midilatino filename by its stem keyword, else FULL_MIX."""
    base = filename.replace(".wav", "").replace(".flac", "").replace(".mp3", "")
    lowered = base.lower()
    for kw in _ML_KEYWORDS:
        if kw.lower() in lowered:
            return kw
    return "FULL_MIX"  # no stem keyword -> whole-song mixdown


# Extract Midilatino stems (pattern: Midilatino_Name_Key_Min_BPM_Stem.wav)
ml_stems = Counter(
    _midilatino_stem(s.get("original_name", ""))
    for s in samples
    if s.get("role") == "drumloop"
    and ("Midilatino" in s.get("original_name", "")
         or "midilatino" in s.get("original_name", ""))
)

print("\nMidilatino stem types in 'drumloop':")
for k, v in ml_stems.most_common():
    print(f"  {k:15s} {v:4d}")

# --- SS_RNBL patterns ---
print("\n\nSentimientoLatino SS_RNBL patterns:")
ss_stems = Counter()
for s in samples:
    name = s.get("original_name", "")
    if "SS_RNBL" not in name:
        continue
    # Pattern: SS_RNBL_Song_Stem_Type.wav -> the 4th token is the stem type,
    # except "One Shot" which spans two tokens.
    parts = name.replace(".wav", "").split("_")
    if len(parts) < 4:
        continue
    if parts[3] in ("One", "Shot"):
        ss_stems["_".join(parts[3:5])] += 1
    else:
        ss_stems[parts[3]] += 1

for k, v in ss_stems.most_common():
    print(f"  {k:20s} {v:4d}")

# --- All filename keywords ---
print("\n\nAll filename role keywords across library:")
_ROLE_KEYWORDS = ("kick", "snare", "hi-hat", "hihat", "hat", "bass", "808",
                  "lead", "pad", "pluck", "arp", "vocal", "vox", "fx",
                  "perc", "drum", "loop", "fill", "guitar", "piano", "rhodes",
                  "synth", "bell", "brass", "string", "reese", "texture",
                  "chord", "shaker", "tambourine", "conga", "rim")

role_keywords = Counter()
for s in samples:
    lowered = s.get("original_name", "").lower()
    # Every matching keyword counts (no break), same as the original loop.
    role_keywords.update(kw for kw in _ROLE_KEYWORDS if kw in lowered)

for k, v in role_keywords.most_common(25):
    print(f"  {k:15s} {v:4d}")
|
||||
143
src/analyzer/run_batch.py
Normal file
143
src/analyzer/run_batch.py
Normal file
@@ -0,0 +1,143 @@
|
||||
"""
|
||||
Batch analyzer - STANDALONE for double-click execution.
|
||||
Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
|
||||
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).
|
||||
|
||||
IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows
|
||||
for ProcessPoolExecutor. Without it, child processes re-import this file
|
||||
and create infinite process spawning.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
import warnings
|
||||
import traceback
|
||||
import multiprocessing
|
||||
|
||||
# CRITICAL: Windows multiprocessing guard - MUST be at top level
# (ProcessPoolExecutor children re-import this module on Windows; see the
# module docstring — without this, a frozen build could spawn recursively).
multiprocessing.freeze_support()

# Analysis libraries emit many benign warnings; silence them for console use.
warnings.filterwarnings("ignore")

# Project root = three directories above this file (src/analyzer/run_batch.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # so relative paths (data/, librerias/) resolve from the root
if PROJECT not in sys.path:
    sys.path.insert(0, PROJECT)  # make "src.analyzer" importable when double-clicked

from src.analyzer import (
    collect_audio_files,
    batch_analyze,
    save_index,
    plan_renames,
)
|
||||
|
||||
|
||||
def main() -> None:
    """Run the full batch pipeline: collect -> analyze -> index -> rename plan.

    Console output is user-facing Spanish. All paths hang off the
    module-level PROJECT root; results are written under data/.
    """
    print("=" * 60)
    print("  ANALIZADOR FORENSE DE SAMPLES v2.0")
    print("  ProcessPoolExecutor + aubio F0 (C-native)")
    print("  4 capas: Signal + Perceptual + Musical + Timbre")
    print("  16 procesos independientes = 16 cores en paralelo")
    print("=" * 60)

    # Both spellings of the library root are probed ("libreria"/"librerias").
    lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
    lib2 = os.path.join(PROJECT, "librerias", "reggaeton")

    print("\n[1/4] Colectando archivos de audio...")
    files = collect_audio_files(lib1, lib2)
    print(f"  Encontrados: {len(files)} archivos")

    if not files:
        print("ERROR: No se encontraron archivos de audio.")
        return

    data_dir = os.path.join(PROJECT, "data")
    os.makedirs(data_dir, exist_ok=True)
    checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")

    # Delete old checkpoint from failed thread-based run
    if os.path.exists(checkpoint):
        old_size = os.path.getsize(checkpoint)
        if old_size < 1000:  # Probably broken from the thread run
            os.remove(checkpoint)
            print("  (Removed broken checkpoint)")

    print(f"\n[2/4] Analizando con 16 PROCESOS (70% CPU)...")
    print(f"  Cada proceso en su propio core, sin GIL")
    print(f"  Checkpoint: {checkpoint}")
    print(f"  (Si se corta, re-ejecuta y continua desde donde quedo)")
    print()

    # Wall-clock timing around the parallel analysis phase.
    start = time.time()
    results = batch_analyze(files, workers=16, checkpoint_path=checkpoint)
    elapsed = time.time() - start

    valid = [r for r in results if "error" not in r]
    errors = [r for r in results if "error" in r]

    # max(len(files), 1) guards the per-file average against an empty list.
    print(f"\n  Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
    print(f"  Exitosos: {len(valid)} | Errores: {len(errors)}")

    if errors:
        # Failed analyses are preserved for later inspection.
        err_path = os.path.join(data_dir, "analysis_errors.json")
        with open(err_path, "w", encoding="utf-8") as f:
            json.dump(errors, f, ensure_ascii=False, indent=2)
        print(f"  Errores guardados en: {err_path}")

    print(f"\n[3/4] Guardando indice...")
    index_path = os.path.join(data_dir, "sample_index.json")
    save_index(results, index_path)

    print(f"\n[4/4] Plan de renombrado...")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
    renames = plan_renames(results, output_dir)
    rename_path = os.path.join(data_dir, "rename_plan.json")
    with open(rename_path, "w", encoding="utf-8") as f:
        json.dump(renames, f, ensure_ascii=False, indent=2)
    print(f"  {len(renames)} archivos para renombrar")
    print(f"  Plan guardado en: {rename_path}")

    # Summary
    print("\n" + "=" * 60)
    print("  RESUMEN")
    print("=" * 60)

    # Aggregate role / character / key counts over successful analyses only.
    roles = {}
    chars = {}
    keys = {}
    for r in valid:
        role = r.get("role", "?")
        roles[role] = roles.get(role, 0) + 1
        char = r.get("character", "?")
        chars[char] = chars.get(char, 0) + 1
        key = r.get("musical", {}).get("key", "X")
        keys[key] = keys.get(key, 0) + 1

    print(f"\n  Roles:")
    for role, count in sorted(roles.items(), key=lambda x: -x[1]):
        bar = "#" * min(count, 60)  # cap the bar so large counts fit one line
        print(f"    {role:12s} {count:4d} {bar}")

    print(f"\n  Caracteres:")
    for char, count in sorted(chars.items(), key=lambda x: -x[1]):
        bar = "#" * min(count, 50)
        print(f"    {char:12s} {count:4d} {bar}")

    print(f"\n  Tonalidades (top 10):")
    for key, count in sorted(keys.items(), key=lambda x: -x[1])[:10]:
        print(f"    {key:5s} {count:4d}")

    print(f"\n  Proximo paso: ejecuta 2_RENOMBRAR.bat")
    print("=" * 60)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The guard is REQUIRED on Windows: ProcessPoolExecutor children re-import
    # this file, and without it they would re-run main() recursively
    # (see module docstring).
    try:
        main()
    except Exception as e:
        # Show the traceback and hold the console open — the script is meant
        # to be launched by double-click, so the window would close instantly.
        print(f"\nFATAL ERROR: {e}")
        traceback.print_exc()
        input("Presiona Enter para cerrar...")
|
||||
88
src/analyzer/run_rename.py
Normal file
88
src/analyzer/run_rename.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""
|
||||
Rename executor - Copies files to analyzed_samples/ with standardized names.
|
||||
Reads from data/rename_plan.json generated by the batch analyzer.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import warnings
|
||||
|
||||
# Silence library warnings for a clean console.
warnings.filterwarnings("ignore")

# Project root = three directories above this file (src/analyzer/run_rename.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # so relative paths (data/, librerias/) resolve from the root
if PROJECT not in sys.path:
    sys.path.insert(0, PROJECT)  # make "src.analyzer" importable when double-clicked

from src.analyzer import plan_renames, execute_renames
||||
|
||||
|
||||
def main() -> None:
    """Copy analyzed samples into librerias/analyzed_samples/ with new names.

    Loads data/rename_plan.json if present, otherwise rebuilds the plan from
    data/sample_index.json. Asks for interactive confirmation ("s") before
    copying, then writes a log to data/rename_log.json.
    """
    rename_path = os.path.join(PROJECT, "data", "rename_plan.json")
    index_path = os.path.join(PROJECT, "data", "sample_index.json")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")

    # Load rename plan if exists, otherwise generate from index
    if os.path.exists(rename_path):
        print("Cargando plan de renombrado existente...")
        with open(rename_path, "r", encoding="utf-8") as f:
            renames = json.load(f)
    elif os.path.exists(index_path):
        print("Generando plan desde indice...")
        with open(index_path, "r", encoding="utf-8") as f:
            index = json.load(f)
        renames = plan_renames(index["samples"], output_dir)
        # Persist the freshly generated plan so the next run can reuse it.
        with open(rename_path, "w", encoding="utf-8") as f:
            json.dump(renames, f, ensure_ascii=False, indent=2)
    else:
        # Neither artifact exists: the analyzer has not been run yet.
        print("ERROR: No existe data/sample_index.json ni data/rename_plan.json")
        print("       Ejecuta primero 1_ANALIZAR.bat")
        return

    print(f"\n{len(renames)} archivos para renombrar")
    print(f"Destino: {output_dir}")
    print()

    # Show sample renames
    print("Ejemplos:")
    for r in renames[:15]:
        print(f"  {r['original_name']:50s} -> {r['role']:10s}\\{r['new_name']}")
    if len(renames) > 15:
        print(f"  ... y {len(renames) - 15} mas")
    print()

    # Confirm
    answer = input("Ejecutar renombrado? (s/n): ").strip().lower()
    if answer != "s":
        print("Cancelado.")
        return

    # Execute
    print("\nCopiando archivos...")
    stats = execute_renames(renames, dry_run=False)

    print(f"\nResultado: {stats['executed']} copiados, {stats['skipped']} omitidos, {len(stats.get('errors', []))} errores")

    if stats.get("errors"):
        # Show at most the first 10 failures; the full list goes to the log.
        print("Errores:")
        for e in stats["errors"][:10]:
            print(f"  {e}")

    # Save rename log
    log_path = os.path.join(PROJECT, "data", "rename_log.json")
    with open(log_path, "w", encoding="utf-8") as f:
        json.dump({"stats": stats, "renames": renames}, f, ensure_ascii=False, indent=2)
    print(f"\nLog guardado en: {log_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Show the traceback and hold the console open — the script is meant
        # to be launched by double-click, so the window would close instantly.
        print(f"\nFATAL ERROR: {e}")
        import traceback
        traceback.print_exc()
        input("Presiona Enter para cerrar...")
|
||||
117
src/analyzer/show_stats.py
Normal file
117
src/analyzer/show_stats.py
Normal file
@@ -0,0 +1,117 @@
|
||||
"""
|
||||
Show statistics from the analysis index.
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
|
||||
# Project root = three directories above this file (src/analyzer/show_stats.py).
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)  # so data/sample_index.json resolves from the root
|
||||
|
||||
|
||||
def main() -> None:
    """Print library statistics from data/sample_index.json.

    Sections: role/character histograms, key counts, tempo and LUFS ranges,
    tonal vs atonal split, one-shot vs loop split, and a few example samples
    per role. Returns early with a message when the index file is missing or
    contains no valid samples.
    """
    index_path = os.path.join(PROJECT, "data", "sample_index.json")

    if not os.path.exists(index_path):
        print("ERROR: No existe data/sample_index.json")
        print("       Ejecuta primero 1_ANALIZAR.bat")
        return

    with open(index_path, "r", encoding="utf-8") as f:
        index = json.load(f)

    samples = index["samples"]
    valid = [s for s in samples if "error" not in s]

    # Guard: the percentage computations below divide by len(valid);
    # an index with zero valid samples previously raised ZeroDivisionError.
    if not valid:
        print("ERROR: El indice no contiene samples validos.")
        return

    def _tally(key_of) -> dict:
        # Count occurrences of key_of(sample) over all valid samples.
        counts: dict = {}
        for s in valid:
            k = key_of(s)
            counts[k] = counts.get(k, 0) + 1
        return counts

    def _histogram(title: str, counts: dict, width: int = 40) -> None:
        # Print a descending-count bar chart; bars scale to the max count.
        print(title)
        max_count = max(counts.values()) if counts else 1
        for label, count in sorted(counts.items(), key=lambda x: -x[1]):
            bar_len = int(width * count / max_count)
            print(f"    {label:12s} {count:4d} {'█' * bar_len}")

    print("=" * 60)
    print(f"  ESTADISTICAS DE LA BIBLIOTECA ({len(valid)} samples)")
    print("=" * 60)

    # Roles
    _histogram("\n  Roles:", _tally(lambda s: s.get("role", "?")))

    # Characters
    _histogram("\n  Caracteres sonoros:", _tally(lambda s: s.get("character", "?")))

    # Keys (top 15 only)
    keys = _tally(lambda s: s.get("musical", {}).get("key", "X"))
    print("\n  Tonalidades:")
    for key, count in sorted(keys.items(), key=lambda x: -x[1])[:15]:
        print(f"    {key:5s} {count:4d}")

    # Tempo distribution (zeros mean "no tempo detected" and are excluded).
    tempos_nonzero = [t for t in (s.get("perceptual", {}).get("tempo", 0) for s in valid) if t > 0]
    if tempos_nonzero:
        print(f"\n  Tempo:")
        print(f"    Rango: {min(tempos_nonzero):.0f} - {max(tempos_nonzero):.0f} BPM")
        print(f"    Promedio: {sum(tempos_nonzero) / len(tempos_nonzero):.0f} BPM")

    # LUFS distribution (values at/below -70 LUFS are treated as invalid).
    lufs_valid = [v for v in (s.get("perceptual", {}).get("lufs", 0) for s in valid) if v > -70]
    if lufs_valid:
        print(f"\n  Loudness (LUFS):")
        print(f"    Rango: {min(lufs_valid):.1f} a {max(lufs_valid):.1f} LUFS")
        print(f"    Promedio: {sum(lufs_valid) / len(lufs_valid):.1f} LUFS")

    # Tonal vs atonal
    tonal = sum(1 for s in valid if s.get("musical", {}).get("is_tonal", False))
    atonal = len(valid) - tonal
    print(f"\n  Tonalidad:")
    print(f"    Tonal: {tonal} ({100 * tonal / len(valid):.0f}%)")
    print(f"    Atonal: {atonal} ({100 * atonal / len(valid):.0f}%)")

    # One-shot vs loop (samples can be neither -> "Otros").
    oneshot = sum(1 for s in valid if s.get("musical", {}).get("is_oneshot", False))
    loops = sum(1 for s in valid if s.get("musical", {}).get("is_loop", False))
    print(f"\n  Tipo:")
    print(f"    One-shots: {oneshot}")
    print(f"    Loops: {loops}")
    print(f"    Otros: {len(valid) - oneshot - loops}")

    print("\n" + "=" * 60)

    # Show samples per role for quick reference
    print("\n  EJEMPLOS POR ROL:")
    by_role: dict = {}
    for s in valid:
        by_role.setdefault(s.get("role", "?"), []).append(s)

    for role in sorted(by_role.keys()):
        print(f"\n  [{role}] ({len(by_role[role])} total)")
        for s in by_role[role][:5]:
            key = s.get("musical", {}).get("key", "X")
            char = s.get("character", "?")
            bpm = s.get("perceptual", {}).get("tempo", 0)
            print(f"    {s.get('new_name', '?'):50s} key={key:5s} bpm={bpm:5.0f} char={char}")
|
||||
|
||||
|
||||
# Script entry point (no interactive pause: statistics are read-only).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user