144 lines
4.6 KiB
Python
144 lines
4.6 KiB
Python
"""
|
|
Batch analyzer - STANDALONE for double-click execution.
|
|
Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
|
|
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).
|
|
|
|
IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows
|
|
for ProcessPoolExecutor. Without it, child processes re-import this file
|
|
and create infinite process spawning.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
import os
|
|
import time
|
|
import json
|
|
import warnings
|
|
import traceback
|
|
import multiprocessing
|
|
|
|
# Frozen-executable support for multiprocessing on Windows. The call is kept
# at import time, exactly where the original script placed it.
# NOTE(review): the Python docs recommend calling this right after the
# `if __name__ == "__main__":` line - confirm whether it should move there.
multiprocessing.freeze_support()

# Suppress every warning category for the whole run.
warnings.filterwarnings("ignore")

# Project root: three directory levels above this file's absolute path.
PROJECT = os.path.dirname(
    os.path.dirname(
        os.path.dirname(os.path.abspath(__file__))
    )
)

# Relative paths and the `src` package are resolved against the project root,
# so switch the working directory and make the root importable first.
os.chdir(PROJECT)
if sys.path.count(PROJECT) == 0:
    sys.path.insert(0, PROJECT)

# Must come after the sys.path tweak above.
from src.analyzer import batch_analyze, collect_audio_files, plan_renames, save_index
|
|
|
|
|
|
def _label(value, fallback):
    """Return *value* as a printable string, or *fallback* when it is None."""
    return fallback if value is None else str(value)


def main(workers: int = 16) -> None:
    """Run the four-step batch analysis and print a summary to the console.

    Steps: collect audio files from the two library folders, analyze them
    with ``batch_analyze`` (resumable through a JSONL checkpoint), save the
    index, and write the rename plan. Results that contain an ``"error"``
    key are counted as failures and dumped to ``analysis_errors.json``.

    Args:
        workers: Number of worker processes handed to ``batch_analyze`` and
            shown in the banner. Defaults to 16, the previously hard-coded
            value.

    Returns:
        None. Returns early, after printing an error line, when no audio
        files are found.
    """
    # Local import keeps this block self-contained; the file header does not
    # import ``collections``.
    from collections import Counter

    rule = "=" * 60
    print(rule)
    print(" ANALIZADOR FORENSE DE SAMPLES v2.0")
    print(" ProcessPoolExecutor + aubio F0 (C-native)")
    print(" 4 capas: Signal + Perceptual + Musical + Timbre")
    print(f" {workers} procesos independientes = {workers} cores en paralelo")
    print(rule)

    lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
    lib2 = os.path.join(PROJECT, "librerias", "reggaeton")

    print("\n[1/4] Colectando archivos de audio...")
    files = collect_audio_files(lib1, lib2)
    print(f" Encontrados: {len(files)} archivos")

    if not files:
        print("ERROR: No se encontraron archivos de audio.")
        return

    data_dir = os.path.join(PROJECT, "data")
    os.makedirs(data_dir, exist_ok=True)
    checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")

    # A checkpoint under 1000 bytes is treated as a leftover from the failed
    # thread-based run and discarded, so the analysis starts clean.
    if os.path.exists(checkpoint) and os.path.getsize(checkpoint) < 1000:
        os.remove(checkpoint)
        print(" (Removed broken checkpoint)")

    print(f"\n[2/4] Analizando con {workers} PROCESOS (70% CPU)...")
    print(" Cada proceso en su propio core, sin GIL")
    print(f" Checkpoint: {checkpoint}")
    print(" (Si se corta, re-ejecuta y continua desde donde quedo)")
    print()

    start = time.time()
    results = batch_analyze(files, workers=workers, checkpoint_path=checkpoint)
    elapsed = time.time() - start

    # A result dict carrying an "error" key marks a file that failed.
    valid = [r for r in results if "error" not in r]
    errors = [r for r in results if "error" in r]

    print(f"\n Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
    print(f" Exitosos: {len(valid)} | Errores: {len(errors)}")

    if errors:
        err_path = os.path.join(data_dir, "analysis_errors.json")
        with open(err_path, "w", encoding="utf-8") as f:
            json.dump(errors, f, ensure_ascii=False, indent=2)
        print(f" Errores guardados en: {err_path}")

    print("\n[3/4] Guardando indice...")
    index_path = os.path.join(data_dir, "sample_index.json")
    save_index(results, index_path)

    print("\n[4/4] Plan de renombrado...")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
    renames = plan_renames(results, output_dir)
    rename_path = os.path.join(data_dir, "rename_plan.json")
    with open(rename_path, "w", encoding="utf-8") as f:
        json.dump(renames, f, ensure_ascii=False, indent=2)
    print(f" {len(renames)} archivos para renombrar")
    print(f" Plan guardado en: {rename_path}")

    # Summary
    print("\n" + rule)
    print(" RESUMEN")
    print(rule)

    roles = Counter()
    chars = Counter()
    keys = Counter()
    for r in valid:
        # Labels are coerced to str: a stored None (or any non-string value)
        # would otherwise make the ":12s" / ":5s" format specs below raise.
        roles[_label(r.get("role"), "?")] += 1
        chars[_label(r.get("character"), "?")] += 1
        musical = r.get("musical")
        key = musical.get("key") if isinstance(musical, dict) else None
        keys[_label(key, "X")] += 1

    # most_common() sorts by descending count and keeps first-seen order
    # among ties, matching the previous sorted(..., key=-count) output.
    print("\n Roles:")
    for role, count in roles.most_common():
        bar = "#" * min(count, 60)
        print(f" {role:12s} {count:4d} {bar}")

    print("\n Caracteres:")
    for char, count in chars.most_common():
        bar = "#" * min(count, 50)
        print(f" {char:12s} {count:4d} {bar}")

    print("\n Tonalidades (top 10):")
    for key, count in keys.most_common()[:10]:
        print(f" {key:5s} {count:4d}")

    print("\n Proximo paso: ejecuta 2_RENOMBRAR.bat")
    print(rule)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point. The guard keeps worker processes that re-import
    # this module from running the analysis again.
    exit_code = 0
    try:
        main()
    except Exception as e:
        # Top-level boundary: show the failure in the console and remember
        # it, so the process does not report success to whatever launched it.
        print(f"\nFATAL ERROR: {e}")
        traceback.print_exc()
        exit_code = 1

    # Keep the console window open until the user has read the output.
    try:
        input("Presiona Enter para cerrar...")
    except EOFError:
        # stdin is closed or redirected; there is nobody to wait for.
        pass

    sys.exit(exit_code)
|