"""Batch analyzer - STANDALONE for double-click execution.

Uses ProcessPoolExecutor (16 processes) for TRUE multi-core parallelism.
aubio replaces pyin for F0 detection (~1ms vs ~2s per file).

IMPORTANT: The if __name__ == '__main__' guard is REQUIRED on Windows for
ProcessPoolExecutor. Without it, child processes re-import this file and
create infinite process spawning.
"""
from __future__ import annotations

import sys
import os
import time
import json
import warnings
import traceback
import multiprocessing
from collections import Counter

# CRITICAL: Windows multiprocessing guard - MUST be at top level
multiprocessing.freeze_support()

warnings.filterwarnings("ignore")

# The project root is three directory levels above this file. It becomes the
# working directory and is put on sys.path so that `src.analyzer` resolves.
PROJECT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.chdir(PROJECT)
if PROJECT not in sys.path:
    sys.path.insert(0, PROJECT)

# Must come after the sys.path tweak above.
from src.analyzer import (  # noqa: E402
    collect_audio_files,
    batch_analyze,
    save_index,
    plan_renames,
)


def _label(value, fallback):
    """Return *value* as a printable string, or *fallback* when it is None.

    The summary prints labels with the ``s`` format type, which raises
    ``TypeError`` for ``None`` and other non-string values, so every label is
    normalised here before it is counted.
    """
    return fallback if value is None else str(value)


def _summarize(valid):
    """Count roles, characters and musical keys over the successful results.

    Returns a ``(roles, chars, keys)`` tuple of ``Counter`` objects. Missing
    or ``None`` fields are counted under "?" (role, character) or "X" (key).
    """
    roles, chars, keys = Counter(), Counter(), Counter()
    for r in valid:
        roles[_label(r.get("role"), "?")] += 1
        chars[_label(r.get("character"), "?")] += 1
        # "musical" may be absent or None; only look inside it when it is a dict.
        musical = r.get("musical")
        key = musical.get("key") if isinstance(musical, dict) else None
        keys[_label(key, "X")] += 1
    return roles, chars, keys


def _print_histogram(title, counts, bar_cap):
    """Print one "label count bar" line per entry, most frequent first."""
    print(f"\n {title}:")
    # most_common() sorts by count descending; ties keep first-seen order.
    for name, count in counts.most_common():
        bar = "#" * min(count, bar_cap)
        print(f" {name:12s} {count:4d} {bar}")


def main():
    """Run the four-step batch analysis and print a summary.

    Steps:
      1. Collect audio files from the two library folders.
      2. Analyze them with ``batch_analyze`` (16 workers, with a checkpoint).
      3. Save the index to ``data/sample_index.json``.
      4. Save the rename plan to ``data/rename_plan.json``.

    Results that contain an "error" key are also written to
    ``data/analysis_errors.json``. Returns ``None``; returns early when no
    audio files are found.
    """
    print("=" * 60)
    print(" ANALIZADOR FORENSE DE SAMPLES v2.0")
    print(" ProcessPoolExecutor + aubio F0 (C-native)")
    print(" 4 capas: Signal + Perceptual + Musical + Timbre")
    print(" 16 procesos independientes = 16 cores en paralelo")
    print("=" * 60)

    lib1 = os.path.join(PROJECT, "libreria", "reggaeton")
    lib2 = os.path.join(PROJECT, "librerias", "reggaeton")

    print("\n[1/4] Colectando archivos de audio...")
    files = collect_audio_files(lib1, lib2)
    print(f" Encontrados: {len(files)} archivos")

    if not files:
        print("ERROR: No se encontraron archivos de audio.")
        return

    data_dir = os.path.join(PROJECT, "data")
    os.makedirs(data_dir, exist_ok=True)
    checkpoint = os.path.join(data_dir, "analysis_checkpoint.jsonl")

    # Delete old checkpoint from failed thread-based run. A file smaller than
    # 1000 bytes is treated as broken (heuristic) and removed.
    if os.path.exists(checkpoint):
        old_size = os.path.getsize(checkpoint)
        if old_size < 1000:
            os.remove(checkpoint)
            print(" (Removed broken checkpoint)")

    print(f"\n[2/4] Analizando con 16 PROCESOS (70% CPU)...")
    print(f" Cada proceso en su propio core, sin GIL")
    print(f" Checkpoint: {checkpoint}")
    print(f" (Si se corta, re-ejecuta y continua desde donde quedo)")
    print()

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments during a long run.
    start = time.perf_counter()
    results = batch_analyze(files, workers=16, checkpoint_path=checkpoint)
    elapsed = time.perf_counter() - start

    valid = [r for r in results if "error" not in r]
    errors = [r for r in results if "error" in r]

    print(f"\n Tiempo: {elapsed:.1f}s ({elapsed / max(len(files), 1):.2f}s/archivo)")
    print(f" Exitosos: {len(valid)} | Errores: {len(errors)}")

    if errors:
        err_path = os.path.join(data_dir, "analysis_errors.json")
        with open(err_path, "w", encoding="utf-8") as f:
            json.dump(errors, f, ensure_ascii=False, indent=2)
        print(f" Errores guardados en: {err_path}")

    print(f"\n[3/4] Guardando indice...")
    index_path = os.path.join(data_dir, "sample_index.json")
    save_index(results, index_path)

    print(f"\n[4/4] Plan de renombrado...")
    output_dir = os.path.join(PROJECT, "librerias", "analyzed_samples")
    renames = plan_renames(results, output_dir)
    rename_path = os.path.join(data_dir, "rename_plan.json")
    with open(rename_path, "w", encoding="utf-8") as f:
        json.dump(renames, f, ensure_ascii=False, indent=2)
    print(f" {len(renames)} archivos para renombrar")
    print(f" Plan guardado en: {rename_path}")

    # Summary
    print("\n" + "=" * 60)
    print(" RESUMEN")
    print("=" * 60)

    roles, chars, keys = _summarize(valid)
    _print_histogram("Roles", roles, 60)
    _print_histogram("Caracteres", chars, 50)

    print(f"\n Tonalidades (top 10):")
    for key, count in keys.most_common(10):
        print(f" {key:5s} {count:4d}")

    print(f"\n Proximo paso: ejecuta 2_RENOMBRAR.bat")
    print("=" * 60)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: report the failure instead of letting the
        # console window vanish with an unread traceback.
        print(f"\nFATAL ERROR: {e}")
        traceback.print_exc()
    # Keep the console open when the script is launched by double-click.
    input("Presiona Enter para cerrar...")