Cambios realizados: - server.py: actualiza PROJECT_SAMPLES_DIR a reggaeton - sample_manager.py: cambia base_dir a reggaeton + agrega género reggaeton - health_check.py: actualiza paths a reggaeton - scan_audio.py: actualiza path a reggaeton - segment_rag_builder.py: actualiza default library a reggaeton - reference_stem_builder.py: actualiza PROJECT_SAMPLES_DIR a reggaeton - song_generator.py: agrega GENRE_CONFIG y SECTION_BLUEPRINT para reggaeton - rebuild_index.py: actualiza para apuntar a reggaeton Nueva estructura soportada: - 1. MIDI CHORDS / 2. MIDI ARP (MIDI files) - 3. ONE SHOTS / 4. DRUM LOOPS - 5. FX / 6. IMPACT INTRO / 7. FILL - 8. KICKS / 9. SNARE / 10. PERCS - 11. VOCALS - reggaeton 2/ (bass, drumloops, fx, kick, oneshots, perc, snare) - SentimientoLatino2025/ (drums, vocals, loops, one shots) Index rebuilt: 508 samples
199 lines
8.3 KiB
Python
199 lines
8.3 KiB
Python
"""segment_rag_builder.py - Build or refresh the persistent segment-audio index."""

from __future__ import annotations

import argparse
import json
import logging
from pathlib import Path

from reference_listener import ReferenceAudioListener, export_segment_rag_manifest, generate_segment_rag_summary, _get_segment_rag_status, _backfill_segment_cache_metadata

logger = logging.getLogger(__name__)

def _default_library_dir() -> Path:
|
|
return Path(__file__).resolve().parents[2] / "librerias" / "reggaeton"
|
|
|
|
|
|
def main() -> int:
|
|
parser = argparse.ArgumentParser(description="Build the persistent segment-audio retrieval cache.")
|
|
parser.add_argument("--library-dir", default=str(_default_library_dir()), help="Audio library directory")
|
|
parser.add_argument("--roles", nargs="*", default=None, help="Subset of roles to index")
|
|
parser.add_argument("--max-files", type=int, default=None, help="Optional limit for targeted files")
|
|
parser.add_argument("--duration-limit", type=float, default=24.0, help="Max seconds per file during indexing")
|
|
parser.add_argument("--force", action="store_true", help="Rebuild even if persistent segment cache already exists")
|
|
parser.add_argument("--json", action="store_true", help="Emit full JSON report")
|
|
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output")
|
|
parser.add_argument("--offset", type=int, default=0, help="Skip first N files before starting (for chunked indexing)")
|
|
parser.add_argument("--batch-size", type=int, default=None, help="Process exactly N files then stop (for chunked indexing)")
|
|
parser.add_argument("--output-manifest", type=str, default=None, help="Path to save full manifest JSON")
|
|
parser.add_argument("--output-summary", type=str, default=None, help="Path to save summary report")
|
|
parser.add_argument("--resume", action="store_true", help="Resume from previous run state")
|
|
parser.add_argument("--export-manifest", type=str, default=None,
|
|
help="Export candidate manifest to FILE (format: .json or .md)")
|
|
parser.add_argument("--export-format", type=str, default="json",
|
|
choices=['json', 'markdown'], help="Manifest export format")
|
|
parser.add_argument("--status", action="store_true", help="Show current index status without building")
|
|
parser.add_argument("--backfill-metadata", action="store_true", help="Backfill metadata into existing cache files from indexing state")
|
|
parser.add_argument("--force-backfill", action="store_true", help="Force backfill even for files that already have metadata")
|
|
args = parser.parse_args()
|
|
|
|
# Configure logging based on verbose flag
|
|
if args.verbose:
|
|
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s')
|
|
else:
|
|
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
|
|
|
|
# Handle --status flag for early exit
|
|
if args.status:
|
|
status = _get_segment_rag_status(Path(args.library_dir))
|
|
|
|
if args.json:
|
|
print(json.dumps(status, indent=2, default=str))
|
|
else:
|
|
print("=" * 60)
|
|
print("SEGMENT RAG INDEX STATUS")
|
|
print("=" * 60)
|
|
print(f"Cache Directory: {status['cache_dir']}")
|
|
print(f"Cache Files: {status['cache_files']}")
|
|
print(f"Total Indexed Segments: {status['total_segments']}")
|
|
print(f"Status: {status.get('status', 'unknown')}")
|
|
|
|
if status.get('role_coverage'):
|
|
print("\nRole Coverage:")
|
|
for role, count in sorted(status['role_coverage'].items()):
|
|
print(f" {role}: {count} segments")
|
|
|
|
if status.get('newest_entries'):
|
|
print(f"\nNewest Entries: {len(status['newest_entries'])} files")
|
|
for entry in status['newest_entries'][:5]:
|
|
print(f" - {entry['file_name']} ({entry['segments']} segments)")
|
|
|
|
if status.get('oldest_entries'):
|
|
print(f"\nOldest Entries: {len(status['oldest_entries'])} files")
|
|
for entry in status['oldest_entries'][:5]:
|
|
print(f" - {entry['file_name']} ({entry['segments']} segments)")
|
|
|
|
return 0
|
|
|
|
# Handle --backfill-metadata flag for early exit
|
|
if args.backfill_metadata:
|
|
result = _backfill_segment_cache_metadata(Path(args.library_dir), force=args.force_backfill)
|
|
|
|
if args.json:
|
|
print(json.dumps(result, indent=2, default=str))
|
|
else:
|
|
print("=" * 60)
|
|
print("SEGMENT CACHE METADATA BACKFILL")
|
|
print("=" * 60)
|
|
print(f"Cache Directory: {result['cache_dir']}")
|
|
print(f"Cache Files: {result['cache_files']}")
|
|
print(f"Backfilled: {result['backfilled']}")
|
|
print(f"Skipped: {result['skipped']}")
|
|
print(f"Errors: {result['errors']}")
|
|
print(f"Status: {result.get('status', 'unknown')}")
|
|
|
|
return 0
|
|
|
|
listener = ReferenceAudioListener(args.library_dir)
|
|
report = listener.build_segment_rag_index(
|
|
roles=args.roles,
|
|
max_files=args.max_files,
|
|
duration_limit=args.duration_limit,
|
|
force=args.force,
|
|
offset=args.offset,
|
|
batch_size=args.batch_size,
|
|
resume=args.resume,
|
|
)
|
|
|
|
# Generate enhanced summary
|
|
summary = generate_segment_rag_summary(report, Path(args.library_dir))
|
|
|
|
if args.json:
|
|
print(json.dumps(summary, indent=2, default=str))
|
|
else:
|
|
# Enhanced text output
|
|
print("=" * 60)
|
|
print("SEGMENT RAG INDEX COMPLETE")
|
|
print("=" * 60)
|
|
print(f"Device: {summary['device']}")
|
|
print(f"Cache: {summary['segment_index_dir']}")
|
|
print()
|
|
print(f"Files: {summary['files_targeted']} targeted")
|
|
print(f" Built: {summary['built']}")
|
|
print(f" Reused: {summary['reused']}")
|
|
print(f" Skipped: {summary['skipped']}")
|
|
print(f" Errors: {summary['errors']}")
|
|
print()
|
|
print(f"Total Segments: {summary['total_segments']}")
|
|
|
|
if 'summary_stats' in summary:
|
|
stats = summary['summary_stats']
|
|
print(f" Avg per file: {stats['avg_segments_per_file']:.1f}")
|
|
print(f" Range: {stats['min_segments']} - {stats['max_segments']}")
|
|
|
|
if 'role_coverage' in summary:
|
|
print("\nRole Coverage:")
|
|
for role in sorted(summary['role_coverage'].keys()):
|
|
print(f" {role}: {summary['role_coverage'][role]} segments")
|
|
|
|
if 'cache_info' in summary:
|
|
info = summary['cache_info']
|
|
print(f"\nCache Size: {info['cache_size_mb']} MB")
|
|
|
|
if args.offset > 0:
|
|
print(f"\nOffset: {args.offset}")
|
|
if args.batch_size is not None:
|
|
print(f"Batch Size: {args.batch_size}")
|
|
print(f"Files Remaining: {summary.get('files_remaining', 'unknown')}")
|
|
|
|
# Save manifest if requested
|
|
if args.output_manifest:
|
|
manifest_path = Path(args.output_manifest)
|
|
manifest_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(manifest_path, 'w') as f:
|
|
json.dump({
|
|
"report": report,
|
|
"full_manifest": report.get("manifest", []),
|
|
}, f, indent=2)
|
|
if not args.json:
|
|
print(f"\nManifest saved to: {manifest_path}")
|
|
|
|
# Save summary if requested
|
|
if args.output_summary:
|
|
summary_path = Path(args.output_summary)
|
|
summary_path.parent.mkdir(parents=True, exist_ok=True)
|
|
with open(summary_path, 'w') as f:
|
|
json.dump(summary, f, indent=2, default=str)
|
|
if not args.json:
|
|
print(f"Summary saved to: {summary_path}")
|
|
|
|
# Export manifest in requested format
|
|
if args.export_manifest:
|
|
manifest_path = Path(args.export_manifest)
|
|
export_format = args.export_format
|
|
|
|
# Determine format from extension if not specified
|
|
if not args.export_format or args.export_format == "json":
|
|
if manifest_path.suffix == '.md':
|
|
export_format = 'markdown'
|
|
else:
|
|
export_format = 'json'
|
|
|
|
export_segment_rag_manifest(
|
|
report.get('manifest', []),
|
|
manifest_path,
|
|
format=export_format
|
|
)
|
|
print(f"Manifest exported to: {manifest_path}")
|
|
|
|
return 0
if __name__ == "__main__":
|
|
raise SystemExit(main())
|