"""Sort a raw audio sample library into ``<duration>/<category>`` folders.

Files are copied (never moved) from a source tree into a destination tree
whose first level is chosen from the clip duration and whose second level is
chosen from keywords found in the file name.
"""

import glob
import json
import logging
import os
import re
import shutil
import wave
from pathlib import Path
from typing import List

logger = logging.getLogger("AudioOrganizer")
logging.basicConfig(level=logging.INFO)

# Category name -> keywords that select it. Dict order is the match priority:
# the first category with a matching keyword wins.
CATEGORIES = {
    'kick': ['kick', 'bd', 'bass drum'],
    'snare': ['snare', 'sd', 'clap'],
    'hat': ['hat', 'hh', 'hihat', 'closed hat', 'open hat'],
    'perc': ['perc', 'percussion', 'conga', 'shaker', 'tamb', 'tom'],
    'bass': ['bass', 'sub', '808'],
    'synth': ['synth', 'lead', 'pad', 'arp', 'pluck', 'chord'],
    'vocal': ['vocal', 'vox', 'voice', 'speech', 'chant'],
    'fx': ['fx', 'sweep', 'riser', 'downlifter', 'impact', 'crash', 'fill',
           'texture', 'drone', 'noise'],
}

# File suffixes (lower-case) that are treated as audio.
AUDIO_EXTENSIONS = frozenset({'.wav', '.aif', '.aiff', '.mp3'})

# Heuristic byte rates used only when the WAV header cannot be read.
_MP3_BYTES_PER_SECOND = 30000.0    # roughly a 240 kbit/s stream
_PCM_BYTES_PER_SECOND = 176400.0   # 44.1 kHz * 2 bytes * 2 channels

# Separators between the words of a sample name.
_WORD_SEPARATORS = re.compile(r"[\s_\-]+")


def get_duration(file_path: str) -> float:
    """Return the duration of an audio file in seconds.

    The WAV header is read first. If that fails (not a WAV file, truncated
    or unreadable header), the duration is estimated from the file size
    using a fixed byte rate, so the result for non-WAV files is only a
    rough approximation.

    Args:
        file_path: Path of the audio file (``str``, ``bytes`` or path-like).

    Returns:
        The duration in seconds, or ``0.0`` when the file cannot be
        inspected at all.
    """
    try:
        path = os.fsdecode(file_path)
    except TypeError:
        return 0.0

    try:
        with wave.open(path, 'rb') as wav:
            frames = wav.getnframes()
            rate = wav.getframerate()
        if rate > 0:
            return frames / float(rate)
    except Exception as exc:
        # Deliberately best-effort: any header problem falls through to the
        # size-based estimate, but the reason is recorded instead of hidden.
        logger.debug("WAV header unreadable for %s (%s); estimating from size",
                     path, exc)

    try:
        size_bytes = os.path.getsize(path)
    except (OSError, ValueError):
        return 0.0

    if path.lower().endswith('.mp3'):
        return size_bytes / _MP3_BYTES_PER_SECOND
    return size_bytes / _PCM_BYTES_PER_SECOND


def detect_category(name: str) -> str:
    """Pick the category for a sample from keywords in its name.

    Matching is case-insensitive and happens in two passes, each walking
    ``CATEGORIES`` in order:

    1. Whole-word match. The name is split on spaces, underscores and
       hyphens together, and a keyword matches when its word(s) appear
       consecutively. Multi-word keywords such as ``'bass drum'`` therefore
       match ``bass_drum_01``.
    2. Plain substring match, which catches names with no separators
       (e.g. ``808kick``).

    Args:
        name: File name without extension.

    Returns:
        A key of ``CATEGORIES``, ``'loop_other'`` if no keyword matched but
        the name contains ``loop``, otherwise ``'other'``.
    """
    name_lower = name.lower()

    # Normalise once to " word word word " so that a keyword padded with
    # spaces can only match on word boundaries.
    words = [word for word in _WORD_SEPARATORS.split(name_lower) if word]
    padded = " " + " ".join(words) + " "

    for cat, keywords in CATEGORIES.items():
        if any(f" {kw} " in padded for kw in keywords):
            return cat

    # Fallback substring check
    for cat, keywords in CATEGORIES.items():
        if any(kw in name_lower for kw in keywords):
            return cat

    if 'loop' in name_lower:
        return 'loop_other'
    return 'other'


def get_duration_folder(duration: float) -> str:
    """Map a duration in seconds to its top-level folder name.

    Args:
        duration: Clip length in seconds.

    Returns:
        ``"oneshots"`` up to 2.8 s, ``"loops"`` up to 16 s, else
        ``"textures"`` (both bounds inclusive).
    """
    if duration <= 2.8:
        return "oneshots"
    if duration <= 16.0:
        return "loops"
    return "textures"


def _find_audio_files(source_path: Path, dest_path: Path) -> List[Path]:
    """Return the audio files below *source_path* in sorted order.

    Suffixes are compared case-insensitively, directories are ignored, and
    when the destination lives inside the source tree its contents are
    skipped so that earlier output is not ingested again.
    """
    source_root = source_path.resolve()
    dest_root = dest_path.resolve()
    dest_is_nested = source_root in dest_root.parents

    found = []
    for candidate in source_path.rglob('*'):
        if candidate.suffix.lower() not in AUDIO_EXTENSIONS:
            continue
        if not candidate.is_file():
            continue
        if dest_is_nested and dest_root in candidate.resolve().parents:
            continue
        found.append(candidate)

    # Sorted so that collision suffixes are assigned reproducibly.
    return sorted(found)


def _unique_target(target_folder: Path, source_file: Path) -> Path:
    """Return a path in *target_folder* that does not exist yet.

    The original file name is used when free; otherwise ``_1``, ``_2``, ...
    is appended to the stem until an unused name is found.
    """
    target_file = target_folder / source_file.name
    counter = 1
    while target_file.exists():
        target_file = target_folder / (
            f"{source_file.stem}_{counter}{source_file.suffix}"
        )
        counter += 1
    return target_file


def organize_library(source_dir: str, dest_dir: str) -> None:
    """Copy every audio file under *source_dir* into a structured tree.

    Each file is copied to ``dest_dir/<duration folder>/<category>/``.
    Existing files are never overwritten; a numeric suffix is added
    instead. Files whose duration is 0.1 s or less are skipped. A failure
    on one file is logged and does not stop the run.

    Args:
        source_dir: Root of the raw sample library.
        dest_dir: Root of the structured library (created as needed).
    """
    logger.info("Scanning %s...", source_dir)
    source_path = Path(source_dir)
    dest_path = Path(dest_dir)

    files_to_process = _find_audio_files(source_path, dest_path)
    if not files_to_process:
        logger.warning("No audio files found in %s", source_dir)
        return

    logger.info("Found %d audio files. Reorganizing to %s...",
                len(files_to_process), dest_dir)

    processed_count = 0
    for f in files_to_process:
        try:
            dur = get_duration(str(f))
            if dur <= 0.1:  # Skip tiny unreadable files
                continue

            target_folder = (
                dest_path / get_duration_folder(dur) / detect_category(f.stem)
            )
            target_folder.mkdir(parents=True, exist_ok=True)

            # Avoid overwriting names
            target_file = _unique_target(target_folder, f)
            shutil.copy2(str(f), str(target_file))

            processed_count += 1
            if processed_count % 50 == 0:
                logger.info("Processed %d files...", processed_count)
        except Exception as e:
            # Per-file boundary: keep going with the remaining files.
            logger.error("Error processing %s: %s", f.name, e)

    logger.info("Successfully organized %d files into %s",
                processed_count, dest_dir)


def main() -> None:
    """Parse the command line and run the organizer."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Organize an audio library by duration and type"
    )
    parser.add_argument("--source", required=True,
                        help="Raw sample library path")
    parser.add_argument("--dest", required=True,
                        help="Destination structured library path")
    args = parser.parse_args()
    organize_library(args.source, args.dest)


if __name__ == "__main__":
    main()