"""
sample_selector.py - Selector inteligente de samples (Fase 4 mejorada)

Proporciona:
- Selección contextual basada en género, key, BPM
- Matching armónico entre samples
- Creación de kits de batería coherentes
- Recomendaciones basadas en compatibilidad
- Mapeo MIDI automático

Mejoras Fase 4:
- Ranking mejorado con múltiples factores de similitud
- Diversidad entre corridas con seeding determinista
- Validación de roles para evitar elecciones absurdas
- Penalización de familias repetidas
- Balance one-shots vs loops
- Soporte opcional para GPU/embeddings
"""

import random
import logging
import hashlib
import time
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, field
from collections import defaultdict, deque

# Detección de numpy para cálculos vectorizados
try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False
    np = None

# Detección de GPU (cupy) para aceleración
try:
    import cupy as cp
    GPU_AVAILABLE = True
except ImportError:
    GPU_AVAILABLE = False
    cp = None

# Imports del sistema de samples
try:
    from .sample_manager import SampleManager, Sample, get_manager
    from .audio_analyzer import AudioAnalyzer, calculate_key_compatibility
    MANAGER_AVAILABLE = True
except ImportError:
    try:
        from sample_manager import SampleManager, Sample, get_manager
        from audio_analyzer import AudioAnalyzer, calculate_key_compatibility
        MANAGER_AVAILABLE = True
    except ImportError:
        MANAGER_AVAILABLE = False
        SampleManager = None
        Sample = None
        AudioAnalyzer = None
        calculate_key_compatibility = None

logger = logging.getLogger("SampleSelector")

# ============================================================================
# IMPORTS DE MEMORIA DE DIVERSIDAD (Phase 5)
# ============================================================================
try:
    from .diversity_memory import (
        get_diversity_memory,
        record_sample_usage,
        record_generation_complete,
        get_penalty_for_sample,
        detect_sample_family,
        DIVERSITY_MEMORY_AVAILABLE
    )
    DIVERSITY_MEMORY_AVAILABLE = True
except ImportError:
    try:
        from diversity_memory import (
            get_diversity_memory,
            record_sample_usage,
            record_generation_complete,
            get_penalty_for_sample,
            detect_sample_family,
        )
        DIVERSITY_MEMORY_AVAILABLE = True
    except ImportError:
        DIVERSITY_MEMORY_AVAILABLE = False
        get_diversity_memory = None
        record_sample_usage = None
        record_generation_complete = None
        get_penalty_for_sample = None
        detect_sample_family = None

# In-RAM cross-generation memory (legacy, kept for backward compatibility).
# Persistence is now delegated to diversity_memory.py.
# Both maps start out as defaultdict(int): family name -> count, sample path -> count.
_cross_generation_family_memory: Dict[str, int] = defaultdict(int)
_cross_generation_path_memory: Dict[str, int] = defaultdict(int)
# Incremented once per _update_cross_generation_memory() call.
_cross_generation_generation_count: int = 0

# Per-role list of recently used sample paths; add_to_recent_memory() appends
# at the end, so later positions are more recent.
_recent_sample_diversity_memory: Dict[str, List[str]] = defaultdict(list)
# Maximum number of paths remembered per role.
RECENT_MEMORY_MAX_PER_ROLE = 50

def _get_cross_generation_memory() -> Dict[str, int]:
    """Return a shallow copy of the cross-generation family memory.

    Handing out a copy keeps callers from mutating the module-level state.
    """
    snapshot = _cross_generation_family_memory.copy()
    return snapshot

def _update_cross_generation_memory(families_used: Dict[str, int], paths_used: Optional[List[str]] = None) -> None:
    """Age the in-RAM cross-generation memory and record one more generation.

    Steps, in order:
      1. Increment the generation counter.
      2. If the persistent diversity memory is available, ask it to close the
         generation; a failure there is logged and does not abort the update.
      3. Decay every remembered family/path count by one (never below zero).
      4. Add the usage reported for this generation.
      5. Drop entries whose count is not positive.

    The new state is built aside and swapped in at the end, so an error while
    processing the arguments leaves the previous memory untouched.

    Args:
        families_used: Family name -> number of uses in this generation.
            ``None`` is treated as "nothing used".
        paths_used: Sample paths used in this generation; every occurrence
            adds one to that path's count. ``None`` or empty adds nothing.
    """
    global _cross_generation_family_memory, _cross_generation_path_memory, _cross_generation_generation_count
    _cross_generation_generation_count += 1

    # Delegate to the persistent memory system (best effort).
    if DIVERSITY_MEMORY_AVAILABLE:
        try:
            record_generation_complete()
            logger.debug("Memoria cross-generation persistida (generación %d)", _cross_generation_generation_count)
        except Exception as e:
            logger.warning("Error actualizando memoria persistente: %s", e)

    # Work on defaultdicts regardless of what the globals currently are: the
    # globals may be plain dicts, and ``d[new_key] += n`` on a plain dict
    # raises KeyError for a key that has never been seen.
    family_counts: Dict[str, int] = defaultdict(int)
    for family, count in _cross_generation_family_memory.items():
        family_counts[family] = max(0, count - 1)
    for family, count in (families_used or {}).items():
        family_counts[family] += count

    path_counts: Dict[str, int] = defaultdict(int)
    for path, count in _cross_generation_path_memory.items():
        path_counts[path] = max(0, count - 1)
    for path in paths_used or ():
        path_counts[path] += 1

    # Prune exhausted entries but keep the defaultdict type the module
    # declares, so later ``+=`` on unseen keys keeps working.
    _cross_generation_family_memory = defaultdict(
        int, {k: v for k, v in family_counts.items() if v > 0}
    )
    _cross_generation_path_memory = defaultdict(
        int, {k: v for k, v in path_counts.items() if v > 0}
    )

def reset_cross_generation_memory() -> None:
    """Clear all cross-generation memory (persistent and in RAM).

    The persistent store is reset first, trying the package-relative import
    of ``reset_diversity_memory`` and then the top-level module. The in-RAM
    state is cleared in a ``finally`` block, so it is reset even when the
    persistent reset raises; such an exception still propagates to the caller.
    """
    global _cross_generation_generation_count

    try:
        # Reset the persistent memory.
        if DIVERSITY_MEMORY_AVAILABLE:
            try:
                from .diversity_memory import reset_diversity_memory
                reset_diversity_memory()
                logger.info("Memoria de diversidad persistente reseteada")
            except ImportError:
                try:
                    from diversity_memory import reset_diversity_memory
                    reset_diversity_memory()
                    logger.info("Memoria de diversidad persistente reseteada")
                except ImportError as exc:
                    # Neither import path works: nothing persistent to reset.
                    logger.debug("reset_diversity_memory no disponible: %s", exc)
    finally:
        # Always clear the in-RAM memory (in place; only the counter is rebound).
        _cross_generation_family_memory.clear()
        _cross_generation_path_memory.clear()
        _cross_generation_generation_count = 0
        _recent_sample_diversity_memory.clear()

def add_to_recent_memory(role: str, sample_path: str) -> None:
    """Record ``sample_path`` as the most recently used sample for ``role``.

    The per-role history is ordered oldest -> newest. A path that is already
    remembered is moved to the newest slot, so that position-based recency
    (see ``get_recent_memory_penalty``) reflects the latest use rather than
    the first one. The history is trimmed to the newest
    ``RECENT_MEMORY_MAX_PER_ROLE`` entries.

    Args:
        role: Role the sample was used for.
        sample_path: Path identifying the sample.
    """
    history = _recent_sample_diversity_memory.setdefault(role, [])

    # Re-use refreshes recency: drop the stale position before appending.
    if sample_path in history:
        history.remove(sample_path)
    history.append(sample_path)

    # Keep only the newest entries.
    overflow = len(history) - RECENT_MEMORY_MAX_PER_ROLE
    if overflow > 0:
        del history[:overflow]

def get_recent_memory_penalty(role: str, sample_path: str) -> float:
    """Return the penalty factor for a sample recently used for the same role.

    The factor ranges from 1.0 (not remembered, no penalty) down to 0.1
    (strongest penalty). Recency is the distance of the path from the end of
    the role's history: the last entry has recency 1.

    Args:
        role: Role being filled.
        sample_path: Path of the candidate sample.

    Returns:
        1.0 if the path is not in the role's history; otherwise 0.1, 0.25,
        0.5 or 0.7 for recency up to 5, 10, 20 or 30 respectively, and 0.85
        beyond that.
    """
    history = _recent_sample_diversity_memory.get(role, [])
    try:
        first_seen = history.index(sample_path)
    except ValueError:
        # Not remembered for this role.
        return 1.0

    age = len(history) - first_seen
    for limit, factor in ((5, 0.1), (10, 0.25), (20, 0.5), (30, 0.7)):
        if age <= limit:
            return factor
    return 0.85

def get_recent_sample_diversity_state() -> Dict[str, List[str]]:
    """Return a snapshot of the recent-sample memory.

    Both the mapping and each per-role list are copied, so the caller can
    modify the result without affecting the module-level state.
    """
    snapshot: Dict[str, List[str]] = {}
    for role_name, remembered in _recent_sample_diversity_memory.items():
        snapshot[role_name] = list(remembered)
    return snapshot

def sync_cross_generation_memory_from_reference(families: Dict[str, int], paths: Dict[str, int]) -> None:
    """Merge externally supplied counts into the cross-generation memory.

    For every entry with a positive count, the stored value becomes the
    larger of the stored and the supplied count. Entries with a non-positive
    count are ignored; nothing is ever lowered or removed.

    Args:
        families: Family name -> count to merge into the family memory.
        paths: Sample path -> count to merge into the path memory.
    """
    merges = (
        (families, _cross_generation_family_memory),
        (paths, _cross_generation_path_memory),
    )
    # Families are merged completely before paths are touched.
    for incoming, memory in merges:
        for key, count in incoming.items():
            if count > 0:
                memory[key] = max(memory.get(key, 0), count)

def get_cross_generation_state() -> Tuple[Dict[str, int], Dict[str, int]]:
    """Return the current cross-generation memory as ``(families, paths)``.

    Both elements are plain-dict copies of the module-level maps.
    """
    families_snapshot = dict(_cross_generation_family_memory)
    paths_snapshot = dict(_cross_generation_path_memory)
    return families_snapshot, paths_snapshot


@dataclass
class SampleDecision:
    """Structured record of a single sample-selection decision."""
    sample_name: str
    target_role: str
    final_score: float
    selected: bool
    # Why the sample was turned down (used when ``selected`` is False).
    rejection_reasons: list[str] = field(default_factory=list)
    # What boosted the sample (used when ``selected`` is True).
    bonus_factors: list[str] = field(default_factory=list)
    selection_index: int = -1  # Position in ranking; defaults to -1

    def to_log_str(self) -> str:
        """Build a one-line, log-friendly description of this decision.

        Rejected samples list their rejection reasons (or "low score" when
        there are none); selected samples show the score with three decimals
        and their bonus factors (or "none").
        """
        if not self.selected:
            if self.rejection_reasons:
                reasons = ", ".join(self.rejection_reasons)
            else:
                reasons = "low score"
            return f"REJECTED: {self.sample_name} for {self.target_role} ({reasons})"

        if self.bonus_factors:
            bonuses = ", ".join(self.bonus_factors)
        else:
            bonuses = "none"
        return f"SELECTED: {self.sample_name} for {self.target_role} (score={self.final_score:.3f}, bonuses={bonuses})"


class GenreProfile:
    """Musical profile for one genre.

    A plain attribute holder; every constructor argument is stored unchanged
    under the attribute of the same name.
    """

    def __init__(
        self,
        name: str,
        bpm_range: Tuple[int, int],
        common_keys: List[str],
        drum_pattern: str,
        bass_style: str,
        characteristics: List[str],
    ):
        """Store the genre description.

        Args:
            name: Display name of the genre.
            bpm_range: Pair of tempo bounds in BPM.
            common_keys: Musical keys commonly used in the genre.
            drum_pattern: Identifier of the drum pattern style.
            bass_style: Identifier of the bass style.
            characteristics: Descriptive tags for the genre.
        """
        # Identity, tempo and tonality.
        self.name, self.bpm_range, self.common_keys = name, bpm_range, common_keys
        # Rhythm/bass style identifiers and descriptive tags.
        self.drum_pattern, self.bass_style, self.characteristics = (
            drum_pattern,
            bass_style,
            characteristics,
        )


# Musical genre profiles, keyed by a lowercase, hyphenated genre id.
# Each value is a GenreProfile: display name, BPM range, common keys,
# drum-pattern id, bass-style id and descriptive tags.
GENRE_PROFILES = {
    'techno': GenreProfile(
        name='Techno',
        bpm_range=(125, 140),
        common_keys=['F#m', 'Am', 'Dm', 'Gm', 'Cm'],
        drum_pattern='four_on_floor',
        bass_style='rolling',
        characteristics=['driving', 'industrial', 'repetitive', 'dark']
    ),
    'industrial-techno': GenreProfile(
        name='Industrial Techno',
        bpm_range=(135, 150),
        common_keys=['F#m', 'Am', 'Dm'],
        drum_pattern='distorted_four',
        bass_style='aggressive',
        characteristics=['distorted', 'harsh', 'mechanical', 'dark']
    ),
    'minimal-techno': GenreProfile(
        name='Minimal Techno',
        bpm_range=(124, 130),
        common_keys=['F#m', 'Am', 'Em'],
        drum_pattern='sparse',
        bass_style='minimal',
        characteristics=['stripped', 'subtle', 'groove', 'reduced']
    ),
    'house': GenreProfile(
        name='House',
        bpm_range=(118, 128),
        common_keys=['Am', 'Fm', 'Cm', 'Gm', 'Dm'],
        drum_pattern='classic_house',
        bass_style='funky',
        characteristics=['soulful', 'groovy', 'warm', 'organic']
    ),
    'deep-house': GenreProfile(
        name='Deep House',
        bpm_range=(120, 124),
        common_keys=['Am', 'Fm', 'Dm', 'Gm'],
        drum_pattern='deep_house',
        bass_style='subby',
        characteristics=['deep', 'jazzy', 'warm', 'mellow']
    ),
    'tech-house': GenreProfile(
        name='Tech House',
        bpm_range=(124, 128),
        common_keys=['F#m', 'Am', 'Gm', 'Cm'],
        drum_pattern='bouncy',
        bass_style='groovy',
        characteristics=['bouncy', 'funky', 'percussive', 'club']
    ),
    'progressive-house': GenreProfile(
        name='Progressive House',
        bpm_range=(126, 132),
        common_keys=['Fm', 'Am', 'Dm', 'Gm'],
        drum_pattern='progressive',
        bass_style='driving',
        characteristics=['epic', 'buildup', 'melodic', 'anthem']
    ),
    'trance': GenreProfile(
        name='Trance',
        bpm_range=(135, 150),
        common_keys=['Fm', 'Am', 'Dm', 'Gm'],
        drum_pattern='trance',
        bass_style='rolling',
        characteristics=['euphoric', 'melodic', 'uplifting', 'energetic']
    ),
    'psytrance': GenreProfile(
        name='Psytrance',
        bpm_range=(140, 150),
        common_keys=['Fm', 'Gm', 'Am'],
        drum_pattern='psy',
        bass_style='acid',
        characteristics=['psychedelic', 'acid', 'complex', 'trippy']
    ),
    'drum-and-bass': GenreProfile(
        name='Drum & Bass',
        bpm_range=(160, 180),
        common_keys=['Am', 'Fm', 'Dm', 'Gm'],
        drum_pattern='breakbeat',
        bass_style='reese',
        characteristics=['fast', 'heavy', 'complex', 'energetic']
    ),
    'liquid-dnb': GenreProfile(
        name='Liquid Drum & Bass',
        bpm_range=(168, 174),
        common_keys=['Am', 'Fm', 'Dm'],
        drum_pattern='liquid',
        bass_style='musical',
        characteristics=['smooth', 'soulful', 'melodic', 'rolling']
    ),
    'ambient': GenreProfile(
        name='Ambient',
        bpm_range=(80, 110),
        common_keys=['C', 'Dm', 'Am', 'Em'],
        drum_pattern='none',
        bass_style='droning',
        characteristics=['atmospheric', 'textural', 'slow', 'ethereal']
    ),
}


# ============================================================================
# VALID ROLE MAPPING - avoids absurd choices
# ============================================================================
# Defines which sample types are valid for each drum role.
# Some types are listed under several roles (e.g. 'hihat', 'cymbal').
DRUM_ROLE_VALID_TYPES = {
    'kick': ['kick', 'bd', 'bass_drum', 'kickdrum', '808'],
    'snare': ['snare', 'snr', 'sd', 'rimshot', 'rim'],
    'clap': ['clap', 'clp', 'handclap'],
    'hat_closed': ['hat_closed', 'closed_hat', 'chh', 'hihat', 'hat'],
    'hat_open': ['hat_open', 'open_hat', 'ohh', 'hihat'],
    'hat_pedal': ['hat_pedal', 'pedal_hat', 'hihat'],
    'perc': ['perc', 'percussion', 'conga', 'bongo', 'timbale', 'tamb', 'shaker'],
    'tom': ['tom', 'tomtom'],
    'crash': ['crash', 'cymbal', 'china'],
    'ride': ['ride', 'cymbal', 'ride_bell'],
}

# Reverse mapping: given a sample_type, which roles it may fill.
# NOTE: this is a defaultdict(list), so indexing an unknown type with []
# inserts (and returns) an empty list; use .get() for a read-only lookup.
# NOTE: the loop variables below (role, valid_types, stype) remain defined
# as module-level names after the loop finishes.
SAMPLE_TYPE_TO_ROLES = defaultdict(list)
for role, valid_types in DRUM_ROLE_VALID_TYPES.items():
    for stype in valid_types:
        SAMPLE_TYPE_TO_ROLES[stype].append(role)

# Cooldown: per the original note, a family should not be reused until this
# many selections have passed. The code that enforces it is not in this part
# of the file - TODO confirm against the consumer.
COOLDOWN_WINDOW = 10  # Number of selections before a family may be reused

# Sample families used to penalize repetition: family name -> keywords.
# _extract_sample_family() scans this dict in insertion order and returns the
# first family with a keyword contained (as a substring) in the lowercased
# sample name, so entry order matters.
SAMPLE_FAMILIES = {
    # Drums - by manufacturer/style
    '808': ['808', 'tr808', 'tr-808'],
    '909': ['909', 'tr909', 'tr-909'],
    'acoustic': ['acoustic', 'real', 'live', 'studio'],
    'electronic': ['electronic', 'digital', 'synthetic', 'synth'],
    'vintage': ['vintage', 'classic', 'old', 'retro'],
    'modern': ['modern', 'contemporary', 'new'],
    # Bass - by type
    # NOTE(review): 'subby' is listed twice; the duplicate has no effect.
    'sub': ['sub', 'subby', 'subby'],
    'reese': ['reese', 'reese_bass'],
    'acid': ['acid', '303', 'tb303'],
    # Synth - by type
    'analog': ['analog', 'analogue', 'moog', 'oberheim'],
    # NOTE(review): the 'digital' keyword is also listed under 'electronic',
    # which comes first, so a name containing 'digital' resolves to 'electronic'.
    'digital': ['digital', 'fm', 'wavetable', 'serum'],
    'vocal': ['vocal', 'voice', 'vox'],
}

# Thresholds for one-shot vs loop classification.
# ONESHOT_MAX_DURATION is read by _is_oneshot(): a sample with a known
# duration below it (and no decisive keyword in its name) counts as one-shot.
ONESHOT_MAX_DURATION = 2.0  # seconds
LOOP_MIN_DURATION = 1.0     # seconds

# One-shot vs loop preference per role
# True = prefers one-shot, False = prefers loop, None = no preference
ROLE_ONE_SHOT_PREFERENCE = {
    'kick': True,      # Must be a one-shot
    'clap': True,      # Must be a one-shot
    'hat': True,       # Must be a one-shot
    'hat_closed': True,
    'hat_open': True,
    'snare': True,
    'bass_loop': False,  # Must be a loop
    'vocal_loop': False, # Must be a loop
    'perc_loop': False,
    'top_loop': False,
    'synth_loop': False,
}

# Hard-reject patterns for critical roles.
# These describe SEMANTIC errors that should never get through.
# Expanded to harden the system (Issue #4).
#
# Per-role keys (not every role defines all of them):
#   exclude_keywords, exclude_subcategories, must_contain_one,
#   must_contain_none, min_duration, max_duration.
# NOTE(review): the code that evaluates these rules is not in this part of
# the file; the meaning of each key is presumably what its name says
# (durations presumably in seconds) - confirm against the consumer.
HARD_REJECT_PATTERNS = {
    'kick': {
        'exclude_keywords': [
            'roll', 'fill', 'loop', 'hat', 'snare', 'clap', 'vocal', 'synth', 'pad',
            'full drum', 'full mix', 'full_mix', 'fulldrum', 'fullmix', 'demo', 'song',
            'master', 'top loop', 'top_loop', 'drum loop', 'drum_loop', 'perc loop',
            'melodic', 'chord', 'stab', 'fx', 'riser', 'downlifter', 'atmos',
            'complete', 'mixed', 'stems', 'bounce', 'preview', 'final mix'
        ],
        'exclude_subcategories': ['snare', 'hat', 'clap', 'perc', 'fx', 'vocal', 'synth'],
        'max_duration': 2.0,  # Stricter: kicks longer than 2s are loops
        'must_contain_none': ['full', 'mix', 'demo', 'song', 'master'],
        'must_contain_one': ['kick', 'bd', 'bass_drum', '808', 'kickdrum', 'bass drum'],
    },
    'clap': {
        'exclude_keywords': [
            'roll', 'fill', 'loop', 'hat', 'kick', 'vocal', 'bass',
            'full drum', 'full mix', 'demo', 'song', 'master', 'top', 'perc loop',
            'snare roll', 'snare_roll', 'snareroll', 'complete', 'mixed', 'stems'
        ],
        'exclude_subcategories': ['kick', 'hat', 'fx', 'vocal', 'bass'],
        'must_contain_one': ['clap', 'hand', 'handclap'],
        'max_duration': 2.0,
        'must_contain_none': ['full', 'mix', 'snare roll', 'snare_roll'],
    },
    'hat': {
        'exclude_keywords': [
            'roll', 'kick', 'snare', 'clap', 'vocal', 'bass', 'synth', 'pad',
            'full drum', 'full mix', 'demo', 'song', 'master', 'bass loop',
            'top loop', 'drum loop', 'perc loop', 'full_mix', 'fulldrum',
            'complete', 'mixed', 'stems', 'kick drum', 'snare drum'
        ],
        'exclude_subcategories': ['kick', 'snare', 'clap', 'bass', 'vocal'],
        'max_duration': 1.5,
        'must_contain_none': ['full', 'mix', 'demo', 'complete'],
        'must_contain_one': ['hat', 'hh', 'hihat', 'hi-hat', 'cymbal', 'open hat', 'closed hat'],
    },
    'bass_loop': {
        'exclude_keywords': [
            'drum', 'hat', 'kick', 'snare', 'clap', 'perc', 'top loop', 'top_loop',
            'full drum', 'full mix', 'full_mix', 'fulldrum', 'fullmix', 'demo', 'song',
            'master', 'vocal', 'vocal loop', 'vocal_loop', 'fx', 'atmos', 'pad',
            'drum loop', 'drum_loop', 'perc loop', 'melodic', 'chord', 'synth loop',
            'complete', 'mixed', 'stems', 'bounce', 'preview', 'final mix'
        ],
        'exclude_subcategories': ['drum', 'perc', 'fx', 'vocal', 'hat'],
        'min_duration': 2.0,
        'must_contain_one': ['bass', 'sub', 'reese', '808', 'bassline', 'bass line'],
        'must_contain_none': ['full', 'mix', 'drum', 'top', 'vocal'],
    },
    'vocal_loop': {
        'exclude_keywords': [
            'drum', 'hat', 'kick', 'snare', 'bass', 'synth', 'pad', 'fx',
            'full drum', 'full mix', 'demo', 'song', 'master', 'one shot', 'oneshot',
            'shot', 'hit', 'stab', 'drum loop', 'bass loop', 'top loop',
            'complete', 'mixed', 'stems', 'bounce', 'preview', 'loop kit'
        ],
        'exclude_subcategories': ['drum', 'bass', 'perc', 'fx', 'hat'],
        'min_duration': 2.0,  # Must be at least 2s to be a loop
        'must_contain_one': ['vocal', 'vox', 'voice', 'sing', 'chorus', 'verse', 'chant', 'acapella'],
        'must_contain_none': ['full', 'mix', 'demo', 'shot', 'hit', 'one shot'],
    },
    'top_loop': {
        'exclude_keywords': [
            'bass', 'bass loop', 'vocal', 'vocal loop', 'synth loop', 'pad',
            'demo', 'song', 'master', 'fx', 'atmos', 'riser', 'downlifter',
            'melodic', 'chord', 'stab', 'complete', 'mixed', 'stems', 'snare roll'
        ],
        'exclude_subcategories': ['bass', 'vocal', 'fx', 'pad', 'synth'],
        'must_contain_one': ['top', 'perc', 'drum', 'groove', 'hat', 'shaker', 'conga', 'bongo', 'full drum'],
        'min_duration': 1.5,
        'must_contain_none': ['bass', 'vocal', 'synth loop'],
    },
    'fill_fx': {
        'exclude_keywords': [
            'kick', 'snare', 'hat', 'clap', 'bass', 'vocal', 'synth', 'pad',
            'full mix', 'demo', 'song', 'master', 'loop', 'groove', 'drum loop',
            'complete', 'mixed', 'stems', 'bass loop', 'vocal loop'
        ],
        'exclude_subcategories': ['kick', 'snare', 'hat', 'clap', 'bass', 'vocal'],
        'must_contain_one': ['fill', 'fx', 'riser', 'impact', 'crash', 'sweep', 'atmos', 'transition', 'downlifter'],
        'max_duration': 4.0,
    },
    'snare_roll': {
        'exclude_keywords': [
            'kick', 'hat', 'clap', 'bass', 'vocal', 'synth', 'pad',
            'full mix', 'demo', 'song', 'master', 'loop', 'groove', 'atmos',
            'complete', 'mixed', 'stems', 'one shot', 'drum loop', 'bass loop'
        ],
        'exclude_subcategories': ['kick', 'hat', 'clap', 'bass', 'vocal', 'fx'],
        'must_contain_one': ['snare', 'roll', 'fill', 'snareroll', 'buildup', 'build up'],
        'max_duration': 4.0,
    },
    'atmos_fx': {
        'exclude_keywords': [
            'kick', 'snare', 'hat', 'clap', 'bass', 'vocal loop',
            'full mix', 'demo', 'song', 'master', 'loop', 'groove', 'drum loop',
            'complete', 'mixed', 'stems', 'snare roll', 'fill', 'perc loop'
        ],
        'exclude_subcategories': ['kick', 'snare', 'hat', 'clap', 'bass'],
        'must_contain_one': ['atmos', 'pad', 'drone', 'ambience', 'texture', 'fx', 'riser', 'noise', 'ambient'],
        'min_duration': 2.0,
    },
    'crash_fx': {
        'must_contain_one': ['crash', 'impact', 'cymbal', 'ride', 'uplifter', 'downlifter'],
        'exclude_keywords': ['loop', 'bass', 'vocal', 'kick', 'snare', 'full mix', 'drum loop', 'complete kit'],
        'max_duration': 3.0,
    },
    'synth_loop': {
        'exclude_keywords': [
            'drum', 'kick', 'snare', 'hat', 'vocal', 'bass loop', 'full mix',
            'demo', 'song', 'master', 'complete', 'mixed', 'stems', 'drum loop',
            'perc loop', 'top loop', 'one shot'
        ],
        'must_contain_one': ['synth', 'lead', 'pad', 'chord', 'arp', 'pluck', 'melody', 'hook', 'sequence'],
        'min_duration': 1.5,
    },
}

# Suspicious keywords that penalize (but do not reject) the score, per role.
# Per the original note: a soft 30% penalty for each keyword found. The
# scoring code that applies it is not in this part of the file - TODO confirm.
SUSPICIOUS_KEYWORDS = {
    'kick': ['full', 'mix', 'demo', 'song', 'master', 'complete', 'stereo', 'stems',
             'bounce', 'preview', 'final', 'mixed', 'kit', 'pack'],
    'clap': ['full', 'mix', 'demo', 'song', 'snare roll', 'snare_roll', 'fill', 'stems',
             'bounce', 'preview', 'final', 'mixed', 'loop', 'groove', 'top loop'],
    'hat': ['full', 'mix', 'demo', 'song', 'loop', 'complete', 'stems', 'full kit',
            'bounce', 'preview', 'final', 'mixed', 'kick', 'snare', 'bass'],
    'bass_loop': ['full', 'mix', 'demo', 'vocal', 'top', 'drum loop', 'full drum', 'stems',
                  'bounce', 'preview', 'final', 'mixed', 'perc', 'snare', 'hat', 'kick'],
    'vocal_loop': ['full', 'mix', 'demo', 'shot', 'hit', 'one shot', 'drum', 'stems',
                   'bounce', 'preview', 'final', 'mixed', 'bass loop', 'loop kit'],
    'top_loop': ['bass', 'vocal', 'synth loop', 'demo', 'stems', 'snare roll',
                 'bounce', 'preview', 'final', 'mixed', 'percussion', 'hat loop'],
    'fill_fx': ['loop', 'groove', 'kick', 'snare', 'bass', 'vocal', 'stems',
                'bounce', 'preview', 'final', 'mixed', 'drum loop'],
    'snare_roll': ['loop', 'groove', 'kick', 'hat', 'bass', 'vocal', 'atmos', 'stems',
                   'bounce', 'preview', 'final', 'mixed', 'clap'],
    'atmos_fx': ['kick', 'snare', 'hat', 'clap', 'bass', 'loop', 'groove', 'stems',
                 'bounce', 'preview', 'final', 'mixed', 'drum loop', 'vocal loop'],
    'synth_loop': ['drum', 'vocal', 'bass loop', 'full mix', 'demo', 'stems',
                   'bounce', 'preview', 'final', 'mixed', 'one shot', 'hit'],
    'crash_fx': ['loop', 'bass', 'vocal', 'kick', 'snare', 'full mix', 'stems',
                 'bounce', 'preview', 'final', 'mixed', 'hat loop', 'top loop'],
    'perc_loop': ['bass', 'vocal', 'synth', 'demo', 'full mix', 'stems',
                  'bounce', 'preview', 'final', 'mixed', 'snare roll'],
}

# Required keywords per role - positive validation.
# NOTE(review): the code that consumes this table is not in this part of the
# file; presumably a sample name must contain at least one keyword of its
# role - confirm against the consumer.
ROLE_REQUIRED_KEYWORDS = {
    'kick': ['kick', 'bd', 'bass_drum', '808', 'kickdrum', 'bass drum'],
    'snare': ['snare', 'snr', 'sd', 'rim', 'rimshot'],
    'clap': ['clap', 'clp', 'handclap', 'hand clap'],
    'hat': ['hat', 'hh', 'hihat', 'hi hat', 'hi-hat', 'closed hat', 'open hat', 'cymbal'],
    'bass_loop': ['bass', 'sub', 'reese', '808', 'bassline', 'bass line'],
    'vocal_loop': ['vocal', 'vox', 'voice', 'acapella', 'chant', 'sing'],
    'top_loop': ['top', 'perc', 'drum', 'groove', 'hat', 'shaker', 'full drum'],
    'synth_loop': ['synth', 'lead', 'pad', 'chord', 'arp', 'pluck', 'melody'],
    'crash_fx': ['crash', 'cymbal', 'impact', 'ride', 'uplifter'],
    'fill_fx': ['fill', 'transition', 'tom', 'break', 'riser'],
    'snare_roll': ['roll', 'snare', 'build', 'buildup', 'snareroll'],
    'atmos_fx': ['atmos', 'drone', 'ambient', 'texture', 'noise', 'sweep'],
    # NOTE(review): 'importante' does not look like a sample-name keyword and
    # may be a leftover - confirm whether it is intentional.
    'vocal_shot': ['vocal', 'vox', 'shot', 'chop', 'stab', 'importante'],
    'perc_loop': ['perc', 'percussion', 'shaker', 'conga', 'bongo'],
}

# ============================================================================
# PER-ROLE EXCLUSION SYSTEM - Issue #4
# Defines which samples are NOT appropriate for each role
# ============================================================================
# _check_role_exclusion() reads 'exclude_keywords' and 'min_required_keywords'
# from these entries and matches them as substrings of the lowercased sample
# name. It does not read 'max_duration' / 'min_duration'; whatever enforces
# those limits is not in this part of the file.
ROLE_EXCLUSION_PATTERNS = {
    'kick': {
        'exclude_keywords': [
            'full drum', 'full_mix', 'fullmix', 'fulldrum', 'full mix', 'demo', 'song',
            'master', 'top loop', 'drum loop', 'snare roll', 'fill', 'hat loop',
            'vocal loop', 'complete kit', 'full kit', 'mixed', 'stems', 'bounce', 'preview',
            'snare', 'clap', 'hat', 'bass loop', 'vocal', 'synth', 'pad', 'atmos'
        ],
        'max_duration': 2.5,  # Reject if longer than 2.5s
        'min_required_keywords': ['kick', 'bd', 'bass_drum', '808', 'kickdrum'],
    },
    'clap': {
        'exclude_keywords': [
            'full drum', 'full_mix', 'fullmix', 'full mix', 'demo', 'song', 'master',
            'snare roll', 'snare_roll', 'hat loop', 'kick loop', 'top loop', 'drum loop',
            'bass loop', 'complete kit', 'full kit', 'mixed', 'stems', 'bounce', 'preview',
            'kick', 'hat', 'vocal', 'bass', 'synth', 'pad'
        ],
        'max_duration': 2.0,
        'min_required_keywords': ['clap', 'hand', 'handclap'],
    },
    'hat': {
        'exclude_keywords': [
            'full drum', 'full_mix', 'fullmix', 'full mix', 'demo', 'song', 'master',
            'kick loop', 'snare loop', 'bass loop', 'vocal loop', 'complete', 'full kit',
            'mixed', 'stems', 'bounce', 'preview', 'kick', 'snare', 'clap', 'bass'
        ],
        'max_duration': 2.0,
        'min_required_keywords': ['hat', 'hh', 'hihat', 'cymbal', 'open hat', 'closed hat'],
    },
    'bass_loop': {
        'exclude_keywords': [
            'full drum', 'full_mix', 'fullmix', 'full mix', 'demo', 'song', 'master',
            'top loop', 'vocal loop', 'vocal_loop', 'drum loop', 'hat loop', 'snare loop',
            'perc loop', 'fx loop', 'atmos', 'complete', 'mixed', 'stems', 'bounce', 'preview',
            'kick', 'snare', 'hat', 'vocal'
        ],
        'min_duration': 1.5,
        'min_required_keywords': ['bass', 'sub', 'reese', '808', 'bassline', 'bass line'],
    },
    'vocal_loop': {
        'exclude_keywords': [
            'full drum', 'full_mix', 'fullmix', 'full mix', 'demo', 'song', 'master',
            'one shot', 'oneshot', 'hit', 'stab', 'drum loop', 'bass loop', 'top loop',
            'complete', 'mixed', 'stems', 'bounce', 'preview', 'kick', 'snare', 'hat', 'bass'
        ],
        'min_duration': 1.5,
        'min_required_keywords': ['vocal', 'vox', 'voice', 'sing', 'chant', 'acapella', 'phrase'],
    },
    'top_loop': {
        'exclude_keywords': [
            'bass loop', 'bass_loop', 'vocal loop', 'vocal_loop', 'demo', 'song', 'master',
            'synth loop', 'pad', 'atmos', 'riser', 'downlifter', 'complete', 'mixed',
            'stems', 'bounce', 'preview', 'bass', 'vocal', 'synth'
        ],
        'min_duration': 1.0,
        'min_required_keywords': ['top', 'perc', 'drum', 'groove', 'hat', 'full drum', 'drum loop'],
    },
    'fill_fx': {
        'exclude_keywords': [
            'kick', 'snare', 'hat', 'clap', 'bass', 'vocal', 'full mix', 'demo', 'song',
            'master', 'loop', 'groove', 'complete', 'mixed', 'stems', 'bounce', 'preview',
            'drum loop', 'bass loop', 'vocal loop'
        ],
        'max_duration': 6.0,
        'min_required_keywords': ['fill', 'fx', 'riser', 'impact', 'crash', 'sweep', 'atmos', 'transition'],
    },
    'snare_roll': {
        'exclude_keywords': [
            'kick', 'hat', 'clap', 'bass', 'vocal', 'full mix', 'demo', 'song', 'master',
            'atmos', 'pad', 'complete', 'mixed', 'stems', 'bounce', 'preview', 'one shot',
            'loop', 'groove'
        ],
        'max_duration': 6.0,
        'min_required_keywords': ['roll', 'snare', 'fill', 'buildup', 'build up', 'snareroll'],
    },
    'atmos_fx': {
        'exclude_keywords': [
            'kick', 'snare', 'hat', 'clap', 'bass', 'full mix', 'demo', 'song', 'master',
            'drum loop', 'complete', 'mixed', 'stems', 'bounce', 'preview', 'snare roll',
            'fill', 'perc loop', 'vocal'
        ],
        'min_duration': 1.5,
        'min_required_keywords': ['atmos', 'pad', 'drone', 'ambience', 'texture', 'noise', 'ambient'],
    },
    'crash_fx': {
        'exclude_keywords': [
            'full mix', 'demo', 'song', 'master', 'loop', 'complete', 'mixed', 'stems',
            'bounce', 'preview', 'bass', 'vocal', 'kick', 'snare'
        ],
        'max_duration': 4.0,
        'min_required_keywords': ['crash', 'cymbal', 'impact', 'ride', 'uplifter', 'downlifter'],
    },
}


def _check_role_exclusion(sample_name: str, role: str) -> Tuple[bool, str]:
    """Decide whether a sample name is excluded for the given role.

    The role is looked up case-insensitively in ROLE_EXCLUSION_PATTERNS and
    keywords are matched as substrings of the lowercased sample name. Only
    the keyword rules are evaluated here; the duration limits stored in the
    same table are not consulted by this function.

    Args:
        sample_name: Name of the candidate sample.
        role: Target role.

    Returns:
        ``(excluded, reason)``: ``(True, <reason>)`` when the name contains an
        excluded keyword or lacks every required keyword, ``(False, "")``
        when it passes or the role has no exclusion rules.
    """
    try:
        rules = ROLE_EXCLUSION_PATTERNS[role.lower()]
    except KeyError:
        # No exclusion rules for this role: nothing to reject.
        return False, ""

    lowered = sample_name.lower()

    # The first excluded keyword found (in table order) decides the reason.
    banned = next(
        (kw for kw in rules.get('exclude_keywords', []) if kw in lowered),
        None,
    )
    if banned is not None:
        return True, f"excluded keyword '{banned}'"

    # At least one required keyword must be present, when any are defined.
    required = rules.get('min_required_keywords', [])
    if required and not any(kw in lowered for kw in required):
        return True, f"missing required keyword (need one of: {required})"

    return False, ""

# Per-role pair of duration bounds.
# NOTE(review): presumably (min_seconds, max_seconds) of acceptable sample
# length; the code that reads this table is not in this part of the file -
# confirm against the consumer.
ROLE_DURATION_RANGES = {
    'kick': (0.05, 2.5),
    'snare': (0.05, 3.0),
    'clap': (0.05, 2.0),
    'hat': (0.05, 2.0),
    'bass_loop': (1.5, 32.0),
    'vocal_loop': (1.0, 32.0),
    'top_loop': (0.75, 32.0),
    'synth_loop': (0.75, 32.0),
    'crash_fx': (0.3, 8.0),
    'fill_fx': (0.3, 12.0),
    'snare_roll': (0.5, 12.0),
    'atmos_fx': (0.5, 32.0),
    'vocal_shot': (0.1, 4.0),
    'perc_loop': (0.75, 32.0),
}


def _extract_sample_family(sample_name: str) -> str:
    """Derive the family of a sample from its name.

    SAMPLE_FAMILIES is scanned in its definition order; the first family that
    has a keyword contained in the lowercased name wins.

    Args:
        sample_name: Name of the sample.

    Returns:
        The matching family name, or ``'unknown'`` when no keyword matches.
    """
    lowered = sample_name.lower()
    matches = (
        family
        for family, keywords in SAMPLE_FAMILIES.items()
        if any(kw in lowered for kw in keywords)
    )
    return next(matches, 'unknown')


def _is_oneshot(sample: 'Sample') -> bool:
    """Determina si un sample es one-shot basado en duración y nombre."""
    name_lower = sample.name.lower()
    duration = sample.duration or 0

    # Indicadores de one-shot en el nombre
    oneshot_keywords = ['one shot', 'oneshot', 'hit', 'single', 'stab']
    if any(kw in name_lower for kw in oneshot_keywords):
        return True

    # Indicadores de loop en el nombre
    loop_keywords = ['loop', 'groove', 'pattern', 'sequence']
    if any(kw in name_lower for kw in loop_keywords):
        return False

    # Por duración
    if duration > 0:
        return duration < ONESHOT_MAX_DURATION

    # Default: asumir one-shot para drums
    return sample.category == 'drums'


# ============================================================================
# MIDI MAPPING
# ============================================================================

# MIDI note numbers for the different sample types.
# Drum entries map to a single note number; melodic entries map to the list of
# usable note numbers. Note names in the comments follow the convention where
# MIDI note 60 is C3 (so 36 is C1).
MIDI_NOTE_MAPPING = {
    # Drums (General MIDI)
    'kick': 36,        # C1
    'kick_deep': 35,   # B0
    'snare': 38,       # D1
    'snare_rim': 37,   # C#1
    'clap': 39,        # D#1 / also 50 (D2)
    'hat_closed': 42,  # F#1
    'hat_open': 46,    # A#1
    'hat_pedal': 44,   # G#1
    'tom_low': 41,     # F1
    'tom_mid': 47,     # B1
    'tom_high': 50,    # D2
    'crash': 49,       # C#2
    'ride': 51,        # D#2
    'ride_bell': 53,   # F2
    'perc_low': 43,    # G1
    'perc_mid': 45,    # A1
    'perc_high': 48,   # C2
    'shaker': 54,      # F#2 (shares note 54 with 'tambourine')
    'tambourine': 54,  # F#2
    'cowbell': 56,     # G#2

    # Melodic instruments (usable range; range() end is exclusive)
    'bass': list(range(36, 48)),      # 36-47: C1-B1
    'lead': list(range(60, 84)),      # 60-83: C3-B4
    'pad': list(range(48, 72)),       # 48-71: C2-B3
    'pluck': list(range(60, 84)),     # 60-83: C3-B4
    'arp': list(range(60, 84)),       # 60-83: C3-B4
    'chord': list(range(48, 72)),     # 48-71: C2-B3
    'vocal': list(range(60, 84)),     # 60-83: C3-B4
}


@dataclass
class DrumKit:
    """Complete drum kit: a name plus one optional sample per kit slot."""
    name: str
    kick: Optional[Sample] = None
    snare: Optional[Sample] = None
    clap: Optional[Sample] = None
    hat_closed: Optional[Sample] = None
    hat_open: Optional[Sample] = None
    perc1: Optional[Sample] = None
    perc2: Optional[Sample] = None
    tom: Optional[Sample] = None
    crash: Optional[Sample] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the kit; empty slots are emitted as None."""
        slot_names = (
            'kick', 'snare', 'clap', 'hat_closed', 'hat_open',
            'perc1', 'perc2', 'tom', 'crash',
        )
        payload: Dict[str, Any] = {'name': self.name}
        for slot in slot_names:
            occupant = getattr(self, slot)
            payload[slot] = occupant.to_dict() if occupant else None
        return payload

    def get_midi_mapping(self) -> Dict[int, Optional[Sample]]:
        """
        Build a MIDI-note -> sample mapping for the populated slots.

        Note numbers come from MIDI_NOTE_MAPPING. The tom slot uses the
        'tom_mid' note; perc1 and perc2 are not part of the mapping.
        """
        # (kit slot, MIDI_NOTE_MAPPING key) pairs, in insertion order.
        slot_to_note_key = (
            ('kick', 'kick'),
            ('snare', 'snare'),
            ('clap', 'clap'),
            ('hat_closed', 'hat_closed'),
            ('hat_open', 'hat_open'),
            ('tom', 'tom_mid'),
            ('crash', 'crash'),
        )
        mapping = {}
        for slot, note_key in slot_to_note_key:
            occupant = getattr(self, slot)
            if occupant:
                mapping[MIDI_NOTE_MAPPING[note_key]] = occupant
        return mapping


@dataclass
class InstrumentGroup:
    """Set of instruments for one style: a drum kit plus sample lists per section."""
    genre: str
    key: str
    bpm: float
    drums: DrumKit = field(default_factory=lambda: DrumKit(name="default"))
    bass: List[Sample] = field(default_factory=list)
    synths: List[Sample] = field(default_factory=list)
    fx: List[Sample] = field(default_factory=list)
    vocals: List[Sample] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the group, delegating to each sample's and the kit's to_dict()."""
        payload: Dict[str, Any] = {
            'genre': self.genre,
            'key': self.key,
            'bpm': self.bpm,
            'drums': self.drums.to_dict(),
        }
        for section in ('bass', 'synths', 'fx', 'vocals'):
            payload[section] = [entry.to_dict() for entry in getattr(self, section)]
        return payload


class SampleSelector:
    """
    Selector inteligente de samples (Fase 4 mejorada).

    Proporciona selección contextual basada en:
    - Género musical
    - Tonalidad (key) y compatibilidad armónica
    - BPM y tempo
    - Estilo y características

    Mejoras Fase 4:
    - Ranking multi-factor con scoring vectorizado
    - Seeding determinista para reproducibilidad
    - Validación de roles para evitar elecciones absurdas
    - Penalización de familias repetidas
    - Balance one-shots vs loops
    """

    def __init__(self, manager: Optional[SampleManager] = None, session_seed: Optional[int] = None):
        """
        Initialize the selector.

        Args:
            manager: SampleManager instance. When None and the sample-manager
                modules were importable, the global one from get_manager()
                is used; otherwise the selector keeps None.
            session_seed: Seed for reproducibility within a session. Any
                integer is honoured, including 0. When None, a seed is derived
                from the current time in milliseconds modulo 2**31.
        """
        if manager is None and MANAGER_AVAILABLE:
            manager = get_manager()

        self.manager = manager
        self.analyzer = AudioAnalyzer() if MANAGER_AVAILABLE else None

        # IDs of recently used samples (bounded FIFO: only the last 100 are kept)
        self._recent_sample_ids = deque(maxlen=100)
        # Usage count per sample family (family -> count)
        self._recent_families = defaultdict(int)
        # Per-role usage history (role -> [sample_ids])
        self._role_history = defaultdict(list)

        # Cooldown tracking for families
        self._family_last_used: Dict[str, int] = {}  # family -> selection_index
        self._selection_counter: int = 0  # Incremented each time a sample is selected

        # Session seed for controlled diversity.
        # Compare against None explicitly: `session_seed or ...` would silently
        # replace an explicit seed of 0 with a time-based one.
        if session_seed is None:
            session_seed = int(time.time() * 1000) % (2**31)
        self._session_seed = session_seed

        # One-shot vs loop balance preferences
        self._oneshot_preference = 0.7  # 70% preference for one-shots on drums
        self._loop_preference = 0.6     # 60% preference for loops on synths

        # GPU configuration
        self._use_gpu = GPU_AVAILABLE
        if self._use_gpu:
            logger.info("GPU disponible, usando aceleración para cálculos vectorizados")

        # Decision logging
        self._decision_log: list[SampleDecision] = []
        self._log_decisions: bool = False  # Off by default to avoid a performance impact

    def _generate_selection_seed(self, context: str = "") -> int:
        """
        Genera una semilla determinista para cada selección.
        Combina session_seed, contador y contexto.
        """
        self._selection_counter += 1
        seed_data = f"{self._session_seed}_{self._selection_counter}_{context}"
        return int(hashlib.md5(seed_data.encode()).hexdigest()[:8], 16)

    def _calculate_sample_score(self,
                                 sample: 'Sample',
                                 target_key: Optional[str] = None,
                                 target_bpm: Optional[float] = None,
                                 target_role: Optional[str] = None,
                                 target_genre: Optional[str] = None,
                                 prefer_oneshot: Optional[bool] = None) -> float:
        """
        Compute an overall score for a sample from several factors.

        Additive factors (each contributes factor_score * weight and adds its
        weight to the normalisation total only when it applies):
        - Sample rating (weight 0.15, always applied)
        - Key compatibility (weight 0.20, only when target_key is given)
        - BPM compatibility (weight 0.15, only when target_bpm is given)
        - Genre fit (weight 0.10, only when both target_genre and
          sample.genres are present)
        - Role validation (weight 0.15, only when target_role is given)
        - Repetition penalty (weight 0.10, always applied)
        - One-shot/loop balance (weight 0.10, only when prefer_oneshot is
          not None)
        - Energy (weight 0.05, only when sample.rms_energy > 0)

        Multiplicative adjustments (applied to the running score, in this
        order, so they scale everything accumulated before them):
        - x1.2 bonus when the role's one-shot/loop preference matches
        - family cooldown penalty for a fixed set of roles
        - cross-generation penalty for a fixed set of roles

        Returns:
            The accumulated score divided by the accumulated weights.
            NOTE(review): because of the x1.2 bonus the result can exceed 1.0,
            so it is not strictly bounded to [0, 1].
        """
        score = 0.0
        weights = 0.0

        # 1. Sample rating (0-5 -> 0-1); a missing rating counts as 0
        rating_score = min(1.0, (sample.rating or 0) / 5.0)
        score += rating_score * 0.15
        weights += 0.15

        # 2. Key compatibility
        if target_key and sample.key:
            if MANAGER_AVAILABLE:
                key_compat = calculate_key_compatibility(target_key, sample.key)
            else:
                # Analyzer not importable: exact match or a neutral 0.5
                key_compat = 1.0 if sample.key == target_key else 0.5
            score += key_compat * 0.20
            weights += 0.20
        elif target_key:
            # Sample has no key info: neutral score
            score += 0.5 * 0.20
            weights += 0.20

        # 3. BPM compatibility (stepped by absolute BPM difference)
        if target_bpm and sample.bpm:
            bpm_diff = abs(sample.bpm - target_bpm)
            if bpm_diff == 0:
                bpm_score = 1.0
            elif bpm_diff <= 3:
                bpm_score = 0.95
            elif bpm_diff <= 6:
                bpm_score = 0.85
            elif bpm_diff <= 10:
                bpm_score = 0.70
            else:
                # Linear falloff beyond 10 BPM, floored at 0.2
                bpm_score = max(0.2, 1.0 - (bpm_diff / 30))
            score += bpm_score * 0.15
            weights += 0.15
        elif target_bpm:
            # Sample has no BPM info: neutral score
            score += 0.5 * 0.15
            weights += 0.15

        # 4. Genre fit (names compared lower-cased with spaces turned into '-')
        if target_genre and sample.genres:
            genre_lower = target_genre.lower().replace(' ', '-')
            sample_genres_lower = [g.lower().replace(' ', '-') for g in sample.genres]
            if genre_lower in sample_genres_lower:
                genre_score = 1.0
            elif any(g in genre_lower or genre_lower in g for g in sample_genres_lower):
                # Partial (substring) match in either direction
                genre_score = 0.7
            else:
                genre_score = 0.3
            score += genre_score * 0.10
            weights += 0.10

        # 5. Role validation (avoids absurd choices)
        if target_role:
            role_score = self._validate_sample_for_role(sample, target_role)
            score += role_score * 0.15
            weights += 0.15

        # 6. Recent-repetition penalty (1.0 means "not penalised")
        repetition_penalty = self._calculate_repetition_penalty(sample)
        score += repetition_penalty * 0.10
        weights += 0.10

        # 7. One-shot vs loop balance
        if prefer_oneshot is not None:
            is_oneshot = _is_oneshot(sample)
            if prefer_oneshot and is_oneshot:
                balance_score = 0.9
            elif not prefer_oneshot and not is_oneshot:
                balance_score = 0.9
            else:
                balance_score = 0.5
            score += balance_score * 0.10
            weights += 0.10

        # Bonus for the correct type (one-shot vs loop) on critical roles.
        # This multiplies the whole running score, unlike the additive factors above.
        if target_role and target_role.lower() in ROLE_ONE_SHOT_PREFERENCE:
            prefers_oneshot = ROLE_ONE_SHOT_PREFERENCE[target_role.lower()]
            is_oneshot = _is_oneshot(sample)
            if prefers_oneshot == is_oneshot:
                score *= 1.2  # 20% bonus for the correct type
                weights += 0.1

        # 8. Energy and spectral characteristics
        # NOTE(review): rms_energy is compared directly; a None value would raise
        # TypeError here — confirm Sample always provides a number.
        if sample.rms_energy > 0:
            # Higher RMS energy scores higher, saturating at rms_energy >= 0.5
            energy_score = min(1.0, sample.rms_energy * 2)
            score += energy_score * 0.05
            weights += 0.05

        # 9. Family cooldown (penalises recently used families)
        if target_role and target_role.lower() in ['kick', 'clap', 'hat', 'bass_loop', 'vocal_loop']:
            family = _extract_sample_family(sample.name)
            cooldown_penalty = self._get_family_cooldown_penalty(family)
            score *= cooldown_penalty
            # The weight is added even when the penalty is 1.0 (no cooldown)
            weights += 0.15  # Significant weight for cooldown
            if cooldown_penalty < 0.5:
                logger.debug("COOLDOWN: family '%s' has cooldown penalty %.2f (used %d selections ago)",
                            family, cooldown_penalty, self._selection_counter - self._family_last_used.get(family, 0))

        # 10. Cross-generation penalty for critical roles
        if target_role and target_role.lower() in ['kick', 'clap', 'hat', 'bass_loop', 'vocal_loop', 'top_loop', 'synth_loop', 'snare']:
            family = _extract_sample_family(sample.name)
            sample_path = getattr(sample, 'path', '') or getattr(sample, 'file_path', '') or ''
            cross_penalty = self._get_cross_generation_penalty(family, sample_path, target_role.lower())
            # Unlike the cooldown above, score and weight change only when a penalty applies
            if cross_penalty < 1.0:
                score *= cross_penalty
                weights += 0.12
                logger.debug("CROSS_GEN: family '%s' has cross-gen penalty %.2f for role '%s' (used in %d prev generations)",
                            family, cross_penalty, target_role.lower(), _cross_generation_family_memory.get(family, 0))

        # Normalise by the accumulated weights
        return score / weights if weights > 0 else 0.5

    def _validate_sample_for_role(self, sample: 'Sample', target_role: str) -> float:
        """
        Rate how appropriate a sample is for a role.

        This guards against absurd choices such as a snare roll in the clap
        slot, a hi-hat in the kick slot, or a vocal sample in a drum kit.

        Checks run in this order and the first one that applies decides:
        - 1.0 / 0.95: the sample type / subcategory matches one of the role's
          DRUM_ROLE_VALID_TYPES (substring match in either direction);
        - 0.9: a valid type appears in the sample name;
        - 0.85 / 0.80: a ROLE_REQUIRED_KEYWORDS keyword appears in the
          name / type;
        - 0.25 / 0.20: the duration is below / above the role's
          ROLE_DURATION_RANGES window;
        - 0.30: the sample belongs to the 'drums' category;
        - 0.0: the name contains a type that is exclusive with the role;
        - 0.15: nothing matched.

        Args:
            sample: Candidate sample.
            target_role: Role to validate against (case-insensitive).

        Returns:
            Score from 0 to 1, where 0 means "completely inappropriate".
        """
        target_role_lower = target_role.lower()
        sample_name_lower = sample.name.lower()
        sample_type_lower = (sample.sample_type or '').lower()
        sample_subcat_lower = (sample.subcategory or '').lower()
        sample_duration = getattr(sample, 'duration', None) or 0

        # Legacy check against DRUM_ROLE_VALID_TYPES.
        # An empty string is a substring of every string, so missing type or
        # subcategory metadata must be skipped explicitly; otherwise an
        # untyped sample would get a perfect score for every role.
        valid_types = DRUM_ROLE_VALID_TYPES.get(target_role_lower, [])
        for vtype in valid_types:
            if sample_type_lower and (
                    vtype in sample_type_lower or sample_type_lower in vtype):
                return 1.0
            if sample_subcat_lower and (
                    vtype in sample_subcat_lower or sample_subcat_lower in vtype):
                return 0.95

        if any(vtype in sample_name_lower for vtype in valid_types):
            return 0.9

        # Required-keyword validation via ROLE_REQUIRED_KEYWORDS
        for kw in ROLE_REQUIRED_KEYWORDS.get(target_role_lower, []):
            if kw in sample_name_lower:
                return 0.85
            if kw in sample_type_lower:
                return 0.80

        # Duration window; a max of 999.0 means the role has no limit.
        duration_min, duration_max = ROLE_DURATION_RANGES.get(target_role_lower, (0.0, 999.0))
        if sample_duration > 0 and duration_max < 999.0:
            if sample_duration < duration_min:
                return 0.25
            if sample_duration > duration_max:
                return 0.20

        # Any drums-category sample gets a low score; this check runs before
        # the name-based exclusions below.
        if sample.category == 'drums':
            return 0.30

        exclusive_roles = {
            'kick': ['vocal', 'bass', 'synth', 'pad', 'fx'],
            'snare': ['vocal', 'bass', 'synth'],
            'clap': ['vocal', 'bass', 'kick'],
            'hat_closed': ['vocal', 'bass', 'kick'],
            'hat_open': ['vocal', 'bass', 'kick'],
            'bass_loop': ['drum', 'vocal', 'fx'],
            'vocal_loop': ['drum', 'bass', 'kick'],
        }

        excluded = exclusive_roles.get(target_role_lower, [])
        if any(excluded_type in sample_name_lower for excluded_type in excluded):
            return 0.0

        return 0.15

    def _hard_reject_check(self, sample: 'Sample', target_role: str) -> tuple[bool, str]:
        """
        Decide whether a sample must be discarded outright for a role.

        Stricter than _validate_sample_for_role(): it catches cases that are
        clear semantic mistakes. Hardened for Problem #4: it integrates
        ROLE_EXCLUSION_PATTERNS and logs every rejection at debug level.

        Checks, in order:
        1. name-based exclusion via _check_role_exclusion();
        2. for roles without a HARD_REJECT_PATTERNS entry, only the
           ROLE_DURATION_RANGES window;
        3. otherwise the role's excluded keywords, excluded subcategories,
           duration limits, 'must_contain_one' and 'must_contain_none' rules.

        Returns:
            (should_reject, reason): should_reject is True when the sample
            must be dropped; reason explains why ("" when accepted).
        """
        role_key = target_role.lower()
        name_lc = sample.name.lower()
        length = getattr(sample, 'duration', None)

        # Stage 1: hardened exclusion table (ROLE_EXCLUSION_PATTERNS)
        is_excluded, why_excluded = _check_role_exclusion(sample.name, target_role)
        if is_excluded:
            logger.debug("HARD_REJECT (exclusion): %s for role '%s': %s",
                         sample.name, target_role, why_excluded)
            return True, f"ROLE_EXCLUSION: {why_excluded}"

        # Stage 2: roles without dedicated patterns only get a duration check
        if role_key not in HARD_REJECT_PATTERNS:
            shortest, longest = ROLE_DURATION_RANGES.get(role_key, (0.0, 999.0))
            if length and longest < 999.0:
                if length < shortest:
                    return True, f"duration {length:.1f}s below min {shortest}s for {target_role}"
                if length > longest:
                    return True, f"duration {length:.1f}s exceeds max {longest}s for {target_role}"
            return False, ""

        rules = HARD_REJECT_PATTERNS[role_key]
        type_lc = (sample.sample_type or '').lower()
        subcat_lc = (sample.subcategory or '').lower()

        # Stage 3a: forbidden keywords in the name (first match is reported)
        banned_word = next(
            (kw for kw in rules.get('exclude_keywords', []) if kw in name_lc),
            None,
        )
        if banned_word is not None:
            logger.debug("HARD_REJECT (keyword): %s for role '%s': contains '%s'",
                         sample.name, target_role, banned_word)
            return True, f"contains excluded keyword '{banned_word}'"

        # Stage 3b: forbidden subcategories, matched against subcategory and type
        banned_subcat = next(
            (
                sc for sc in rules.get('exclude_subcategories', [])
                if sc in subcat_lc or sc in type_lc
            ),
            None,
        )
        if banned_subcat is not None:
            logger.debug("HARD_REJECT (subcat): %s for role '%s': subcategory '%s'",
                         sample.name, target_role, banned_subcat)
            return True, f"has excluded subcategory '{banned_subcat}'"

        # Stage 3c: duration limits, only when the duration is known and non-zero
        upper_limit = rules.get('max_duration')
        lower_limit = rules.get('min_duration')
        if length:
            if upper_limit and length > upper_limit:
                logger.debug("HARD_REJECT (duration): %s for role '%s': %.1fs > max %.1fs",
                             sample.name, target_role, length, upper_limit)
                return True, f"duration {length:.1f}s exceeds max {upper_limit}s"
            if lower_limit and length < lower_limit:
                logger.debug("HARD_REJECT (duration): %s for role '%s': %.1fs < min %.1fs",
                             sample.name, target_role, length, lower_limit)
                return True, f"duration {length:.1f}s below min {lower_limit}s"

        # Stage 3d: at least one required keyword must appear in name or type
        required_any = rules.get('must_contain_one', [])
        if required_any and not any(kw in name_lc or kw in type_lc for kw in required_any):
            logger.debug("HARD_REJECT (missing): %s for role '%s': needs one of %s",
                         sample.name, target_role, required_any)
            return True, f"does not contain any of: {required_any}"

        # Stage 3e: keywords that must never appear in the name
        forbidden_word = next(
            (kw for kw in rules.get('must_contain_none', []) if kw in name_lc),
            None,
        )
        if forbidden_word is not None:
            logger.debug("HARD_REJECT (forbidden): %s for role '%s': contains '%s'",
                         sample.name, target_role, forbidden_word)
            return True, f"contains excluded keyword '{forbidden_word}'"

        return False, ""


    def _validate_loop_preference(self, sample: 'Sample', target_role: str) -> tuple[bool, str]:
        """
        Validate the one-shot vs loop preference for critical roles.

        Roles without an entry in ROLE_ONE_SHOT_PREFERENCE always pass.

        Args:
            sample: Candidate sample.
            target_role: Role to validate against (case-insensitive).

        Returns:
            (is_valid, reason): is_valid is True when the sample meets the
            role's preference; otherwise reason describes the violation.
        """
        target_role_lower = target_role.lower()

        if target_role_lower not in ROLE_ONE_SHOT_PREFERENCE:
            return True, ""  # No preference defined for this role

        prefers_oneshot = ROLE_ONE_SHOT_PREFERENCE[target_role_lower]
        is_oneshot = _is_oneshot(sample)

        # The duration may be missing (None); _is_oneshot() tolerates that, so
        # the message must too. Formatting None with ':.1f' raises TypeError.
        duration = getattr(sample, 'duration', None)
        duration_text = f"{duration:.1f}s" if duration is not None else "unknown"

        if prefers_oneshot and not is_oneshot:
            return False, f"role requires one-shot but sample is loop (duration={duration_text})"
        elif not prefers_oneshot and is_oneshot:
            return False, f"role requires loop but sample is one-shot (duration={duration_text})"

        return True, ""

    def _calculate_repetition_penalty(self, sample: 'Sample') -> float:
        """
        Compute the repetition factor for a sample and its family.

        Returns 1.0 when nothing is penalised. A sample whose id is in the
        recent-id history contributes a factor of 0.3; a family already used
        n times contributes max(0.3, 1.0 - 0.15 * n). The two factors are
        multiplied, so the lowest possible value is 0.3 * 0.3.
        """
        # Factor for the exact sample having been used recently
        already_used = getattr(sample, "id", None) in self._recent_sample_ids
        sample_factor = 0.3 if already_used else 1.0

        # Factor for the sample's family: 0.85, 0.70, 0.55, ... floored at 0.3
        family_uses = self._recent_families.get(_extract_sample_family(sample.name), 0)
        if family_uses > 0:
            family_factor = max(0.3, 1.0 - (family_uses * 0.15))
        else:
            family_factor = 1.0

        return sample_factor * family_factor

    def _remember_sample(self, sample: Optional['Sample'], role: str = None) -> None:
        """Registra un sample como usado para evitar repeticiones.
        
        Ahora integra con diversity_memory.py para persistencia cross-generation.
        """
        if sample is not None and getattr(sample, "id", None):
            self._recent_sample_ids.append(sample.id)
            family = _extract_sample_family(sample.name)
            self._recent_families[family] += 1

            # Track para esta generación específica
            if hasattr(self, '_generation_families'):
                self._generation_families[family] += 1

            # Track path para cross-generation memory
            if hasattr(self, '_generation_paths'):
                sample_path = getattr(sample, 'path', '') or getattr(sample, 'file_path', '') or ''
                if sample_path:
                    self._generation_paths[sample_path] += 1

            # Track para cooldown (dentro de generación)
            self._selection_counter += 1
            self._family_last_used[family] = self._selection_counter

            # Add to recent sample diversity memory
            if role:
                sample_path = getattr(sample, 'path', '') or getattr(sample, 'file_path', '') or ''
                if sample_path:
                    add_to_recent_memory(role, sample_path)
            
            # REGISTRAR EN MEMORIA PERSISTENTE (diversity_memory.py)
            # Solo para roles críticos para evitar overhead excesivo
            if role and DIVERSITY_MEMORY_AVAILABLE:
                try:
                    sample_path = getattr(sample, 'path', '') or getattr(sample, 'file_path', '') or ''
                    if sample_path:
                        record_sample_usage(role, sample_path, sample.name)
                except Exception as e:
                    logger.debug("Error registrando sample en memoria persistente: %s", e)

    def _get_family_cooldown_penalty(self, family: str) -> float:
        """
        Calcula penalización por cooldown de familia.

        Retorna 1.0 (sin penalización) a 0.0 (penalización máxima - rechazo duro).

        Las familias recientemente usadas tienen penalización progresiva:
        - Usado hace 0 selecciones: 0.0 (rechazo duro - no reusable inmediatamente)
        - Usado hace 1 selección: 0.20
        - Usado hace 2 selecciones: 0.40
        - Usado hace 3 selecciones: 0.55
        - Usado hace 4 selecciones: 0.70
        - Usado hace 5 selecciones: 0.85
        - Usado hace COOLDOWN_WINDOW o más: 1.0 (sin penalización)
        """
        if family not in self._family_last_used:
            return 1.0

        selections_ago = self._selection_counter - self._family_last_used[family]

        if selections_ago <= 0:
            return 0.0
        elif selections_ago == 1:
            return 0.20
        elif selections_ago == 2:
            return 0.40
        elif selections_ago == 3:
            return 0.55
        elif selections_ago == 4:
            return 0.70
        elif selections_ago == 5:
            return 0.85
        elif selections_ago >= COOLDOWN_WINDOW:
            return 1.0
        else:
            return min(1.0, 0.20 + (selections_ago / COOLDOWN_WINDOW) * 0.80)

    def _get_cross_generation_penalty(self, family: str, path: str = None, role: str = None) -> float:
        """
        Compute the penalty factor for use in previous generations.

        When a role is given and diversity_memory is available, the persistent
        penalty for the sample path is asked first and returned if it is below
        1.0. Otherwise (or if that lookup fails) the in-RAM memories are used:
        the family factor, the path factor and the recent per-role factor are
        multiplied together.

        Returns:
            Penalty factor, where 1.0 means "no penalty".
        """
        # First choice: persistent memory (best effort; errors are only logged)
        if role and DIVERSITY_MEMORY_AVAILABLE:
            try:
                stored_penalty = get_penalty_for_sample(role, path or '', '')
                if stored_penalty < 1.0:
                    logger.debug("CROSS_GEN (persistent): family penalty for role '%s': %.2f",
                                 role, stored_penalty)
                    return stored_penalty
            except Exception as e:
                logger.debug("Error obteniendo penalización persistente: %s", e)

        # Fallback: legacy in-RAM memory.
        # Family factor by number of previous generations that used the family.
        family_uses = _cross_generation_family_memory.get(family, 0)
        family_factor = 1.0
        for threshold, factor in ((4, 0.08), (3, 0.20), (2, 0.40), (1, 0.70)):
            if family_uses >= threshold:
                family_factor = factor
                break

        # Path factor: any path present in the memory is penalised.
        path_factor = 1.0
        if path and path in _cross_generation_path_memory:
            path_uses = _cross_generation_path_memory.get(path, 0)
            if path_uses >= 3:
                path_factor = 0.05
            elif path_uses >= 2:
                path_factor = 0.15
            else:
                path_factor = 0.35

        # Recent per-role factor needs both a role and a path.
        role_factor = get_recent_memory_penalty(role, path) if role and path else 1.0

        return family_factor * path_factor * role_factor

    def _apply_suspicion_penalty(self, score: float, sample_name: str, role: str) -> float:
        """
        Scale down the score of samples whose name looks suspicious for a role.

        Unlike HARD_REJECT_PATTERNS this is a soft penalty: it lowers the
        score but never removes the candidate.

        Args:
            score: Base score of the sample.
            sample_name: Name of the sample.
            role: Target role; roles without SUSPICIOUS_KEYWORDS leave the
                score untouched.

        Returns:
            The score multiplied by 0.7 once per suspicious keyword found in
            the name.
        """
        role_key = role.lower() if role else ""
        if role_key not in SUSPICIOUS_KEYWORDS:
            return score

        lowered = sample_name.lower()
        hits = (word for word in SUSPICIOUS_KEYWORDS[role_key] if word in lowered)

        factor = 1.0
        for _ in hits:
            factor *= 0.7  # 30% penalty per suspicious keyword found

        return score * factor

    def _break_tie_randomized(self, candidates: List[Dict], seed_base: str = "") -> List[Dict]:
        """
        Rompe empates con jitter determinista basado en hash.

        Cuando los scores son muy cercanos (dentro del 5%), usa randomización
        determinista para evitar que siempre gane el mismo candidato.

        Args:
            candidates: Lista de dicts con 'score' o 'final_score' y 'sample'
            seed_base: String base para el seed determinista

        Returns:
            Lista reordenada con empates rotos
        """
        if len(candidates) <= 1:
            return candidates

        # Group by similar scores (within 5%)
        result = []
        i = 0
        while i < len(candidates):
            # Find all candidates with similar scores
            current_score = candidates[i].get('final_score', candidates[i].get('score', 0))
            group = [candidates[i]]
            j = i + 1
            while j < len(candidates):
                other_score = candidates[j].get('final_score', candidates[j].get('score', 0))
                if abs(current_score - other_score) / max(current_score, other_score, 0.001) < 0.05:
                    group.append(candidates[j])
                    j += 1
                else:
                    break

            if len(group) > 1:
                # Shuffle group deterministically based on names
                sample_names = ""
                for c in group:
                    sample = c.get('sample')
                    if sample:
                        sample_names += getattr(sample, 'name', '')
                seed = int(hashlib.md5((seed_base + sample_names).encode()).hexdigest()[:8], 16)
                rng = random.Random(seed)
                rng.shuffle(group)

            result.extend(group)
            i = j

        return result

    def reset_cooldown_tracking(self) -> None:
        """Resetea el tracking de cooldown para nueva generación."""
        self._family_last_used.clear()
        self._selection_counter = 0
        self._recent_families.clear()
        self._recent_sample_ids.clear()

    def start_generation_tracking(self) -> None:
        """Marca el inicio de una nueva generación (llamar al inicio de generate_track)."""
        self._generation_families = defaultdict(int)
        self._generation_paths: Dict[str, int] = defaultdict(int)

    def end_generation_tracking(self) -> None:
        """Marca el fin de una generación y actualiza memoria cross-generation."""
        if hasattr(self, '_generation_families'):
            paths_used = list(self._generation_paths.keys()) if hasattr(self, '_generation_paths') else []
            _update_cross_generation_memory(self._generation_families, paths_used)
            delattr(self, '_generation_families')
            if hasattr(self, '_generation_paths'):
                delattr(self, '_generation_paths')

    def _log_decision(self, decision: SampleDecision) -> None:
        """Registra una decisión si logging está activado."""
        if self._log_decisions:
            self._decision_log.append(decision)
            logger.debug("SAMPLE_DECISION: %s", decision.to_log_str())

    def _pick_ranked_sample(self,
                             samples: List['Sample'],
                             target_key: Optional[str] = None,
                             target_bpm: Optional[float] = None,
                             target_role: Optional[str] = None,
                             target_genre: Optional[str] = None,
                             prefer_oneshot: Optional[bool] = None,
                             pool_size: int = 12,
                             context: str = "") -> Optional['Sample']:
        """
        Selecciona un sample usando ranking multi-factor con weighted random.

        Args:
            samples: Lista de samples candidatos
            target_key: Key objetivo para matching armónico
            target_bpm: BPM objetivo para matching de tempo
            target_role: Rol objetivo para validación (ej: 'kick', 'clap')
            target_genre: Género objetivo
            prefer_oneshot: Preferencia por one-shot (True) o loop (False)
            pool_size: Tamaño del pool de mejores candidatos
            context: Contexto para seeding determinista

        Returns:
            Sample seleccionado o None si no hay candidatos
        """
        if not samples:
            return None

        # Calcular scores para todos los samples
        scored_samples = []
        for sample in samples:
            score = self._calculate_sample_score(
                sample,
                target_key=target_key,
                target_bpm=target_bpm,
                target_role=target_role,
                target_genre=target_genre,
                prefer_oneshot=prefer_oneshot
            )
            # Apply suspicion penalty for samples with suspicious names
            if target_role:
                score = self._apply_suspicion_penalty(score, sample.name, target_role)
            scored_samples.append({'score': score, 'sample': sample, 'rejection_reasons': []})

        # Ordenar por score descendente
        scored_samples.sort(key=lambda x: x['score'], reverse=True)

        # Apply tie-breaking with deterministic randomization
        scored_samples = self._break_tie_randomized(scored_samples, context)

        # Filtrar por rechazo duro para roles críticos
        if target_role:
            filtered_samples = []
            for s in scored_samples:
                should_reject, reason = self._hard_reject_check(s['sample'], target_role)
                if should_reject:
                    s['rejection_reasons'].append(f"hard_reject: {reason}")
                    logger.debug("HARD_REJECT: %s for role '%s': %s", s['sample'].name, target_role, reason)
                else:
                    filtered_samples.append(s)
            scored_samples = filtered_samples

            if not scored_samples:
                logger.warning("All samples hard-rejected for role '%s', using fallback", target_role)
        # Validar preferencia one-shot/loop para roles críticos
        if target_role:
            filtered_samples = []
            for s in scored_samples:
                is_valid, reason = self._validate_loop_preference(s['sample'], target_role)
                if not is_valid:
                    s['rejection_reasons'].append(f"loop_pref: {reason}")
                    logger.debug("LOOP_PREF: rejecting %s for role '%s': %s", s['sample'].name, target_role, reason)
                else:
                    filtered_samples.append(s)
            scored_samples = filtered_samples

            if not scored_samples:
                logger.warning("All samples rejected by loop preference for role '%s'", target_role)


        # Tomar top pool_size candidatos
        top_samples = scored_samples[:max(1, min(pool_size, len(scored_samples)))]

        # Aplicar jitter con seeding determinista
        selection_seed = self._generate_selection_seed(context)
        rng = random.Random(selection_seed)

        # Weighted random selection con jitter
        weighted: List[Tuple[float, 'Sample']] = []
        for rank, s in enumerate(top_samples):
            score = s['score']
            sample = s['sample']
            # Decaimiento por posición en el ranking
            rank_weight = max(0.2, 1.0 - (rank * 0.07))
            # Jitter aleatorio
            jitter = 0.85 + (rng.random() * 0.30)
            final_weight = max(0.01, score * rank_weight * jitter)
            weighted.append((final_weight, sample))

        # Selección por weighted random
        if NUMPY_AVAILABLE and len(weighted) > 3:
            # Usar numpy para mejor performance
            weights = np.array([w for w, _ in weighted])
            weights = weights / weights.sum()
            idx = np.random.default_rng(selection_seed).choice(len(weighted), p=weights)
            selected = weighted[idx][1]
            final_score = weighted[idx][0]
            selected_idx = idx
        else:
            # Fallback a random estándar
            total = sum(weight for weight, _ in weighted)
            pivot = rng.random() * total
            running = 0.0
            selected = weighted[0][1]  # default
            final_score = weighted[0][0]
            selected_idx = 0
            for idx, (weight, sample) in enumerate(weighted):
                running += weight
                if pivot <= running:
                    selected = sample
                    final_score = weight
                    selected_idx = idx
                    break

        self._remember_sample(selected, role=target_role)

        # Log decision if enabled
        if self._log_decisions and selected:
            # Determine bonus factors (would need to be tracked during scoring)
            bonus_list = []

            # Log the selected sample
            decision = SampleDecision(
                sample_name=selected.name,
                target_role=target_role or "unknown",
                final_score=final_score,
                selected=True,
                selection_index=selected_idx,
                bonus_factors=bonus_list
            )
            self._log_decision(decision)

            # Also log top 5 rejections
            for idx, s in enumerate(scored_samples[:5]):  # Top 5 rejected
                if s['sample'].name != selected.name:
                    reject_decision = SampleDecision(
                        sample_name=s['sample'].name,
                        target_role=target_role or "unknown",
                        final_score=s['score'],
                        selected=False,
                        selection_index=idx,
                        rejection_reasons=s.get('rejection_reasons', [])
                    )
                    self._log_decision(reject_decision)

        return selected

    def _pick_multiple_ranked(self,
                               samples: List['Sample'],
                               count: int,
                               target_key: Optional[str] = None,
                               target_bpm: Optional[float] = None,
                               target_role: Optional[str] = None,
                               target_genre: Optional[str] = None,
                               prefer_oneshot: Optional[bool] = None,
                               pool_size: int = 15,
                               context: str = "") -> List['Sample']:
        """
        Selecciona múltiples samples con diversidad garantizada.
        """
        chosen: List['Sample'] = []
        if not samples or count <= 0:
            return chosen

        remaining = list(samples)
        seen_ids = set()
        sub_context = context

        while remaining and len(chosen) < count:
            selected = self._pick_ranked_sample(
                remaining,
                target_key=target_key,
                target_bpm=target_bpm,
                target_role=target_role,
                target_genre=target_genre,
                prefer_oneshot=prefer_oneshot,
                pool_size=pool_size,
                context=f"{sub_context}_{len(chosen)}"
            )
            if selected is None:
                break
            if selected.id not in seen_ids:
                chosen.append(selected)
                seen_ids.add(selected.id)
            remaining = [sample for sample in remaining if sample.id != selected.id]

        return chosen

    def get_decision_log(self) -> list[SampleDecision]:
        """Return a copy of the accumulated decision log.

        The copy is made with the log container's own ``copy()``; the decision
        objects inside it are shared with the internal log.
        """
        return self._decision_log.copy()

    def clear_decision_log(self) -> None:
        """Remove every entry from the decision log, in place."""
        self._decision_log.clear()

    def enable_decision_logging(self, enabled: bool = True) -> None:
        """Turn decision logging on or off.

        Args:
            enabled: New value of the flag that ``_log_decision`` checks
                before recording anything.
        """
        self._log_decisions = enabled

    def select_for_genre(self,
                         genre: str,
                         key: Optional[str] = None,
                         bpm: Optional[float] = None,
                         variation: str = "standard",
                         session_seed: Optional[int] = None) -> InstrumentGroup:
        """
        Build a complete instrument group (drums, bass, synths, FX) for a genre.

        Args:
            genre: Musical genre; resolved to a profile via ``_get_genre_profile``.
            key: Preferred key. When None, one is drawn from the profile's
                ``common_keys`` with a deterministically seeded RNG.
            bpm: Preferred tempo. When None, an integer is drawn from the
                profile's ``bpm_range`` (both ends inclusive) with a
                deterministically seeded RNG.
            variation: Style variation, forwarded to the drum-kit selection.
            session_seed: When provided, replaces the session seed and resets
                the selection counter before anything is selected.

        Returns:
            InstrumentGroup holding the selected samples.
        """
        # A caller-supplied seed restarts the deterministic sequence.
        if session_seed is not None:
            self._session_seed = session_seed
            self._selection_counter = 0

        profile = self._get_genre_profile(genre)

        # Fill in a missing key from the profile.
        if key is None:
            key_rng = random.Random(self._generate_selection_seed("key"))
            key = key_rng.choice(profile.common_keys)

        # Fill in a missing tempo from the profile.
        if bpm is None:
            bpm_rng = random.Random(self._generate_selection_seed("bpm"))
            lowest, highest = profile.bpm_range[0], profile.bpm_range[1]
            bpm = bpm_rng.randint(lowest, highest)

        group = InstrumentGroup(genre=profile.name, key=key, bpm=float(bpm))

        # Selection order is kept fixed: drums, bass, synths, then FX.
        group.drums = self._select_drum_kit(genre, variation, target_key=key)
        group.bass = self._select_bass_samples(genre, key, bpm, count=3)
        group.synths = self._select_synth_samples(genre, key, bpm, count=3)
        group.fx = self._select_fx_samples(genre, count=2, target_bpm=bpm)

        return group

    def _get_genre_profile(self, genre: str) -> GenreProfile:
        """Return the profile for a genre, falling back to techno.

        The name is trimmed, lower-cased and has its spaces turned into
        dashes. Lookup order: exact key in ``GENRE_PROFILES``, then the first
        profile whose name contains (or is contained in) the normalized name,
        then the 'techno' profile with a warning.

        A blank name goes straight to the fallback: an empty string is a
        substring of every name, so the partial search would otherwise return
        whichever profile happens to come first.
        """
        genre_lower = genre.strip().lower().replace(' ', '-')

        if genre_lower:
            # Exact match.
            if genre_lower in GENRE_PROFILES:
                return GENRE_PROFILES[genre_lower]

            # Partial match in either direction.
            for name, profile in GENRE_PROFILES.items():
                if genre_lower in name or name in genre_lower:
                    return profile

        # Fallback to techno.
        logger.warning(f"Género '{genre}' no encontrado, usando techno")
        return GENRE_PROFILES['techno']

    def _select_drum_kit(self, genre: str, variation: str = "standard", target_key: Optional[str] = None) -> DrumKit:
        """
        Select a coherent drum kit, validating each sample against its role.

        Every slot is filled by searching the "drums" category for a list of
        keywords, de-duplicating the hits by id and handing them to
        ``_pick_ranked_sample`` with the slot's role, so inappropriate samples
        are penalized or rejected. All slots prefer one-shots.

        The house/techno decision is made on a normalized genre (trimmed,
        lower-cased, spaces to dashes), so inputs such as "House" or
        "Tech House" take the house branch instead of silently falling into
        the techno branch.

        Args:
            genre: Musical genre; also used verbatim in the kit name and as
                the ranking target genre.
            variation: Variation label, used in the kit name.
            target_key: Optional key forwarded to the ranking.

        Returns:
            The populated DrumKit, or a kit named "empty" when no manager is
            available. Slots with no search hits stay None.
        """
        if not self.manager:
            return DrumKit(name="empty")

        kit = DrumKit(name=f"{genre}_{variation}")

        genre_slug = genre.strip().lower().replace(' ', '-')

        def find_drum(drum_role: str, keywords: List[str], prefer_oneshot: bool = True) -> Optional[Sample]:
            """Search all keywords, de-duplicate by id and pick one ranked sample."""
            unique_by_id = {}
            for keyword in keywords:
                hits = self.manager.search(
                    query=keyword,
                    category="drums",
                    limit=50
                )
                for hit in hits:
                    # setdefault keeps the first occurrence of each id.
                    unique_by_id.setdefault(hit.id, hit)

            if not unique_by_id:
                return None

            return self._pick_ranked_sample(
                list(unique_by_id.values()),
                target_key=target_key,
                target_role=drum_role,  # enables role validation
                target_genre=genre,
                prefer_oneshot=prefer_oneshot,
                pool_size=12,
                context=f"drum_{drum_role}"
            )

        # Kick - always a one-shot.
        kit.kick = find_drum("kick", ["kick", "bd", "bass_drum"], prefer_oneshot=True)

        # Snare or clap first, depending on the genre. The order matters
        # because each pick is recorded before the next one is made.
        if genre_slug in ('house', 'tech-house', 'deep-house'):
            # In house, the clap is more common than the snare.
            kit.clap = find_drum("clap", ["clap", "handclap"], prefer_oneshot=True)
            kit.snare = find_drum("snare", ["snare", "rim"], prefer_oneshot=True)
        else:
            # Otherwise (techno-style), the snare is more common.
            kit.snare = find_drum("snare", ["snare", "rimshot"], prefer_oneshot=True)
            kit.clap = find_drum("clap", ["clap"], prefer_oneshot=True)

        # Hats.
        kit.hat_closed = find_drum("hat_closed", ["closed hat", "hihat", "hat"], prefer_oneshot=True)
        kit.hat_open = find_drum("hat_open", ["open hat", "ohh"], prefer_oneshot=True)

        # Additional percussion.
        kit.perc1 = find_drum("perc", ["perc", "shaker", "tamb"], prefer_oneshot=True)
        kit.perc2 = find_drum("perc", ["percussion", "conga", "bongo"], prefer_oneshot=True)

        # Tom.
        kit.tom = find_drum("tom", ["tom", "tomtom"], prefer_oneshot=True)

        # Crash (optional).
        kit.crash = find_drum("crash", ["crash", "cymbal"], prefer_oneshot=True)

        # Record which samples were used for the core roles.
        for role, chosen in (('kick', kit.kick), ('snare', kit.snare), ('clap', kit.clap)):
            if chosen:
                self._role_history[role].append(chosen.id)

        return kit

    def _select_bass_samples(self,
                              genre: str,
                              key: str,
                              bpm: float,
                              count: int = 3) -> List[Sample]:
        """
        Selecciona samples de bajo compatibles con mejor ranking.

        Mejoras Fase 4:
        - Matching armónico mejorado
        - Balance one-shot vs loop según contexto
        - Penalización de familias repetidas
        """
        if not self.manager:
            return []

        # Buscar por key primero
        results = self.manager.search(
            category="bass",
            key=key,
            bpm=bpm,
            bpm_tolerance=5,
            limit=count * 10
        )

        # Si no hay suficientes, buscar sin key
        if len(results) < count:
            more = self.manager.search(
                category="bass",
                bpm=bpm,
                bpm_tolerance=10,
                limit=count * 10
            )
            results.extend(more)

        # Buscar por género también
        genre_results = self.manager.search(
            category="bass",
            genres=[genre],
            limit=count * 8
        )
        results.extend(genre_results)

        # Eliminar duplicados
        seen = set()
        unique = []
        for s in results:
            if s.id not in seen:
                seen.add(s.id)
                unique.append(s)

        # Para bass, preferimos loops en la mayoría de casos
        # excepto para bass one-shots (808, stabs)
        prefer_oneshot = 'trap' in genre.lower() or 'hip-hop' in genre.lower()

        return self._pick_multiple_ranked(
            unique,
            count=count,
            target_key=key,
            target_bpm=bpm,
            target_genre=genre,
            prefer_oneshot=prefer_oneshot,
            pool_size=15,
            context="bass"
        )

    def _select_synth_samples(self,
                               genre: str,
                               key: str,
                               bpm: float,
                               count: int = 3) -> List[Sample]:
        """
        Selecciona samples de sintetizador compatibles con mejor ranking.

        Mejoras Fase 4:
        - Diversidad de tipos (lead, pad, pluck, chord)
        - Balance loops preferentemente para texturas
        - Penalización de familias repetidas
        """
        if not self.manager:
            return []

        # Buscar diferentes tipos de synths
        synth_types = ['lead', 'pad', 'pluck', 'chord']
        results = []

        for synth_type in synth_types:
            type_results = self.manager.search(
                sample_type=synth_type,
                key=key,
                bpm=bpm,
                bpm_tolerance=5,
                limit=12
            )
            results.extend(type_results)

        # Completar con búsqueda general
        if len(results) < count * 2:
            more = self.manager.search(
                category="synths",
                key=key,
                limit=count * 10
            )
            results.extend(more)

        # Eliminar duplicados
        seen = set()
        unique = []
        for s in results:
            if s.id not in seen:
                seen.add(s.id)
                unique.append(s)

        # Para synths, preferimos loops para pads y chords
        # one-shots para leads y plucks
        prefer_oneshot = False  # Default a loops para texturas

        return self._pick_multiple_ranked(
            unique,
            count=count,
            target_key=key,
            target_bpm=bpm,
            target_genre=genre,
            prefer_oneshot=prefer_oneshot,
            pool_size=15,
            context="synth"
        )

    def _select_fx_samples(self, genre: str, count: int = 2, target_bpm: Optional[float] = None) -> List[Sample]:
        """
        Selecciona efectos apropiados con mejor ranking.
        """
        if not self.manager:
            return []

        results = self.manager.search(
            category="one_shots",
            sample_type="fx",
            genres=[genre],
            limit=count * 8
        )

        # También buscar en category fx directamente
        fx_results = self.manager.search(
            category="fx",
            limit=count * 6
        )
        results.extend(fx_results)

        # Eliminar duplicados
        seen = set()
        unique = []
        for s in results:
            if s.id not in seen:
                seen.add(s.id)
                unique.append(s)

        return self._pick_multiple_ranked(
            unique,
            count=count,
            target_bpm=target_bpm,
            target_genre=genre,
            prefer_oneshot=True,  # FX generalmente son one-shots
            pool_size=10,
            context="fx"
        )

    def find_compatible_samples(self,
                                 reference_sample: Sample,
                                 sample_type: str = "",
                                 max_results: int = 10) -> List[Tuple[Sample, float]]:
        """
        Encuentra samples compatibles con uno de referencia.

        Calcula score de compatibilidad basado en:
        - Key (armonía)
        - BPM (tempo)
        - Género
        - Características de audio
        """
        if not self.manager:
            return []

        # Buscar candidatos
        candidates = self.manager.search(
            sample_type=sample_type or reference_sample.sample_type,
            limit=50
        )

        results = []
        for candidate in candidates:
            if candidate.id == reference_sample.id:
                continue

            score = self._calculate_compatibility(reference_sample, candidate)
            if score > 0.5:  # Umbral mínimo
                results.append((candidate, score))

        # Ordenar por score
        results.sort(key=lambda x: x[1], reverse=True)
        return results[:max_results]

    def _calculate_compatibility(self, sample1: Sample, sample2: Sample) -> float:
        """Calcula un score de compatibilidad entre dos samples"""
        score = 0.0
        weights = 0.0

        # Compatibilidad de key (peso: 0.4)
        if sample1.key and sample2.key:
            if MANAGER_AVAILABLE:
                key_compat = calculate_key_compatibility(sample1.key, sample2.key)
            else:
                key_compat = 1.0 if sample1.key == sample2.key else 0.5
            score += key_compat * 0.4
            weights += 0.4

        # Compatibilidad de BPM (peso: 0.3)
        if sample1.bpm and sample2.bpm:
            bpm_diff = abs(sample1.bpm - sample2.bpm)
            if bpm_diff == 0:
                bpm_compat = 1.0
            elif bpm_diff <= 3:
                bpm_compat = 0.9
            elif bpm_diff <= 6:
                bpm_compat = 0.7
            elif bpm_diff <= 10:
                bpm_compat = 0.5
            else:
                bpm_compat = max(0.0, 1.0 - (bpm_diff / 50))
            score += bpm_compat * 0.3
            weights += 0.3

        # Compatibilidad de género (peso: 0.2)
        if sample1.genres and sample2.genres:
            common_genres = set(sample1.genres) & set(sample2.genres)
            if common_genres:
                genre_compat = len(common_genres) / max(len(sample1.genres), len(sample2.genres))
                score += genre_compat * 0.2
                weights += 0.2

        # Compatibilidad de categoría (peso: 0.1)
        if sample1.category == sample2.category:
            score += 0.1
            weights += 0.1

        return score / weights if weights > 0 else 0.0

    def get_midi_mapping_for_kit(self, kit: DrumKit) -> Dict[str, Any]:
        """
        Genera un mapeo MIDI completo para un kit de batería.

        Returns:
            Dict con información de mapeo para Ableton
        """
        mapping = {
            'kit_name': kit.name,
            'notes': {},
            'drum_rack_slots': {},
        }

        midi_map = kit.get_midi_mapping()

        for note, sample in midi_map.items():
            note_name = self._midi_note_to_name(note)
            mapping['notes'][note] = {
                'name': note_name,
                'sample': sample.name if sample else None,
                'sample_path': sample.path if sample else None,
            }

            # Mapeo para Drum Rack (0-127 pads)
            if note in range(36, 52):  # Rango de drums común
                drum_rack_slot = note - 36
                mapping['drum_rack_slots'][drum_rack_slot] = {
                    'note': note,
                    'sample': sample.name if sample else None,
                    'sample_path': sample.path if sample else None,
                }

        return mapping

    def _midi_note_to_name(self, note: int) -> str:
        """Convierte número de nota MIDI a nombre"""
        note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
        octave = (note // 12) - 1
        name = note_names[note % 12]
        return f"{name}{octave}"

    def suggest_key_change(self,
                          current_key: str,
                          direction: str = "fifth_up") -> str:
        """
        Sugiere un cambio de key armónico.

        Args:
            current_key: Key actual
            direction: 'fifth_up', 'fifth_down', 'relative', 'parallel'

        Returns:
            Nueva key sugerida
        """
        # Círculo de quintas
        circle_major = ['C', 'G', 'D', 'A', 'E', 'B', 'F#', 'C#', 'G#', 'D#', 'A#', 'F']
        circle_minor = ['Am', 'Em', 'Bm', 'F#m', 'C#m', 'G#m', 'D#m', 'A#m', 'Fm', 'Cm', 'Gm', 'Dm']

        is_minor = current_key.endswith('m')
        root = current_key.rstrip('m')

        circle = circle_minor if is_minor else circle_major

        try:
            idx = circle.index(current_key)
        except ValueError:
            # Intentar encontrar equivalente
            return current_key

        if direction == "fifth_up":
            new_idx = (idx + 1) % 12
            return circle[new_idx]
        elif direction == "fifth_down":
            new_idx = (idx - 1) % 12
            return circle[new_idx]
        elif direction == "relative":
            # Cambiar entre mayor/menor relativo
            if is_minor:
                # De menor a mayor relativo (3 semitonos arriba)
                rel_idx = (idx + 3) % 12
                return circle_major[rel_idx]
            else:
                # De mayor a menor relativo (3 semitonos abajo)
                rel_idx = (idx - 3) % 12
                return circle_minor[rel_idx]
        elif direction == "parallel":
            # Cambiar entre mayor/menor paralelo
            if is_minor:
                return root
            else:
                return root + 'm'

        return current_key

    def create_variation(self,
                        original_group: InstrumentGroup,
                        variation_type: str = "energy_up") -> InstrumentGroup:
        """
        Create a variation of an instrument group.

        Args:
            original_group: Group to start from.
            variation_type: One of
                'energy_up'  - new "heavy" drum kit and new bass, same key;
                'breakdown'  - "minimal" kit reusing only the original kick
                               and closed hat, plus two new synths;
                'key_change' - key moved a fifth up, with new bass and synths
                               selected in the new key.
                Any other value yields a group that only carries over genre,
                key and BPM.

        Returns:
            A new InstrumentGroup; the original is not modified.
        """
        genre = original_group.genre
        key = original_group.key
        bpm = original_group.bpm

        new_group = InstrumentGroup(genre=genre, key=key, bpm=bpm)

        if variation_type == "energy_up":
            # Heavier drums, and a fresh bass selection in the same key.
            new_group.drums = self._select_drum_kit(genre, variation="heavy", target_key=key)
            new_group.bass = self._select_bass_samples(genre, key, bpm, count=3)

        elif variation_type == "breakdown":
            # Strip the drums down to the original kick and closed hat.
            new_group.drums = DrumKit(name="minimal")
            new_group.drums.kick = original_group.drums.kick
            new_group.drums.hat_closed = original_group.drums.hat_closed
            # Keep the key; only a couple of synth layers remain.
            new_group.synths = self._select_synth_samples(genre, key, bpm, count=2)

        elif variation_type == "key_change":
            # Modulate a fifth up and re-select the tonal instruments.
            shifted_key = self.suggest_key_change(key, "fifth_up")
            new_group.key = shifted_key
            new_group.bass = self._select_bass_samples(genre, shifted_key, bpm, count=3)
            new_group.synths = self._select_synth_samples(genre, shifted_key, bpm, count=3)

        return new_group


# ============================================================================
# Funciones de conveniencia
# ============================================================================

# Module-wide selector instance: created on first use by get_selector() and
# discarded by reset_selector().
_selector: Optional[SampleSelector] = None


def get_selector(session_seed: Optional[int] = None) -> SampleSelector:
    """Return the global selector, creating it on first use.

    Args:
        session_seed: Passed to the constructor when the selector is created.
            When the selector already exists and a seed is given, the seed is
            replaced and the selection counter is reset to zero.
    """
    global _selector

    if _selector is None:
        _selector = SampleSelector(session_seed=session_seed)
        return _selector

    if session_seed is not None:
        # Re-seed the existing instance and restart its selection sequence.
        _selector._session_seed = session_seed
        _selector._selection_counter = 0
    return _selector


def reset_selector() -> None:
    """Discard the global selector so the next get_selector() call builds a new one."""
    global _selector
    _selector = None


def select_samples_for_track(genre: str,
                              key: Optional[str] = None,
                              bpm: Optional[float] = None,
                              session_seed: Optional[int] = None) -> Dict[str, Any]:
    """
    Select the samples for a complete track and return them as plain dicts.

    Args:
        genre: Musical genre.
        key: Key (chosen automatically when None).
        bpm: Tempo (chosen automatically when None).
        session_seed: Seed for reproducibility, applied to the global selector.

    Returns:
        Dict with the resolved genre, key and BPM, the drum kit and its MIDI
        mapping, the bass / synth / FX sample lists, and the session seed
        that was used.
    """
    selector = get_selector(session_seed=session_seed)
    group = selector.select_for_genre(genre, key, bpm)

    summary: Dict[str, Any] = {}
    summary['genre'] = group.genre
    summary['key'] = group.key
    summary['bpm'] = group.bpm
    summary['drum_kit'] = group.drums.to_dict()
    summary['midi_mapping'] = selector.get_midi_mapping_for_kit(group.drums)
    for out_key, picked in (('bass_samples', group.bass),
                            ('synth_samples', group.synths),
                            ('fx_samples', group.fx)):
        summary[out_key] = [sample.to_dict() for sample in picked]
    summary['session_seed'] = selector._session_seed
    return summary


def get_drum_kit(genre: str, variation: str = "standard", key: Optional[str] = None) -> Dict[str, Any]:
    """
    Select a drum kit for a genre using the global selector.

    Args:
        genre: Musical genre.
        variation: Kit variation.
        key: Key used for harmonic matching.

    Returns:
        Dict with the kit as a dict ('kit') and its MIDI mapping
        ('midi_mapping').
    """
    selector = get_selector()
    kit = selector._select_drum_kit(genre, variation, target_key=key)

    kit_dict = kit.to_dict()
    midi_mapping = selector.get_midi_mapping_for_kit(kit)
    return {'kit': kit_dict, 'midi_mapping': midi_mapping}


def find_compatible(sample_path: str, max_results: int = 10) -> List[Dict[str, Any]]:
    """Find samples compatible with the sample stored at ``sample_path``.

    Args:
        sample_path: Path used to look the reference sample up in the manager.
        max_results: Maximum number of results.

    Returns:
        List of ``{'sample': <dict>, 'compatibility_score': <float>}`` entries,
        best match first. Empty when the sample manager could not be imported
        or when no sample is registered under ``sample_path``.
    """
    # get_manager is only bound when the sample-manager import succeeded;
    # without this guard the call below would raise NameError.
    if not MANAGER_AVAILABLE:
        logger.warning("Sample manager not available; cannot search compatible samples")
        return []

    selector = get_selector()
    manager = get_manager()

    sample = manager.get_by_path(sample_path)
    if not sample:
        return []

    compatible = selector.find_compatible_samples(sample, max_results=max_results)
    return [
        {
            'sample': s.to_dict(),
            'compatibility_score': score
        }
        for s, score in compatible
    ]


# ============================================================================
# Funciones para GPU/Embeddings (opcional)
# ============================================================================

def calculate_embedding_similarity(samples: List['Sample'],
                                    reference: 'Sample',
                                    use_gpu: bool = True) -> List[Tuple['Sample', float]]:
    """
    Rank samples by cosine similarity between their embedding and the reference's.

    Embeddings are read from each object's ``embedding`` attribute. A sample
    without an embedding, or with a zero-norm vector on either side, scores
    0.0. CuPy is used when ``use_gpu`` is set and a GPU is available,
    otherwise NumPy.

    Args:
        samples: Samples to compare.
        reference: Reference sample.
        use_gpu: Use the GPU backend if it is available.

    Returns:
        List of (sample, similarity) sorted by similarity, highest first.
        When NumPy is missing, the reference has no embedding, or the
        computation fails, every sample is returned with a neutral 0.5 in
        its original order.
    """
    def neutral_scores():
        # Fallback result: every sample gets the same neutral score.
        return [(s, 0.5) for s in samples]

    if not NUMPY_AVAILABLE:
        logger.warning("NumPy no disponible, usando similitud básica")
        return neutral_scores()

    ref_embedding = getattr(reference, 'embedding', None)
    if ref_embedding is None:
        logger.warning("No hay embedding de referencia, usando similitud básica")
        return neutral_scores()

    # Pick the array backend.
    xp = cp if (use_gpu and GPU_AVAILABLE) else np

    try:
        ref_vec = xp.array(ref_embedding)
        ref_norm = xp.linalg.norm(ref_vec)

        results = []
        for sample in samples:
            similarity = 0.0
            sample_embedding = getattr(sample, 'embedding', None)
            if sample_embedding is not None:
                sample_vec = xp.array(sample_embedding)
                sample_norm = xp.linalg.norm(sample_vec)
                # Cosine similarity is undefined for zero vectors; keep 0.0.
                if ref_norm > 0 and sample_norm > 0:
                    similarity = float(xp.dot(ref_vec, sample_vec) / (ref_norm * sample_norm))
            results.append((sample, similarity))

        # Most similar first.
        results.sort(key=lambda pair: pair[1], reverse=True)

    except Exception as e:
        logger.warning(f"Error calculando similitud de embeddings: {e}")
        return neutral_scores()

    return results


def _batch_key_score(sample_key, target_key):
    """Return the key-compatibility score of one sample (0.5 if its key is unknown)."""
    if not sample_key:
        return 0.5
    if MANAGER_AVAILABLE:
        return calculate_key_compatibility(target_key, sample_key)
    # Without the analyzer only an exact string match can be recognised.
    return 1.0 if sample_key == target_key else 0.5


def _batch_bpm_score(sample_bpm, target_bpm):
    """Return the tempo-proximity score of one sample (0.5 if its BPM is unknown)."""
    if not sample_bpm:
        return 0.5
    diff = abs(sample_bpm - target_bpm)
    if diff == 0:
        return 1.0
    if diff <= 3:
        return 0.95
    if diff <= 6:
        return 0.85
    if diff <= 10:
        return 0.70
    # Beyond 10 BPM the score decays linearly and is floored at 0.2.
    return max(0.2, 1.0 - (diff / 30))


def _batch_genre_score(sample_genres, genre_lower):
    """Return the genre-match score of one sample (0.5 if it has no genres).

    ``genre_lower`` is the target genre already lower-cased with spaces
    replaced by hyphens; the sample's genres are normalised the same way.
    """
    if not sample_genres:
        return 0.5
    normalized = [g.lower().replace(' ', '-') for g in sample_genres]
    if genre_lower in normalized:
        return 1.0
    # Partial match: one name contains the other.
    if any(g in genre_lower or genre_lower in g for g in normalized):
        return 0.7
    return 0.3


def batch_score_samples(samples: List['Sample'],
                        target_key: Optional[str] = None,
                        target_bpm: Optional[float] = None,
                        target_genre: Optional[str] = None,
                        use_gpu: bool = True) -> List[Tuple['Sample', float]]:
    """
    Score many samples against a target key, BPM and genre, best first.

    Two code paths exist:

    - Fewer than 10 samples, or NumPy unavailable: each sample is scored by
      ``get_selector()._calculate_sample_score``.
    - Otherwise: four factors (rating, key, BPM, genre) are computed per
      sample and combined with equal weights of 0.25 using NumPy, or CuPy
      when ``use_gpu`` is true and CuPy is available. A factor whose target
      is not given contributes 0 for every sample, so it lowers the absolute
      score without changing the ordering.

    Args:
        samples: Samples to score.
        target_key: Target musical key, or None to ignore key.
        target_bpm: Target tempo, or None to ignore tempo.
        target_genre: Target genre, or None to ignore genre.
        use_gpu: Use CuPy instead of NumPy when CuPy is available.

    Returns:
        List of (sample, score) tuples sorted by score, highest first.
        Empty when ``samples`` is empty.
    """
    if not samples:
        return []

    if not NUMPY_AVAILABLE or len(samples) < 10:
        # Small batches (or no NumPy): defer to the selector's own scoring.
        selector = get_selector()
        results = [
            (
                sample,
                selector._calculate_sample_score(
                    sample,
                    target_key=target_key,
                    target_bpm=target_bpm,
                    target_genre=target_genre
                ),
            )
            for sample in samples
        ]
        results.sort(key=lambda x: x[1], reverse=True)
        return results

    xp = cp if (use_gpu and GPU_AVAILABLE) else np
    count = len(samples)

    # Each factor is computed in plain Python and turned into an array in one
    # step. Writing array elements one at a time would cost a separate device
    # access per element when xp is CuPy.
    ratings = xp.array([min(1.0, (s.rating or 0) / 5.0) for s in samples])

    if target_key:
        key_scores = xp.array(
            [_batch_key_score(s.key, target_key) for s in samples], dtype=float
        )
    else:
        key_scores = xp.zeros(count)

    if target_bpm:
        bpm_scores = xp.array(
            [_batch_bpm_score(s.bpm, target_bpm) for s in samples], dtype=float
        )
    else:
        bpm_scores = xp.zeros(count)

    if target_genre:
        genre_lower = target_genre.lower().replace(' ', '-')
        genre_scores = xp.array(
            [_batch_genre_score(s.genres, genre_lower) for s in samples], dtype=float
        )
    else:
        genre_scores = xp.zeros(count)

    # Equal-weight combination; row order: rating, key, bpm, genre.
    weights = xp.array([0.25, 0.25, 0.25, 0.25])
    scores_matrix = xp.stack([ratings, key_scores, bpm_scores, genre_scores])
    final_scores = xp.dot(weights, scores_matrix)

    # One bulk conversion to Python floats instead of one read per element.
    results = list(zip(samples, final_scores.tolist()))
    results.sort(key=lambda x: x[1], reverse=True)

    return results


# Testing
if __name__ == "__main__":
    # Manual smoke test. Prints genre profiles, one seeded selection, a
    # same-seed repeat, and role-validation scores for mock samples.
    logging.basicConfig(level=logging.INFO)

    rule = "=" * 60

    print("Sample Selector - Test (Fase 4 mejorada)")
    print(rule)

    selector = SampleSelector()

    # Genre profile lookup
    for genre_name in ('techno', 'house', 'tech-house', 'deep-house'):
        print(f"\n{genre_name.upper()}:")
        genre_profile = selector._get_genre_profile(genre_name)
        print(f"  BPM: {genre_profile.bpm_range}")
        print(f"  Keys: {genre_profile.common_keys}")
        print(f"  Características: {', '.join(genre_profile.characteristics)}")

    # Full selection with a fixed session seed
    print("\n" + rule)
    print("SELECCIÓN PARA TECHNO (session_seed=12345):")

    seeded_selector = SampleSelector(session_seed=12345)
    first_run = seeded_selector.select_for_genre('techno', key='F#m', bpm=130)

    print(f"\nKey: {first_run.key}, BPM: {first_run.bpm}")
    print(f"Session Seed: {seeded_selector._session_seed}")
    print(f"\nDrum Kit: {first_run.drums.name}")

    # Print only the drum slots that were actually filled.
    drum_slots = (
        ("Kick", "kick"),
        ("Snare", "snare"),
        ("Clap", "clap"),
        ("Hat", "hat_closed"),
    )
    for label, slot_attr in drum_slots:
        chosen = getattr(first_run.drums, slot_attr)
        if chosen:
            print(f"  {label}: {chosen.name} (role validated)")

    print(f"\nBass samples: {len(first_run.bass)}")
    print(f"Synth samples: {len(first_run.synths)}")

    # Second run with the same seed
    print("\n" + rule)
    print("TEST DE REPRODUCIBILIDAD (misma semilla):")

    repeat_selector = SampleSelector(session_seed=12345)
    second_run = repeat_selector.select_for_genre('techno', key='F#m', bpm=130)

    print(f"Misma key: {first_run.key == second_run.key}")
    print(f"Mismo BPM: {first_run.bpm == second_run.bpm}")

    # Role validation
    print("\n" + rule)
    print("TEST DE VALIDACIÓN DE ROLES:")

    # Minimal stand-in sample carrying only the attributes assigned below.
    class MockSample:
        def __init__(self, name, sample_type, category):
            # Identity
            self.id = name
            self.name = name
            # Classification
            self.sample_type = sample_type
            self.category = category
            self.subcategory = ""
            self.genres = []
            # Musical / audio metadata
            self.key = None
            self.bpm = None
            self.rating = 3
            self.rms_energy = 0.5
            self.duration = 0.5

    kick_sample = MockSample("Techno_Kick_01", "kick", "drums")
    snare_sample = MockSample("Techno_Snare_02", "snare", "drums")
    clap_sample = MockSample("Techno_Clap_03", "clap", "drums")

    score_role = selector._validate_sample_for_role

    # Each mock scored against its own role
    matching_cases = (
        ("Kick", kick_sample, 'kick'),
        ("Snare", snare_sample, 'snare'),
        ("Clap", clap_sample, 'clap'),
    )
    for label, mock, role in matching_cases:
        print(f"  {label} para rol '{role}': {score_role(mock, role):.2f}")

    # Deliberately mismatched sample/role pairs
    mismatched_cases = (
        ("Snare", snare_sample, 'kick'),
        ("Clap", clap_sample, 'hat_closed'),
    )
    for label, mock, role in mismatched_cases:
        print(f"  {label} para rol '{role}': {score_role(mock, role):.2f} (debería ser bajo)")

    print("\n" + rule)
    print(f"NumPy disponible: {NUMPY_AVAILABLE}")
    print(f"GPU disponible: {GPU_AVAILABLE}")