feat: Implement senior audio injection with 5 fallback methods

- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain
- Method 1: track.insert_arrangement_clip() [Live 12+]
- Method 2: track.create_audio_clip() [Live 11+]
- Method 3: arrangement_clips.add_new_clip() [Live 12+]
- Method 4: Session->duplicate_clip_to_arrangement [Legacy]
- Method 5: Session->Recording [Universal]

- Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow
- Update skills documentation
- Verified: 3 clips created at positions [0, 4, 8] in Arrangement View

Closes: Audio injection in Arrangement View
This commit is contained in:
OpenCode Agent
2026-04-12 14:02:32 -03:00
commit 5ce8187c65
118 changed files with 55075 additions and 0 deletions

View File

@@ -0,0 +1,635 @@
"""
Embedding Engine - Vector embeddings for audio samples
Crea embeddings vectoriales normalizados para samples usando features espectrales.
"""
import json
import os
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
# Try to import libreria_analyzer for integration;
# if it does not exist, operate standalone.
# Prefer the full analyzer when it ships alongside this module; otherwise run
# in standalone mode with a local pitch-class table.
try:
    from .libreria_analyzer import LibreriaAnalyzer, NOTE_TO_NUMBER
    HAS_ANALYZER = True
except ImportError:
    HAS_ANALYZER = False
    # Pitch classes C=0 .. B=11; enharmonic spellings (C#/Db, D#/Eb, ...)
    # share the same value.
    NOTE_TO_NUMBER = {
        'C': 0, 'C#': 1, 'Db': 1, 'D': 2, 'D#': 3, 'Eb': 3,
        'E': 4, 'F': 5, 'F#': 6, 'Gb': 6, 'G': 7, 'G#': 8,
        'Ab': 8, 'A': 9, 'A#': 10, 'Bb': 10, 'B': 11
    }
class EmbeddingEngine:
    """
    Vector embedding engine for audio samples.

    Builds one ~20-dimensional vector per sample with the layout:

        index 0    : duration            (normalized over 0-10 s)
        index 1    : BPM                 (normalized over 60-200 BPM)
        index 2    : key                 (pitch class 0-11, scaled to 0-1)
        index 3    : RMS in dB           (normalized over -60..0 dB)
        index 4    : spectral centroid   (normalized over 0-10000 Hz)
        index 5    : spectral rolloff    (normalized over 0-20000 Hz)
        index 6    : zero crossing rate  (already 0-1)
        index 7-19 : 13 MFCC coefficients (normalized from -100..100)

    Onset strength is read from the features but intentionally NOT stored:
    the 20 slots are fully used by the layout above.

    All embeddings are additionally normalized with per-dimension min-max
    scaling (see normalize_embeddings).
    """

    EMBEDDING_DIM = 20  # 1 Duration + 1 BPM + 1 Key + 1 RMS + 1 SC + 1 SR + 1 ZCR + 13 MFCCs
    EMBEDDINGS_FILE = Path("C:/ProgramData/Ableton/Live 12 Suite/Resources/MIDI Remote Scripts/libreria/reggaeton/.embeddings_index.json")
    FEATURES_CACHE = Path("C:/ProgramData/Ableton/Live 12 Suite/Resources/MIDI Remote Scripts/libreria/reggaeton/.features_cache.json")

    def __init__(self, features_data: Optional[Dict] = None):
        """
        Initialize the embedding engine.

        Args:
            features_data: Pre-loaded features data (optional)
        """
        self.embeddings: Dict[str, np.ndarray] = {}
        self.normalized_embeddings: Dict[str, np.ndarray] = {}
        self.min_values: Optional[np.ndarray] = None
        self.max_values: Optional[np.ndarray] = None
        self.features_data = features_data or {}
        # Load previously saved embeddings from disk, if an index file exists.
        self._load_embeddings()

    def _key_to_number(self, key: str) -> float:
        """
        Convert a musical key (e.g. 'C#m', 'F', 'Ab') to a pitch class 0-11.

        Args:
            key: Key as a string (may include 'm'/'minor'/'major' markers)

        Returns:
            float: Pitch-class number (0-11), or 0.0 if not recognized
        """
        if not key:
            return 0.0
        # Strip whitespace and mode markers. Remove the words 'MINOR'/'MAJOR'
        # BEFORE the bare 'M', otherwise they could never match.
        key_clean = key.strip().upper()
        key_clean = key_clean.replace('MINOR', '').replace('MAJOR', '').replace('M', '')
        key_clean = ''.join(c for c in key_clean if c.isalpha() or c == '#')
        if not key_clean:
            return 0.0
        # Base note is 1-2 characters. Upper-casing turns 'Ab' into 'AB',
        # which is not a key of NOTE_TO_NUMBER (flats are spelled 'Ab'),
        # so rebuild the table spelling for flats/sharps explicitly.
        if len(key_clean) >= 2 and key_clean[1] in ('#', 'B'):
            note = key_clean[0] + ('#' if key_clean[1] == '#' else 'b')
        else:
            note = key_clean[0]
        return float(NOTE_TO_NUMBER.get(note, 0))

    def _bpm_to_normalized(self, bpm: float) -> float:
        """
        Normalize BPM to the 0-1 range (assuming the typical 60-200 span).

        Args:
            bpm: BPM of the sample

        Returns:
            float: Normalized BPM (0-1); 0.5 when BPM is missing/invalid
        """
        if bpm <= 0:
            return 0.5  # Neutral value when no BPM is available
        # Typical electronic-music range: 60-200 BPM
        min_bpm, max_bpm = 60.0, 200.0
        return float(np.clip((bpm - min_bpm) / (max_bpm - min_bpm), 0.0, 1.0))

    def create_embedding(self, features: Dict) -> np.ndarray:
        """
        Create an embedding vector from a sample's features.

        Layout (see class docstring): 0 duration, 1 BPM, 2 key, 3 RMS,
        4 spectral centroid, 5 spectral rolloff, 6 ZCR, 7-19 MFCCs.

        Args:
            features: Dictionary with the sample's features

        Returns:
            np.ndarray: Embedding vector (EMBEDDING_DIM values, float32)
        """
        embedding = np.zeros(self.EMBEDDING_DIM, dtype=np.float32)
        # 0. Duration, normalized over 0-10 seconds
        embedding[0] = np.clip(features.get('duration', 1.0) / 10.0, 0.0, 1.0)
        # 1. BPM, normalized over 60-200 BPM
        embedding[1] = self._bpm_to_normalized(features.get('bpm', 0))
        # 2. Key as pitch class, scaled to 0-1
        embedding[2] = self._key_to_number(features.get('key', '')) / 11.0
        # 3. RMS arrives in dB; normalize -60..0 dB
        embedding[3] = np.clip((features.get('rms', -30) + 60.0) / 60.0, 0.0, 1.0)
        # 4. Spectral centroid, normalized over 0-10000 Hz
        embedding[4] = np.clip(features.get('spectral_centroid', 2000) / 10000.0, 0.0, 1.0)
        # 5. Spectral rolloff, normalized over 0-20000 Hz
        embedding[5] = np.clip(features.get('spectral_rolloff', 8000) / 20000.0, 0.0, 1.0)
        # 6. Zero crossing rate, already in 0-1
        embedding[6] = np.clip(features.get('zero_crossing_rate', 0.1), 0.0, 1.0)
        # 7-19. 13 MFCC coefficients, typically in -100..100. Pad short lists
        # with zeros; a padded zero normalizes to the neutral value 0.5.
        mfccs = list(features.get('mfccs', [0] * 13))
        if len(mfccs) < 13:
            mfccs += [0] * (13 - len(mfccs))
        for i in range(13):
            embedding[7 + i] = np.clip((mfccs[i] + 100) / 200.0, 0.0, 1.0)
        return embedding

    def normalize_embeddings(self) -> None:
        """
        Min-max normalize all embeddings, each dimension independently to [0, 1].

        Populates self.normalized_embeddings and records self.min_values /
        self.max_values so external query vectors can be scaled the same way.
        """
        if not self.embeddings:
            return
        # Stack into a (samples, dims) matrix
        paths = list(self.embeddings.keys())
        matrix = np.array([self.embeddings[p] for p in paths], dtype=np.float32)
        self.min_values = matrix.min(axis=0)
        self.max_values = matrix.max(axis=0)
        # Avoid division by zero on constant dimensions
        ranges = self.max_values - self.min_values
        ranges[ranges == 0] = 1.0
        normalized_matrix = (matrix - self.min_values) / ranges
        self.normalized_embeddings = {
            path: normalized_matrix[i]
            for i, path in enumerate(paths)
        }

    def build_from_features(self, features_data: Optional[Dict] = None) -> None:
        """
        Build embeddings from features data.

        Args:
            features_data: Dictionary with per-sample features. Falls back to
                self.features_data, then to the on-disk features cache.
        """
        if features_data is None:
            features_data = self.features_data
        if not features_data or 'samples' not in features_data:
            # Try loading from the cache file
            if self.FEATURES_CACHE.exists():
                with open(self.FEATURES_CACHE, 'r') as f:
                    features_data = json.load(f)
        if not features_data or 'samples' not in features_data:
            print("[EmbeddingEngine] No features data available")
            return
        samples = features_data.get('samples', {})
        print(f"[EmbeddingEngine] Building embeddings for {len(samples)} samples...")
        self.embeddings = {}
        for path, features in samples.items():
            try:
                self.embeddings[path] = self.create_embedding(features)
            except Exception as e:
                # Best-effort: skip samples whose features are malformed
                print(f"[EmbeddingEngine] Error creating embedding for {path}: {e}")
        self.normalize_embeddings()
        print(f"[EmbeddingEngine] Created {len(self.embeddings)} embeddings")

    def save_embeddings(self) -> None:
        """
        Save the normalized embeddings to the JSON index file.
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings to save")
            return
        # Serialize embeddings as plain lists (JSON cannot hold ndarrays)
        data = {
            'version': '1.0',
            'dimensions': self.EMBEDDING_DIM,
            'total_samples': len(self.normalized_embeddings),
            'created_at': str(np.datetime64('now')),
            # min/max are persisted so query vectors can be scaled identically
            'min_values': self.min_values.tolist() if self.min_values is not None else None,
            'max_values': self.max_values.tolist() if self.max_values is not None else None,
            'embeddings': {
                path: embedding.tolist()
                for path, embedding in self.normalized_embeddings.items()
            }
        }
        # Make sure the target directory exists
        self.EMBEDDINGS_FILE.parent.mkdir(parents=True, exist_ok=True)
        with open(self.EMBEDDINGS_FILE, 'w') as f:
            json.dump(data, f, indent=2)
        print(f"[EmbeddingEngine] Saved {len(self.normalized_embeddings)} embeddings to {self.EMBEDDINGS_FILE}")

    def _load_embeddings(self) -> bool:
        """
        Load embeddings from the index file, if it exists.

        Returns:
            bool: True if the index was loaded successfully
        """
        if not self.EMBEDDINGS_FILE.exists():
            return False
        try:
            with open(self.EMBEDDINGS_FILE, 'r') as f:
                data = json.load(f)
            self.EMBEDDING_DIM = data.get('dimensions', 20)
            self.min_values = np.array(data.get('min_values')) if data.get('min_values') else None
            self.max_values = np.array(data.get('max_values')) if data.get('max_values') else None
            self.normalized_embeddings = {
                path: np.array(emb, dtype=np.float32)
                for path, emb in data.get('embeddings', {}).items()
            }
            # NOTE(review): the file stores normalized vectors, so after a load
            # self.embeddings mirrors the normalized data rather than raw ones.
            self.embeddings = self.normalized_embeddings.copy()
            print(f"[EmbeddingEngine] Loaded {len(self.normalized_embeddings)} embeddings from cache")
            return True
        except Exception as e:
            print(f"[EmbeddingEngine] Error loading embeddings: {e}")
            return False

    def cosine_distance(self, emb1: np.ndarray, emb2: np.ndarray) -> float:
        """
        Cosine distance between two embeddings.

        Args:
            emb1: First embedding
            emb2: Second embedding

        Returns:
            float: Cosine distance (0 = identical direction, up to 2 for
                opposite vectors); 1.0 when either vector has zero norm
        """
        norm1 = np.linalg.norm(emb1)
        norm2 = np.linalg.norm(emb2)
        if norm1 == 0 or norm2 == 0:
            return 1.0
        similarity = np.dot(emb1, emb2) / (norm1 * norm2)
        # Convert similarity to distance (0 = similar)
        return float(1.0 - np.clip(similarity, -1.0, 1.0))

    def euclidean_distance(self, emb1: np.ndarray, emb2: np.ndarray) -> float:
        """
        Euclidean distance between two embeddings, scaled by sqrt(dim).

        Args:
            emb1: First embedding
            emb2: Second embedding

        Returns:
            float: Normalized Euclidean distance
        """
        diff = emb1 - emb2
        return float(np.sqrt(np.sum(diff ** 2)) / np.sqrt(self.EMBEDDING_DIM))

    def find_similar(self, sample_path: str, top_n: int = 10,
                     use_cosine: bool = True) -> List[Tuple[str, float]]:
        """
        Find the samples most similar to a given indexed sample.

        Args:
            sample_path: Path of the reference sample (must be in the index;
                it is resolved to an absolute path before lookup)
            top_n: Number of results to return
            use_cosine: True for cosine distance, False for Euclidean

        Returns:
            List[Tuple[str, float]]: (path, distance) pairs sorted by similarity
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings available")
            return []
        # Use the absolute path as the lookup key
        # NOTE(review): assumes index keys were stored resolved — verify.
        sample_path = str(Path(sample_path).resolve())
        if sample_path not in self.normalized_embeddings:
            print(f"[EmbeddingEngine] Sample not found: {sample_path}")
            return []
        reference_emb = self.normalized_embeddings[sample_path]
        distance_func = self.cosine_distance if use_cosine else self.euclidean_distance
        distances = [
            (path, distance_func(reference_emb, emb))
            for path, emb in self.normalized_embeddings.items()
            if path != sample_path  # exclude the sample itself
        ]
        # Smaller distance = more similar
        distances.sort(key=lambda x: x[1])
        return distances[:top_n]

    def find_by_audio_reference(self, audio_file_path: str, top_n: int = 20,
                                use_cosine: bool = True) -> List[Tuple[str, float]]:
        """
        Analyze an audio file and find similar samples in the index.

        Args:
            audio_file_path: Path of the audio file to analyze
            top_n: Number of similar samples to return
            use_cosine: True for cosine distance, False for Euclidean

        Returns:
            List[Tuple[str, float]]: (path, distance) pairs sorted by similarity
        """
        if not self.normalized_embeddings:
            print("[EmbeddingEngine] No embeddings available")
            return []
        # Extract features via the analyzer when available
        features = None
        if HAS_ANALYZER:
            try:
                analyzer = LibreriaAnalyzer()
                features = analyzer.analyze_single_file(audio_file_path)
            except Exception as e:
                print(f"[EmbeddingEngine] Error analyzing reference: {e}")
        if features is None:
            # Fallback: minimal default features
            print("[EmbeddingEngine] Using fallback analysis")
            features = self._fallback_analyze(audio_file_path)
        if features is None:
            print(f"[EmbeddingEngine] Could not analyze: {audio_file_path}")
            return []
        reference_emb = self.create_embedding(features)
        # Scale the query with the same min/max used for the index
        if self.min_values is not None and self.max_values is not None:
            ranges = self.max_values - self.min_values
            ranges[ranges == 0] = 1.0
            reference_emb = (reference_emb - self.min_values) / ranges
        distance_func = self.cosine_distance if use_cosine else self.euclidean_distance
        distances = [
            (path, distance_func(reference_emb, emb))
            for path, emb in self.normalized_embeddings.items()
        ]
        distances.sort(key=lambda x: x[1])
        return distances[:top_n]

    def _fallback_analyze(self, audio_file_path: str) -> Optional[Dict]:
        """
        Basic fallback analysis when the real analyzer is unavailable.

        Args:
            audio_file_path: Path of the file

        Returns:
            Dict with minimal features, or None when the file is inaccessible
        """
        try:
            # os.stat doubles as an existence/permission check; its result is
            # otherwise unused.
            os.stat(audio_file_path)
            # Defaults roughly matching typical reggaeton material
            return {
                'bpm': 95.0,
                'key': 'C',
                'rms': -12.0,
                'spectral_centroid': 3000.0,
                'spectral_rolloff': 8000.0,
                'zero_crossing_rate': 0.1,
                'mfccs': [0.0] * 13,
                'onset_strength': 0.6,
                'duration': 4.0,
                'sample_rate': 44100,
                'channels': 2
            }
        except Exception:
            return None

    def get_embedding(self, sample_path: str) -> Optional[np.ndarray]:
        """
        Get the normalized embedding of a specific sample.

        Args:
            sample_path: Path of the sample (resolved to absolute for lookup)

        Returns:
            np.ndarray: The sample's embedding, or None if not indexed
        """
        sample_path = str(Path(sample_path).resolve())
        return self.normalized_embeddings.get(sample_path)

    def get_stats(self) -> Dict:
        """
        Return summary statistics over the normalized embeddings.

        Returns:
            Dict with per-dimension mean/std/min/max and counts
        """
        if not self.normalized_embeddings:
            return {'total_samples': 0}
        matrix = np.array(list(self.normalized_embeddings.values()))
        return {
            'total_samples': len(self.normalized_embeddings),
            'dimensions': self.EMBEDDING_DIM,
            'mean_per_dim': matrix.mean(axis=0).tolist(),
            'std_per_dim': matrix.std(axis=0).tolist(),
            'min_per_dim': matrix.min(axis=0).tolist(),
            'max_per_dim': matrix.max(axis=0).tolist()
        }
# Convenience functions for direct use
def create_embeddings_index(features_file: Optional[str] = None,
                            output_file: Optional[str] = None) -> EmbeddingEngine:
    """
    Build the complete embeddings index.

    Args:
        features_file: Path to the features file (default: .features_cache.json)
        output_file: Output path (default: .embeddings_index.json)

    Returns:
        EmbeddingEngine with the created embeddings
    """
    eng = EmbeddingEngine()
    if not features_file:
        eng.build_from_features()
    else:
        with open(features_file, 'r') as fh:
            eng.build_from_features(json.load(fh))
    if output_file:
        # Redirect persistence to the caller-supplied location
        eng.EMBEDDINGS_FILE = Path(output_file)
    eng.save_embeddings()
    return eng
def find_similar_samples(sample_path: str, top_n: int = 10,
                         embeddings_file: Optional[str] = None) -> List[Tuple[str, float]]:
    """
    Convenience helper: samples most similar to an indexed sample.

    Args:
        sample_path: Path of the reference sample
        top_n: Number of results
        embeddings_file: Optional path to an embeddings index file

    Returns:
        List of (path, distance) pairs
    """
    eng = EmbeddingEngine()
    if embeddings_file:
        # Point at the caller-supplied index and reload it
        eng.EMBEDDINGS_FILE = Path(embeddings_file)
        eng._load_embeddings()
    return eng.find_similar(sample_path, top_n)
def find_samples_like_audio(audio_path: str, top_n: int = 20,
                            embeddings_file: Optional[str] = None) -> List[Tuple[str, float]]:
    """
    Convenience helper: samples most similar to an arbitrary audio file.

    Args:
        audio_path: Path of the reference audio
        top_n: Number of results
        embeddings_file: Optional path to an embeddings index file

    Returns:
        List of (path, distance) pairs
    """
    eng = EmbeddingEngine()
    if embeddings_file:
        # Point at the caller-supplied index and reload it
        eng.EMBEDDINGS_FILE = Path(embeddings_file)
        eng._load_embeddings()
    return eng.find_by_audio_reference(audio_path, top_n)
def cosine_similarity(emb1, emb2) -> float:
    """Cosine similarity of two vectors; 0.0 if either has zero norm.

    Compatibility helper used by server.py.
    """
    a = np.asarray(emb1, dtype=float)
    b = np.asarray(emb2, dtype=float)
    scale = np.linalg.norm(a) * np.linalg.norm(b)
    if not scale:
        return 0.0
    return float(np.dot(a, b) / scale)
# Simple smoke test
if __name__ == '__main__':
    print("[EmbeddingEngine] Running basic tests...")

    # Test 1: build an embedding from dummy features
    test_features = {
        'bpm': 95,
        'key': 'C',
        'rms': -12.5,
        'spectral_centroid': 2500.0,
        'spectral_rolloff': 8000.0,
        'zero_crossing_rate': 0.15,
        'mfccs': [0.5, -0.3, 0.1, 0.2, -0.1, 0.0, 0.3, -0.2, 0.1, 0.0, -0.1, 0.2, 0.1],
        'onset_strength': 0.85,
        'duration': 0.5,
        'sample_rate': 44100,
        'channels': 1
    }
    eng = EmbeddingEngine()
    vec = eng.create_embedding(test_features)
    print(f"[Test] Created embedding with shape: {vec.shape}")
    print(f"[Test] Embedding values: {vec[:5]}...")
    print(f"[Test] Embedding range: [{vec.min():.3f}, {vec.max():.3f}]")

    # Test 2: min-max normalization across three scaled copies
    eng.embeddings = {
        'sample1.wav': vec,
        'sample2.wav': vec * 0.8,
        'sample3.wav': vec * 1.2
    }
    eng.normalize_embeddings()
    print(f"[Test] Normalized {len(eng.normalized_embeddings)} embeddings")

    # Test 3: cosine distance between a vector and a scaled copy
    d = eng.cosine_distance(vec, vec * 0.9)
    print(f"[Test] Cosine distance (emb vs 0.9*emb): {d:.4f}")
    print("[EmbeddingEngine] All tests passed!")