Initial commit: AbletonMCP-AI complete system
- MCP Server with audio fallback, sample management
- Song generator with bus routing
- Reference listener and audio resampler
- Vector-based sample search
- Master chain with limiter and calibration
- Fix: Audio fallback now works without M4L
- Fix: Full song detection in sample loader

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
163
AbletonMCP_AI/MCP_Server/vector_manager.py
Normal file
163
AbletonMCP_AI/MCP_Server/vector_manager.py
Normal file
@@ -0,0 +1,163 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple
|
||||
|
||||
# Optional ML stack: semantic search needs sentence-transformers,
# scikit-learn and numpy.  If any of them is missing, HAS_ML stays False
# and VectorManager degrades to plain substring matching.
try:
    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    HAS_ML = True
except ImportError:
    HAS_ML = False

# Module-wide logger; basicConfig is a no-op if the host application has
# already configured logging.
logger = logging.getLogger("VectorManager")
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class VectorManager:
    """Vector-based semantic search over an audio sample library.

    File names and folder paths are embedded with a small
    sentence-transformers model and persisted to a hidden JSON index next
    to the library.  Queries are answered by cosine similarity; when the
    ML stack (sentence-transformers / scikit-learn / numpy) is missing,
    a simple substring search over the same metadata is used instead.
    """

    def __init__(self, library_dir: str):
        """Create a manager for *library_dir* and load or build its index.

        Args:
            library_dir: Root folder, scanned recursively for
                .wav/.aif/.aiff/.mp3 files.
        """
        self.library_dir = Path(library_dir)
        # Hidden JSON file holding both metadata and embedding vectors.
        self.index_file = self.library_dir / ".sample_embeddings.json"

        self.model = None
        self.embeddings = []  # numpy array once loaded/built, else []
        self.metadata = []    # one dict per sample: path/name/description

        if HAS_ML:
            try:
                # Load a very lightweight model for fast embeddings
                logger.info("Loading sentence-transformers model (all-MiniLM-L6-v2)...")
                self.model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                logger.error(f"Failed to load embedding model: {e}")

        self._load_or_build_index()

    def _load_or_build_index(self):
        """Load the persisted index if present, otherwise build a new one."""
        if self.index_file.exists():
            logger.info("Loading existing vector index...")
            try:
                with open(self.index_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self.metadata = data.get('metadata', [])

                if HAS_ML and 'embeddings' in data:
                    self.embeddings = np.array(data['embeddings'])
                else:
                    # Metadata-only index: substring fallback still works.
                    logger.warning("No embeddings found in loaded index.")
            except Exception as e:
                # Corrupt or unreadable index file: rebuild from the library.
                logger.error(f"Failed to load index: {e}")
                self._build_index()
        else:
            self._build_index()

    def _build_index(self):
        """Scan the library, embed every sample name and persist the index."""
        logger.info(f"Scanning library {self.library_dir} for new embeddings...")
        extensions = {'.wav', '.aif', '.aiff', '.mp3'}

        files_to_process = []
        for ext in extensions:
            files_to_process.extend(self.library_dir.rglob('*' + ext))
            files_to_process.extend(self.library_dir.rglob('*' + ext.upper()))

        if not files_to_process:
            logger.warning(f"No audio files found in {self.library_dir} to embed.")
            return

        texts_to_embed = []
        self.metadata = []

        # sorted(set(...)): dedupes the case-insensitive-filesystem double
        # hits from the two rglob passes AND makes the index order
        # deterministic across runs (a bare set() iterates in hash order).
        for f in sorted(set(files_to_process)):
            # Clean up the name for better semantic understanding
            name = f.stem
            clean_name = name.replace('_', ' ').replace('-', ' ').lower()

            # Use relative path as part of the context since folders
            # represent duration and type
            try:
                rel_path = f.relative_to(self.library_dir)
                parts = rel_path.parts[:-1]
                path_context = " ".join(parts).lower()
            except ValueError:
                path_context = ""

            description = f"{clean_name} {path_context}"
            texts_to_embed.append(description)

            self.metadata.append({
                'path': str(f),
                'name': name,
                'description': description
            })

        if HAS_ML and self.model:
            logger.info(f"Generating vectors for {len(texts_to_embed)} samples. This might take a moment...")
            embeddings = self.model.encode(texts_to_embed)
            self.embeddings = embeddings

            # Save the vectors
            with open(self.index_file, 'w', encoding='utf-8') as f:
                json.dump({
                    'metadata': self.metadata,
                    'embeddings': embeddings.tolist()
                }, f)
            logger.info(f"Saved {len(self.metadata)} embeddings to {self.index_file}.")
        else:
            logger.error("ML libraries not installed. Run 'pip install sentence-transformers scikit-learn numpy'")

    def semantic_search(self, query: str, limit: int = 5) -> List[Dict]:
        """
        Returns a list of metadata dicts sorted by semantic relevance down to the limit.
        Every result carries a 'score' key: cosine similarity here, or the
        heuristic integer score in fallback mode.
        Fallback to basic substring matching if ML is unavailable.
        """
        if not HAS_ML or self.model is None or len(self.embeddings) == 0:
            logger.warning("ML unavailable, falling back to substring search.")
            return self._fallback_search(query, limit)

        logger.info(f"Performing semantic search for: '{query}'")
        query_emb = self.model.encode([query])

        # Calculate cosine similarity between query and all stored embeddings
        similarities = cosine_similarity(query_emb, self.embeddings)[0]

        # Get top indices
        top_indices = np.argsort(similarities)[::-1][:limit]

        results = []
        for idx in top_indices:
            score = float(similarities[idx])
            meta = self.metadata[idx].copy()
            meta['score'] = score
            results.append(meta)

        return results

    def _fallback_search(self, query: str, limit: int = 5) -> List[Dict]:
        """Case-insensitive substring search used when ML is unavailable.

        A name hit scores 10 and a description hit scores 5.  Results are
        copies that include a 'score' key, matching semantic_search() —
        previously the key was missing here, which raised KeyError in any
        caller (e.g. the CLI below) that read result['score'].
        """
        query = query.lower()
        scored = []
        for m in self.metadata:
            score = 0
            if query in m['name'].lower():
                score += 10
            if query in m['description'].lower():
                score += 5

            if score > 0:
                # Copy so the stored metadata dicts are never mutated.
                meta = m.copy()
                meta['score'] = score
                scored.append((score, meta))

        # Stable sort: ties keep library (insertion) order.
        scored.sort(key=lambda x: x[0], reverse=True)
        return [m for s, m in scored[:limit]]
|
||||
|
||||
if __name__ == "__main__":
    # Tiny CLI: index a library directory and optionally run one search.
    import sys
    if len(sys.argv) > 1:
        path = sys.argv[1]
        vm = VectorManager(path)
        if len(sys.argv) > 2:
            query = sys.argv[2]
            res = vm.semantic_search(query)
            print("Search Results for", query)
            for r in res:
                # .get(): fallback (substring) results may lack a 'score'
                # key, which previously crashed this loop with KeyError
                # whenever the ML stack was not installed.
                print(r.get('score'), r['name'], r['path'])
    else:
        print("Usage: python vector_manager.py <library_dir> [search_query]")
|
||||
Reference in New Issue
Block a user