Initial commit: AbletonMCP-AI complete system
- MCP Server with audio fallback, sample management
- Song generator with bus routing
- Reference listener and audio resampler
- Vector-based sample search
- Master chain with limiter and calibration
- Fix: Audio fallback now works without M4L
- Fix: Full song detection in sample loader

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
163
AbletonMCP_AI/MCP_Server/vector_manager.py
Normal file
163
AbletonMCP_AI/MCP_Server/vector_manager.py
Normal file
@@ -0,0 +1,163 @@
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple
|
||||
|
||||
# Optional ML stack: semantic search needs sentence-transformers,
# scikit-learn and numpy.  If any of them is missing, HAS_ML stays False
# and VectorManager degrades to plain substring matching.
try:
    from sentence_transformers import SentenceTransformer
    from sklearn.metrics.pairwise import cosine_similarity
    import numpy as np
    HAS_ML = True
except ImportError:
    HAS_ML = False

# Module-wide logger; basicConfig is a no-op if the host application has
# already configured logging.
logger = logging.getLogger("VectorManager")
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
class VectorManager:
    """Vector-based semantic search over an audio sample library.

    File names and folder paths are embedded with a small
    sentence-transformers model and persisted to a hidden JSON index next
    to the library.  Queries are answered by cosine similarity; when the
    ML stack (sentence-transformers / scikit-learn / numpy) is missing,
    a simple substring search over the same metadata is used instead.
    """

    def __init__(self, library_dir: str):
        """Create a manager for *library_dir* and load or build its index.

        Args:
            library_dir: Root folder, scanned recursively for
                .wav/.aif/.aiff/.mp3 files.
        """
        self.library_dir = Path(library_dir)
        # Hidden JSON file holding both metadata and embedding vectors.
        self.index_file = self.library_dir / ".sample_embeddings.json"

        self.model = None
        self.embeddings = []  # numpy array once loaded/built, else []
        self.metadata = []    # one dict per sample: path/name/description

        if HAS_ML:
            try:
                # Load a very lightweight model for fast embeddings
                logger.info("Loading sentence-transformers model (all-MiniLM-L6-v2)...")
                self.model = SentenceTransformer('all-MiniLM-L6-v2')
            except Exception as e:
                logger.error(f"Failed to load embedding model: {e}")

        self._load_or_build_index()

    def _load_or_build_index(self):
        """Load the persisted index if present, otherwise build a new one."""
        if self.index_file.exists():
            logger.info("Loading existing vector index...")
            try:
                with open(self.index_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self.metadata = data.get('metadata', [])

                if HAS_ML and 'embeddings' in data:
                    self.embeddings = np.array(data['embeddings'])
                else:
                    # Metadata-only index: substring fallback still works.
                    logger.warning("No embeddings found in loaded index.")
            except Exception as e:
                # Corrupt or unreadable index file: rebuild from the library.
                logger.error(f"Failed to load index: {e}")
                self._build_index()
        else:
            self._build_index()

    def _build_index(self):
        """Scan the library, embed every sample name and persist the index."""
        logger.info(f"Scanning library {self.library_dir} for new embeddings...")
        extensions = {'.wav', '.aif', '.aiff', '.mp3'}

        files_to_process = []
        for ext in extensions:
            files_to_process.extend(self.library_dir.rglob('*' + ext))
            files_to_process.extend(self.library_dir.rglob('*' + ext.upper()))

        if not files_to_process:
            logger.warning(f"No audio files found in {self.library_dir} to embed.")
            return

        texts_to_embed = []
        self.metadata = []

        # sorted(set(...)): dedupes the case-insensitive-filesystem double
        # hits from the two rglob passes AND makes the index order
        # deterministic across runs (a bare set() iterates in hash order).
        for f in sorted(set(files_to_process)):
            # Clean up the name for better semantic understanding
            name = f.stem
            clean_name = name.replace('_', ' ').replace('-', ' ').lower()

            # Use relative path as part of the context since folders
            # represent duration and type
            try:
                rel_path = f.relative_to(self.library_dir)
                parts = rel_path.parts[:-1]
                path_context = " ".join(parts).lower()
            except ValueError:
                path_context = ""

            description = f"{clean_name} {path_context}"
            texts_to_embed.append(description)

            self.metadata.append({
                'path': str(f),
                'name': name,
                'description': description
            })

        if HAS_ML and self.model:
            logger.info(f"Generating vectors for {len(texts_to_embed)} samples. This might take a moment...")
            embeddings = self.model.encode(texts_to_embed)
            self.embeddings = embeddings

            # Save the vectors
            with open(self.index_file, 'w', encoding='utf-8') as f:
                json.dump({
                    'metadata': self.metadata,
                    'embeddings': embeddings.tolist()
                }, f)
            logger.info(f"Saved {len(self.metadata)} embeddings to {self.index_file}.")
        else:
            logger.error("ML libraries not installed. Run 'pip install sentence-transformers scikit-learn numpy'")

    def semantic_search(self, query: str, limit: int = 5) -> List[Dict]:
        """
        Returns a list of metadata dicts sorted by semantic relevance down to the limit.
        Every result carries a 'score' key: cosine similarity here, or the
        heuristic integer score in fallback mode.
        Fallback to basic substring matching if ML is unavailable.
        """
        if not HAS_ML or self.model is None or len(self.embeddings) == 0:
            logger.warning("ML unavailable, falling back to substring search.")
            return self._fallback_search(query, limit)

        logger.info(f"Performing semantic search for: '{query}'")
        query_emb = self.model.encode([query])

        # Calculate cosine similarity between query and all stored embeddings
        similarities = cosine_similarity(query_emb, self.embeddings)[0]

        # Get top indices
        top_indices = np.argsort(similarities)[::-1][:limit]

        results = []
        for idx in top_indices:
            score = float(similarities[idx])
            meta = self.metadata[idx].copy()
            meta['score'] = score
            results.append(meta)

        return results

    def _fallback_search(self, query: str, limit: int = 5) -> List[Dict]:
        """Case-insensitive substring search used when ML is unavailable.

        A name hit scores 10 and a description hit scores 5.  Results are
        copies that include a 'score' key, matching semantic_search() —
        previously the key was missing here, which raised KeyError in any
        caller (e.g. the CLI below) that read result['score'].
        """
        query = query.lower()
        scored = []
        for m in self.metadata:
            score = 0
            if query in m['name'].lower():
                score += 10
            if query in m['description'].lower():
                score += 5

            if score > 0:
                # Copy so the stored metadata dicts are never mutated.
                meta = m.copy()
                meta['score'] = score
                scored.append((score, meta))

        # Stable sort: ties keep library (insertion) order.
        scored.sort(key=lambda x: x[0], reverse=True)
        return [m for s, m in scored[:limit]]
|
||||
|
||||
if __name__ == "__main__":
    # Tiny CLI: index a library directory and optionally run one search.
    import sys
    if len(sys.argv) > 1:
        path = sys.argv[1]
        vm = VectorManager(path)
        if len(sys.argv) > 2:
            query = sys.argv[2]
            res = vm.semantic_search(query)
            print("Search Results for", query)
            for r in res:
                # .get(): fallback (substring) results may lack a 'score'
                # key, which previously crashed this loop with KeyError
                # whenever the ML stack was not installed.
                print(r.get('score'), r['name'], r['path'])
    else:
        print("Usage: python vector_manager.py <library_dir> [search_query]")
|
||||
Reference in New Issue
Block a user