ableton-mcp-ai/mcp_server/engines/coherence_scorer.py
OpenCode Agent 5ce8187c65 feat: Implement senior audio injection with 5 fallback methods
- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain
- Method 1: track.insert_arrangement_clip() [Live 12+]
- Method 2: track.create_audio_clip() [Live 11+]
- Method 3: arrangement_clips.add_new_clip() [Live 12+]
- Method 4: Session->duplicate_clip_to_arrangement [Legacy]
- Method 5: Session->Recording [Universal]

- Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow
- Update skills documentation
- Verified: 3 clips created at positions [0, 4, 8] in Arrangement View

Closes: Audio injection in Arrangement View
2026-04-12 14:02:32 -03:00
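The patched handler itself lives elsewhere in the repository; the sketch below is only a hypothetical illustration of the fallback-chain pattern the bullets above describe, using the Live API method names quoted in the commit message (their exact signatures are assumed, not verified).

# NOTE: hypothetical sketch, not the committed handler.
def insert_audio_into_arrangement(track, file_path, start_beats, length_beats):
    attempts = [
        ("insert_arrangement_clip (Live 12+)",
         lambda: track.insert_arrangement_clip(file_path, start_beats)),
        ("create_audio_clip (Live 11+)",
         lambda: track.create_audio_clip(file_path, start_beats)),
        ("arrangement_clips.add_new_clip (Live 12+)",
         lambda: track.arrangement_clips.add_new_clip(start_beats, length_beats)),
    ]
    for label, attempt in attempts:
        try:
            return attempt()  # first method this Live version supports wins
        except (AttributeError, RuntimeError):
            continue  # method missing or rejected: fall through to the next one
    # Methods 4 (session-to-arrangement duplication) and 5 (recording) would follow here
    raise RuntimeError("No arrangement audio injection method available")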


"""
CoherenceScorer - Advanced Coherence Calculation Engine
Calculates multi-dimensional coherence scores between audio samples using
timbre similarity (MFCC), transient compatibility, spectral balance, and
energy consistency.
Professional-grade tool with 0.90 threshold enforcement.
File: AbletonMCP_AI/mcp_server/engines/coherence_scorer.py
"""
import os
import numpy as np
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
from pathlib import Path
class CoherenceError(Exception):
"""Raised when coherence score falls below professional threshold."""
def __init__(self, score: float, weak_components: List[str], suggestions: List[str]):
self.score = score
self.weak_components = weak_components
self.suggestions = suggestions
super().__init__(self._format_message())
def _format_message(self) -> str:
msg = f"\n{'='*60}\n"
msg += f"COHERENCE ERROR: Professional threshold not met\n"
msg += f"{'='*60}\n"
msg += f"Current Score: {self.score:.3f} (MIN_COHERENCE: 0.900)\n"
msg += f"Status: {'PASS ✓' if self.score >= 0.90 else 'FAIL ✗'}\n\n"
if self.weak_components:
msg += f"Weak Components ({len(self.weak_components)}):\n"
for comp in self.weak_components:
msg += f"{comp}\n"
if self.suggestions:
msg += f"\nSuggestions for Improvement:\n"
for i, sug in enumerate(self.suggestions, 1):
msg += f" {i}. {sug}\n"
msg += f"{'='*60}\n"
return msg
@dataclass
class AudioFeatures:
"""Container for extracted audio features."""
mfccs: np.ndarray # MFCC coefficients (timbre)
spectral_centroid: float # Brightness
spectral_rolloff: float # Bandwidth
spectral_flux: np.ndarray # Spectral change (transients)
zero_crossing_rate: float # Noisiness
rms_energy: np.ndarray # Loudness envelope
attack_time: float # Transient attack
sustain_level: float # Sustain level
low_energy: float # Low band energy (20-250Hz)
mid_energy: float # Mid band energy (250-2000Hz)
high_energy: float # High band energy (2000-20000Hz)
duration: float # Audio duration in seconds
sample_rate: int # Sample rate
@dataclass
class ScoreBreakdown:
"""Detailed breakdown of coherence score components."""
overall_score: float
timbre_similarity: float # MFCC cosine similarity (40%)
transient_compatibility: float # Attack characteristic match (30%)
spectral_balance: float # Low/mid/high ratio match (20%)
energy_consistency: float # RMS correlation (10%)
is_professional: bool
weak_components: List[str]
suggestions: List[str]
def to_dict(self) -> Dict:
return {
'overall_score': round(self.overall_score, 4),
'timbre_similarity': round(self.timbre_similarity, 4),
'transient_compatibility': round(self.transient_compatibility, 4),
'spectral_balance': round(self.spectral_balance, 4),
'energy_consistency': round(self.energy_consistency, 4),
'is_professional': self.is_professional,
'weak_components': self.weak_components,
'suggestions': self.suggestions
}
class CoherenceScorer:
"""
Professional coherence calculation engine.
Calculates multi-dimensional coherence scores between audio samples
using real audio feature extraction and weighted component analysis.
Weights:
- Timbre similarity (MFCC): 40%
- Transient compatibility: 30%
- Spectral balance: 20%
- Energy consistency: 10%
Professional threshold: 0.90 (MIN_COHERENCE)
"""
# Professional threshold - no compromise
MIN_COHERENCE = 0.90
# Component weights (must sum to 1.0)
WEIGHTS = {
'timbre': 0.40,
'transient': 0.30,
'spectral': 0.20,
'energy': 0.10
}
# Thresholds for component quality
THRESHOLDS = {
'timbre': 0.75,
'transient': 0.70,
'spectral': 0.65,
'energy': 0.60
}
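# Illustrative blend (hypothetical component scores):
#   timbre=0.92, transient=0.88, spectral=0.90, energy=0.85
#   overall = 0.40*0.92 + 0.30*0.88 + 0.20*0.90 + 0.10*0.85
#           = 0.368 + 0.264 + 0.180 + 0.085 = 0.897  -> below MIN_COHERENCE (0.90)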
def __init__(self, sample_rate: int = 22050):
"""
Initialize the CoherenceScorer.
Args:
sample_rate: Target sample rate for analysis (default 22050)
"""
self.sample_rate = sample_rate
self.last_breakdown: Optional[ScoreBreakdown] = None
def _load_audio(self, file_path: str) -> Tuple[np.ndarray, int]:
"""
Load audio file using librosa.
Args:
file_path: Path to audio file (.wav, .mp3, etc.)
Returns:
Tuple of (audio_array, sample_rate)
Raises:
FileNotFoundError: If file doesn't exist
ValueError: If file format unsupported or corrupted
"""
try:
import librosa
except ImportError:
raise ImportError(
"librosa is required for audio analysis. "
"Install with: pip install librosa"
)
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"Audio file not found: {file_path}")
if path.suffix.lower() not in ['.wav', '.mp3', '.aif', '.aiff', '.flac']:
raise ValueError(f"Unsupported audio format: {path.suffix}")
try:
y, sr = librosa.load(file_path, sr=self.sample_rate, mono=True)
if len(y) == 0:
raise ValueError(f"Audio file is empty: {file_path}")
return y, sr
except Exception as e:
raise ValueError(f"Failed to load audio file {file_path}: {str(e)}")
def _extract_features(self, audio: np.ndarray, sr: int) -> AudioFeatures:
"""
Extract comprehensive audio features.
Args:
audio: Audio time series
sr: Sample rate
Returns:
AudioFeatures dataclass with all extracted features
"""
import librosa
# Basic spectral features
mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))
spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr))
spectral_flux = librosa.onset.onset_strength(y=audio, sr=sr)
zcr = np.mean(librosa.feature.zero_crossing_rate(audio))
rms = librosa.feature.rms(y=audio)[0]
# Band energy analysis
# Low: 20-250Hz, Mid: 250-2000Hz, High: 2000-20000Hz
stft = np.abs(librosa.stft(audio))
freqs = librosa.fft_frequencies(sr=sr)
low_mask = (freqs >= 20) & (freqs <= 250)
mid_mask = (freqs > 250) & (freqs <= 2000)
high_mask = (freqs > 2000) & (freqs <= 20000)
low_energy = np.sum(stft[low_mask, :]) / stft.shape[1]
mid_energy = np.sum(stft[mid_mask, :]) / stft.shape[1]
high_energy = np.sum(stft[high_mask, :]) / stft.shape[1]
# Normalize band energies
total_energy = low_energy + mid_energy + high_energy
if total_energy > 0:
low_energy /= total_energy
mid_energy /= total_energy
high_energy /= total_energy
# Transient analysis (attack detection)
onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr)
if len(onset_frames) > 0:
# Measure attack characteristics around the first transient.
# onset_detect returns frame indices; convert to a sample index first.
first_onset = int(librosa.frames_to_samples(onset_frames[0]))
window_start = max(0, first_onset - int(0.005 * sr))  # ~5 ms before the onset
window_end = min(len(audio), first_onset + int(0.05 * sr))  # ~50 ms after the onset
if window_end > window_start:
attack_segment = audio[window_start:window_end]
# Attack time: time from 10% to 90% of peak
peak_idx = np.argmax(np.abs(attack_segment))
peak_val = np.abs(attack_segment[peak_idx])
if peak_val > 0:
# Find 10% and 90% points
ten_percent = 0.1 * peak_val
ninety_percent = 0.9 * peak_val
ten_idx = np.where(np.abs(attack_segment[:peak_idx]) >= ten_percent)[0]
ninety_idx = np.where(np.abs(attack_segment[:peak_idx]) >= ninety_percent)[0]
if len(ten_idx) > 0 and len(ninety_idx) > 0:
attack_time = (ninety_idx[0] - ten_idx[0]) / sr * 1000 # ms
else:
attack_time = 10.0 # Default 10ms
else:
attack_time = 10.0
# Sustain level: average after attack
sustain_start = peak_idx + int(0.01 * sr) # 10ms after peak
if sustain_start < len(attack_segment):
sustain_level = np.mean(np.abs(attack_segment[sustain_start:]))
else:
sustain_level = 0.0
else:
attack_time = 10.0
sustain_level = np.mean(np.abs(audio)) * 0.5
else:
attack_time = 50.0 # Long attack for non-transient sounds
sustain_level = np.mean(np.abs(audio))
return AudioFeatures(
mfccs=mfccs,
spectral_centroid=spectral_centroid,
spectral_rolloff=spectral_rolloff,
spectral_flux=spectral_flux,
zero_crossing_rate=zcr,
rms_energy=rms,
attack_time=attack_time,
sustain_level=float(sustain_level),
low_energy=float(low_energy),
mid_energy=float(mid_energy),
high_energy=float(high_energy),
duration=len(audio) / sr,
sample_rate=sr
)
def _calculate_timbre_similarity(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate timbre similarity using MFCC cosine similarity.
Uses mean MFCC vectors and accounts for temporal evolution.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Similarity score 0.0-1.0
"""
# Mean MFCC vectors
mfcc1_mean = np.mean(feat1.mfccs, axis=1)
mfcc2_mean = np.mean(feat2.mfccs, axis=1)
# Cosine similarity
dot_product = np.dot(mfcc1_mean, mfcc2_mean)
norm1 = np.linalg.norm(mfcc1_mean)
norm2 = np.linalg.norm(mfcc2_mean)
if norm1 == 0 or norm2 == 0:
return 0.0
cosine_sim = dot_product / (norm1 * norm2)
# Convert from [-1, 1] to [0, 1]
similarity = (cosine_sim + 1) / 2
# Also compare spectral centroid (brightness match)
centroid_diff = abs(feat1.spectral_centroid - feat2.spectral_centroid)
max_centroid = max(feat1.spectral_centroid, feat2.spectral_centroid)
if max_centroid > 0:
centroid_sim = 1 - (centroid_diff / max_centroid)
else:
centroid_sim = 1.0
# Weighted combination: 80% MFCC, 20% centroid
final_similarity = 0.8 * similarity + 0.2 * centroid_sim
return float(np.clip(final_similarity, 0.0, 1.0))
def _calculate_transient_compatibility(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate transient/attack characteristic compatibility.
Compares attack times, sustain levels, and spectral flux patterns.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Compatibility score 0.0-1.0
"""
# Attack time compatibility
attack_diff = abs(feat1.attack_time - feat2.attack_time)
max_attack = max(feat1.attack_time, feat2.attack_time, 1.0)
attack_compatibility = 1 - (attack_diff / max_attack)
# Sustain level compatibility
max_sustain = max(feat1.sustain_level, feat2.sustain_level, 0.001)
sustain_diff = abs(feat1.sustain_level - feat2.sustain_level)
sustain_compatibility = 1 - (sustain_diff / max_sustain)
# Spectral flux pattern correlation
flux1 = feat1.spectral_flux
flux2 = feat2.spectral_flux
# Normalize lengths
min_len = min(len(flux1), len(flux2))
if min_len > 1:
flux1_norm = flux1[:min_len]
flux2_norm = flux2[:min_len]
# Normalize to unit vectors
flux1_norm = flux1_norm / (np.linalg.norm(flux1_norm) + 1e-10)
flux2_norm = flux2_norm / (np.linalg.norm(flux2_norm) + 1e-10)
flux_corr = np.corrcoef(flux1_norm, flux2_norm)[0, 1]
if np.isnan(flux_corr):
flux_corr = 0.0
else:
flux_corr = 0.5
# Weighted combination
# Attack: 40%, Sustain: 30%, Flux correlation: 30%
compatibility = (
0.4 * attack_compatibility +
0.3 * sustain_compatibility +
0.3 * max(0, flux_corr) # Clip negative correlations
)
return float(np.clip(compatibility, 0.0, 1.0))
def _calculate_spectral_balance(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate spectral balance match (low/mid/high ratio comparison).
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Balance score 0.0-1.0
"""
# Energy band ratios
bands1 = np.array([feat1.low_energy, feat1.mid_energy, feat1.high_energy])
bands2 = np.array([feat2.low_energy, feat2.mid_energy, feat2.high_energy])
# Cosine similarity of band distributions
dot = np.dot(bands1, bands2)
norm1 = np.linalg.norm(bands1)
norm2 = np.linalg.norm(bands2)
if norm1 == 0 or norm2 == 0:
return 0.5
balance_sim = dot / (norm1 * norm2)
# Also compare rolloff (high-frequency content boundary)
rolloff_diff = abs(feat1.spectral_rolloff - feat2.spectral_rolloff)
max_rolloff = max(feat1.spectral_rolloff, feat2.spectral_rolloff, 1.0)
rolloff_sim = 1 - (rolloff_diff / max_rolloff)
# Combined: 70% band balance, 30% rolloff match
final_balance = 0.7 * balance_sim + 0.3 * rolloff_sim
return float(np.clip(final_balance, 0.0, 1.0))
def _calculate_energy_consistency(self, feat1: AudioFeatures, feat2: AudioFeatures) -> float:
"""
Calculate energy envelope consistency.
Compares RMS energy patterns and overall loudness.
Args:
feat1: Features from first sample
feat2: Features from second sample
Returns:
Consistency score 0.0-1.0
"""
rms1 = feat1.rms_energy
rms2 = feat2.rms_energy
# Match lengths
min_len = min(len(rms1), len(rms2))
if min_len < 2:
return 0.5
rms1_norm = rms1[:min_len]
rms2_norm = rms2[:min_len]
# Normalize
max_rms1 = np.max(rms1_norm) + 1e-10
max_rms2 = np.max(rms2_norm) + 1e-10
rms1_norm = rms1_norm / max_rms1
rms2_norm = rms2_norm / max_rms2
# Correlation of energy envelopes
corr = np.corrcoef(rms1_norm, rms2_norm)[0, 1]
if np.isnan(corr):
corr = 0.0
# Mean energy similarity
mean1 = np.mean(feat1.rms_energy)
mean2 = np.mean(feat2.rms_energy)
max_mean = max(mean1, mean2, 0.001)
mean_sim = 1 - (abs(mean1 - mean2) / max_mean)
# Combined: 60% correlation, 40% mean level
consistency = 0.6 * max(0, corr) + 0.4 * mean_sim
return float(np.clip(consistency, 0.0, 1.0))
def score_pair(self, sample1_path: str, sample2_path: str, enforce_threshold: bool = True) -> float:
"""
Calculate coherence score between two samples.
Args:
sample1_path: Path to first audio file
sample2_path: Path to second audio file
enforce_threshold: If True, raises CoherenceError if score < 0.90
Returns:
Overall coherence score (0.0-1.0)
Raises:
CoherenceError: If score < MIN_COHERENCE and enforce_threshold=True
FileNotFoundError: If audio files not found
ValueError: If audio loading fails
"""
# Load and extract features
audio1, sr1 = self._load_audio(sample1_path)
audio2, sr2 = self._load_audio(sample2_path)
feat1 = self._extract_features(audio1, sr1)
feat2 = self._extract_features(audio2, sr2)
# Calculate component scores
timbre_score = self._calculate_timbre_similarity(feat1, feat2)
transient_score = self._calculate_transient_compatibility(feat1, feat2)
spectral_score = self._calculate_spectral_balance(feat1, feat2)
energy_score = self._calculate_energy_consistency(feat1, feat2)
# Calculate weighted overall score
overall_score = (
self.WEIGHTS['timbre'] * timbre_score +
self.WEIGHTS['transient'] * transient_score +
self.WEIGHTS['spectral'] * spectral_score +
self.WEIGHTS['energy'] * energy_score
)
# Identify weak components
weak_components = []
suggestions = []
scores = {
'timbre_similarity': timbre_score,
'transient_compatibility': transient_score,
'spectral_balance': spectral_score,
'energy_consistency': energy_score
}
for component, score in scores.items():
# Map component names like 'timbre_similarity' back to their threshold keys ('timbre', ...)
threshold = self.THRESHOLDS.get(component.split('_')[0], 0.6)
if score < threshold:
weak_components.append(f"{component}: {score:.3f} (threshold: {threshold:.2f})")
# Add specific suggestions
if 'timbre' in component:
suggestions.append(
"Consider samples from the same source/pack for timbral consistency. "
"Try layering with a shared reverb bus."
)
elif 'transient' in component:
suggestions.append(
"Adjust transient timing with warp markers or apply transient shaping. "
"Samples have different attack characteristics."
)
elif 'spectral' in component:
suggestions.append(
"Use EQ to match frequency profiles. "
"Check if samples occupy different frequency ranges."
)
elif 'energy' in component:
suggestions.append(
"Adjust clip gain to match perceived loudness. "
"Apply compression for consistent dynamics."
)
# Create breakdown
self.last_breakdown = ScoreBreakdown(
overall_score=overall_score,
timbre_similarity=timbre_score,
transient_compatibility=transient_score,
spectral_balance=spectral_score,
energy_consistency=energy_score,
is_professional=overall_score >= self.MIN_COHERENCE,
weak_components=weak_components,
suggestions=list(dict.fromkeys(suggestions))  # Remove duplicates, preserve order
)
# Enforce professional threshold
if enforce_threshold and overall_score < self.MIN_COHERENCE:
raise CoherenceError(overall_score, weak_components, suggestions)
return overall_score
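# Typical call pattern (illustrative): let CoherenceError surface the formatted
# report when the 0.90 threshold is not met, e.g.
#   scorer = CoherenceScorer()
#   try:
#       scorer.score_pair("kick.wav", "bass.wav")   # hypothetical paths
#   except CoherenceError as err:
#       print(err)  # lists weak components and improvement suggestions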
def score_kit(self, sample_paths: List[str], enforce_threshold: bool = True) -> float:
"""
Calculate overall kit coherence (average of all pairwise scores).
Args:
sample_paths: List of audio file paths
enforce_threshold: If True, raises CoherenceError if score < 0.90
Returns:
Kit coherence score (0.0-1.0)
Raises:
CoherenceError: If score < MIN_COHERENCE and enforce_threshold=True
ValueError: If fewer than 2 samples provided
"""
if len(sample_paths) < 2:
raise ValueError("Need at least 2 samples to calculate kit coherence")
# Calculate all pairwise scores
scores = []
pair_details = []
for i in range(len(sample_paths)):
for j in range(i + 1, len(sample_paths)):
try:
score = self.score_pair(
sample_paths[i],
sample_paths[j],
enforce_threshold=False # Don't raise until we check all
)
scores.append(score)
pair_details.append({
'pair': (Path(sample_paths[i]).name, Path(sample_paths[j]).name),
'score': score
})
except Exception as e:
print(f"Warning: Could not compare {sample_paths[i]} vs {sample_paths[j]}: {e}")
scores.append(0.0)
if not scores:
raise ValueError("No valid pairwise comparisons could be made")
# Average score
kit_score = np.mean(scores)
# Find worst pairs
sorted_pairs = sorted(pair_details, key=lambda x: x['score'])
weak_pairs = [p for p in sorted_pairs if p['score'] < 0.75]
# Build suggestions
suggestions = []
if weak_pairs:
worst = weak_pairs[:3] # Top 3 worst
suggestions.append(
f"{len(weak_pairs)} weak pair(s) detected. "
f"Worst: {worst[0]['pair']} = {worst[0]['score']:.3f}"
)
suggestions.append(
"Consider replacing or processing weak pairs for better cohesion."
)
self.last_breakdown = ScoreBreakdown(
overall_score=kit_score,
timbre_similarity=0.0, # Not meaningful for kit average
transient_compatibility=0.0,
spectral_balance=0.0,
energy_consistency=0.0,
is_professional=kit_score >= self.MIN_COHERENCE,
weak_components=[f"Weak pair: {p['pair']} ({p['score']:.3f})" for p in weak_pairs[:3]],
suggestions=suggestions
)
if enforce_threshold and kit_score < self.MIN_COHERENCE:
raise CoherenceError(kit_score, self.last_breakdown.weak_components, suggestions)
return kit_score
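# Note: kit scoring runs every pairwise comparison, i.e. n*(n-1)/2 calls to
# score_pair(); e.g. an 8-sample kit produces 28 comparisons, a 16-sample kit 120.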
def score_section_transition(self, samples_a: List[str], samples_b: List[str],
enforce_threshold: bool = True) -> float:
"""
Calculate coherence of transition between two sections.
Compares all samples in section A against all samples in section B
to ensure smooth transition.
Args:
samples_a: List of sample paths in first section
samples_b: List of sample paths in second section
enforce_threshold: If True, raises CoherenceError if score < 0.90
Returns:
Transition coherence score (0.0-1.0)
"""
if not samples_a or not samples_b:
raise ValueError("Both sections must contain at least one sample")
# Cross-section comparisons
scores = []
for sample_a in samples_a:
for sample_b in samples_b:
try:
score = self.score_pair(sample_a, sample_b, enforce_threshold=False)
scores.append(score)
except Exception as e:
print(f"Warning: Cross-section comparison failed: {e}")
if not scores:
raise ValueError("No valid cross-section comparisons")
transition_score = np.mean(scores)
# Analyze worst transitions
min_score = min(scores)
weak_count = sum(1 for s in scores if s < 0.75)
suggestions = []
if min_score < 0.70:
suggestions.append(
f"Poor transition detected (worst pair: {min_score:.3f}). "
"Consider using transition FX or crossfade."
)
if weak_count > len(scores) * 0.3:
suggestions.append(
f"{weak_count}/{len(scores)} transitions are weak. "
"Sections may be harmonically or sonically incompatible."
)
self.last_breakdown = ScoreBreakdown(
overall_score=transition_score,
timbre_similarity=0.0,
transient_compatibility=0.0,
spectral_balance=0.0,
energy_consistency=0.0,
is_professional=transition_score >= self.MIN_COHERENCE,
weak_components=[f"Weak transitions: {weak_count}"] if weak_count > 0 else [],
suggestions=suggestions if suggestions else ["Transition coherence is acceptable"]
)
if enforce_threshold and transition_score < self.MIN_COHERENCE:
raise CoherenceError(transition_score, self.last_breakdown.weak_components, suggestions)
return transition_score
def get_score_breakdown(self) -> Dict:
"""
Get detailed breakdown of the last coherence calculation.
Returns:
Dictionary with component scores and analysis
"""
if self.last_breakdown is None:
return {
'error': 'No coherence calculation performed yet. '
'Call score_pair(), score_kit(), or score_section_transition() first.'
}
return self.last_breakdown.to_dict()
@staticmethod
def is_professional_grade(score: float) -> bool:
"""
Check if a coherence score meets professional standards.
Args:
score: Coherence score to evaluate
Returns:
True if score >= MIN_COHERENCE (0.90)
"""
return score >= CoherenceScorer.MIN_COHERENCE
def batch_score(self, sample_paths: List[str], mode: str = 'pairwise') -> Dict:
"""
Batch coherence analysis for multiple samples.
Args:
sample_paths: List of sample paths to analyze
mode: 'pairwise' for all pairs, 'kit' for overall coherence
Returns:
Dictionary with scores and analysis
"""
if mode == 'pairwise':
results = {
'mode': 'pairwise',
'pairs': [],
'min_score': 1.0,
'max_score': 0.0,
'avg_score': 0.0
}
scores = []
for i in range(len(sample_paths)):
for j in range(i + 1, len(sample_paths)):
try:
score = self.score_pair(
sample_paths[i],
sample_paths[j],
enforce_threshold=False
)
scores.append(score)
results['pairs'].append({
'sample_a': Path(sample_paths[i]).name,
'sample_b': Path(sample_paths[j]).name,
'score': round(score, 4),
'professional': score >= self.MIN_COHERENCE
})
except Exception as e:
results['pairs'].append({
'sample_a': Path(sample_paths[i]).name,
'sample_b': Path(sample_paths[j]).name,
'error': str(e)
})
if scores:
results['min_score'] = round(min(scores), 4)
results['max_score'] = round(max(scores), 4)
results['avg_score'] = round(np.mean(scores), 4)
return results
elif mode == 'kit':
score = self.score_kit(sample_paths, enforce_threshold=False)
return {
'mode': 'kit',
'kit_score': round(score, 4),
'professional': score >= self.MIN_COHERENCE,
'sample_count': len(sample_paths),
'breakdown': self.get_score_breakdown()
}
else:
raise ValueError(f"Unknown mode: {mode}. Use 'pairwise' or 'kit'")
# Convenience functions for quick access
def check_coherence(sample1: str, sample2: str) -> Dict:
"""
Quick coherence check between two samples.
Args:
sample1: Path to first audio file
sample2: Path to second audio file
Returns:
Dictionary with score and breakdown
"""
scorer = CoherenceScorer()
try:
score = scorer.score_pair(sample1, sample2, enforce_threshold=False)
return {
'coherent': score >= CoherenceScorer.MIN_COHERENCE,
'score': round(score, 4),
'details': scorer.get_score_breakdown()
}
except Exception as e:
return {
'coherent': False,
'error': str(e)
}
def check_kit_coherence(sample_paths: List[str]) -> Dict:
"""
Quick kit coherence check.
Args:
sample_paths: List of sample paths
Returns:
Dictionary with kit score and analysis
"""
scorer = CoherenceScorer()
try:
score = scorer.score_kit(sample_paths, enforce_threshold=False)
return {
'coherent': score >= CoherenceScorer.MIN_COHERENCE,
'score': round(score, 4),
'details': scorer.get_score_breakdown()
}
except Exception as e:
return {
'coherent': False,
'error': str(e)
}
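# Minimal usage sketch (illustrative; the sample paths below are hypothetical and
# librosa must be installed for score_pair() to run):
if __name__ == "__main__":
    # Pairwise check between two hypothetical samples
    pair = check_coherence("samples/kick.wav", "samples/snare.wav")
    print(pair.get("score", pair.get("error")))

    # Kit-level check across several hypothetical samples
    kit = check_kit_coherence([
        "samples/kick.wav",
        "samples/snare.wav",
        "samples/hat.wav",
    ])
    print("professional" if kit["coherent"] else "below threshold", kit.get("score"))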