- Add _cmd_create_arrangement_audio_pattern with 5-method fallback chain - Method 1: track.insert_arrangement_clip() [Live 12+] - Method 2: track.create_audio_clip() [Live 11+] - Method 3: arrangement_clips.add_new_clip() [Live 12+] - Method 4: Session->duplicate_clip_to_arrangement [Legacy] - Method 5: Session->Recording [Universal] - Add _cmd_duplicate_clip_to_arrangement for session-to-arrangement workflow - Update skills documentation - Verified: 3 clips created at positions [0, 4, 8] in Arrangement View Closes: Audio injection in Arrangement View
614 lines
23 KiB
Python
614 lines
23 KiB
Python
"""
|
|
AudioAnalyzerDual - Dual-backend audio analyzer for AbletonMCP_AI

Primary: librosa for full spectral analysis
Fallback: filename-based inference when librosa unavailable

This module provides intelligent audio sample analysis with graceful
degradation when heavy dependencies aren't available.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import wave
|
|
import struct
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional, List, Dict, Tuple, Any
|
|
from pathlib import Path
|
|
|
|
|
|
@dataclass
class AudioFeatures:
    """Result record holding every feature produced for one analyzed sample."""

    # Tempo / tonality (None when they could not be determined)
    bpm: Optional[float]
    key: Optional[str]
    key_confidence: float

    # Basic file properties and the classified sample category
    duration: float
    sample_rate: int
    sample_type: str

    # Averaged spectral / energy descriptors
    spectral_centroid: float
    spectral_rolloff: float
    zero_crossing_rate: float
    rms_energy: float

    # Harmonic vs. percussive character flags
    is_harmonic: bool
    is_percussive: bool

    # Optional extras; each instance gets its own fresh genre list
    suggested_genres: List[str] = field(default_factory=list)
    groove_template: Optional[Dict] = None
    transients: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the features as a plain dict keyed by field name.

        Values are referenced, not copied, and keys appear in field
        declaration order.
        """
        ordered_names = (
            'bpm',
            'key',
            'key_confidence',
            'duration',
            'sample_rate',
            'sample_type',
            'spectral_centroid',
            'spectral_rolloff',
            'zero_crossing_rate',
            'rms_energy',
            'is_harmonic',
            'is_percussive',
            'suggested_genres',
            'groove_template',
            'transients',
        )
        return {name: getattr(self, name) for name in ordered_names}
|
|
|
|
|
|
class AudioAnalyzerDual:
    """
    Dual-backend audio analyzer:

    - Primary: librosa for full spectral analysis
    - Fallback: filename-based inference when librosa unavailable

    Attributes:
        backend: "librosa" or "basic", whichever is actually in use.
        librosa / numpy: the imported modules, or None in basic mode.
        last_error: text of the last librosa failure that forced a fall
            back to basic analysis (None if that never happened).
    """

    # Key profiles for Krumhansl-Schmuckler algorithm (major and minor);
    # index 0 is the tonic, the following entries ascend by semitone.
    KRUMHANSL_MAJOR = [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    KRUMHANSL_MINOR = [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]

    # Pitch-class names in chromatic order (index 0 = C), sharp and flat spellings
    KEY_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    KEY_NAMES_FLAT = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']

    # Genre suggestions based on BPM ranges (inclusive bounds)
    GENRE_BPM_RANGES = {
        'reggaeton': (85, 100),
        'trap': (130, 150),
        'hip_hop': (85, 110),
        'house': (120, 130),
        'techno': (125, 140),
        'dubstep': (140, 150),
        'drum_and_bass': (160, 180),
        'pop': (100, 130),
        'rock': (120, 140),
        'jazz': (120, 180),
        'ambient': (60, 85),
        'lofi': (70, 90)
    }

    # Sample type keywords for filename-based classification.
    # Order matters: the first type with a matching keyword wins.
    TYPE_KEYWORDS = {
        'kick': ['kick', 'bd', 'bass_drum', 'kck'],
        'snare': ['snare', 'sd', 'rim', 'snr'],
        'clap': ['clap', 'cp'],
        'hihat': ['hihat', 'hat', 'hh', 'hi_hat', 'openhat', 'closedhat'],
        'perc': ['perc', 'percussion', 'bongo', 'conga', 'timbal'],
        'tom': ['tom', 'toms'],
        'cymbal': ['cymbal', 'crash', 'ride', 'splash'],
        'bass': ['bass', 'sub', '808', 'bassline'],
        'synth': ['synth', 'pad', 'lead', 'pluck', 'arp'],
        'fx': ['fx', 'effect', 'riser', 'downer', 'sweep', 'impact'],
        'vocal': ['vocal', 'voice', 'vox', 'chant'],
        'loop': ['loop', 'full', 'groove']
    }

    # Tempo assumed by groove extraction when no BPM is supplied
    DEFAULT_GROOVE_BPM = 95.0

    # Accepted range for BPM values parsed out of filenames
    _BPM_MIN = 60
    _BPM_MAX = 200

    # Flat spellings (upper-cased) mapped to the sharp names used in KEY_NAMES
    _FLAT_TO_SHARP = {
        'DB': 'C#', 'EB': 'D#', 'GB': 'F#', 'AB': 'G#', 'BB': 'A#',
        'CB': 'B', 'FB': 'E',
    }

    # A single key token: root letter, optional accidental, optional minor suffix
    _KEY_TOKEN_RE = re.compile(r'^([A-G])([#b]?)(minor|min|m)?$', re.IGNORECASE)

    # Filename key patterns, tried in order.  Lookaheads are used for the
    # trailing boundary so a key directly before the extension ("_Am.wav")
    # or at the end of the name is found, and so "#" is never cut off.
    _KEY_NAME_PATTERNS = (
        # Key after a separator, followed by a separator, "." or end of name
        re.compile(r'[_\s\-]([A-G][#b]?(?:m|min|minor)?)(?=[_\s\-.]|$)', re.IGNORECASE),
        # "in <Key>"
        re.compile(r'\bin\s+([A-G][#b]?(?:m|min|minor)?)(?![\w#])', re.IGNORECASE),
        # "Key<Key>" / "Key_<Key>"; must not run into a longer word ("Keyboard")
        re.compile(r'Key[_\s]?([A-G][#b]?(?:minor|min|m)?)(?![A-Za-z])', re.IGNORECASE),
    )

    # "<digits>BPM" with optional whitespace, not preceded by further digits
    _BPM_EXPLICIT_RE = re.compile(r'(?<!\d)(\d{2,3})\s*BPM', re.IGNORECASE)
    # Bare number between separators; the trailing separator is not consumed,
    # so adjacent numbers ("_808_120_") are all seen.
    _BPM_BARE_RE = re.compile(r'[_\s\-](\d{2,3})(?=[_\s\-])')

    # Placeholder spectral features per sample type for the basic backend
    _DEFAULT_FEATURES = {
        'kick': {'spectral_centroid': 300, 'spectral_rolloff': 800, 'zero_crossing_rate': 0.05, 'rms_energy': 0.3},
        'snare': {'spectral_centroid': 1500, 'spectral_rolloff': 4000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.25},
        'clap': {'spectral_centroid': 2000, 'spectral_rolloff': 5000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2},
        'hihat': {'spectral_centroid': 8000, 'spectral_rolloff': 15000, 'zero_crossing_rate': 0.3, 'rms_energy': 0.1},
        'perc': {'spectral_centroid': 2500, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.2, 'rms_energy': 0.2},
        'tom': {'spectral_centroid': 800, 'spectral_rolloff': 2000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.25},
        'cymbal': {'spectral_centroid': 10000, 'spectral_rolloff': 18000, 'zero_crossing_rate': 0.35, 'rms_energy': 0.15},
        'bass': {'spectral_centroid': 400, 'spectral_rolloff': 1200, 'zero_crossing_rate': 0.03, 'rms_energy': 0.2},
        'synth': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.1, 'rms_energy': 0.15},
        'fx': {'spectral_centroid': 5000, 'spectral_rolloff': 12000, 'zero_crossing_rate': 0.25, 'rms_energy': 0.2},
        'vocal': {'spectral_centroid': 2000, 'spectral_rolloff': 6000, 'zero_crossing_rate': 0.08, 'rms_energy': 0.18},
        'loop': {'spectral_centroid': 2500, 'spectral_rolloff': 7000, 'zero_crossing_rate': 0.12, 'rms_energy': 0.2},
        'unknown': {'spectral_centroid': 3000, 'spectral_rolloff': 8000, 'zero_crossing_rate': 0.15, 'rms_energy': 0.2}
    }

    def __init__(self, backend="auto"):
        """
        Initialize the analyzer with specified backend.

        Args:
            backend: "auto" or "librosa" (use librosa when importable,
                otherwise basic) or "basic" (never import librosa).
        """
        self.backend = self._detect_backend(backend)
        self.librosa = None
        self.numpy = None
        self.last_error = None
        self._init_libraries()

    def _detect_backend(self, preferred):
        """
        Detect and return the appropriate backend name.

        Returns "basic" when explicitly requested or when librosa/numpy
        cannot be imported; any other ``preferred`` value is treated as
        "auto" and yields "librosa" when both imports succeed.
        """
        if preferred == "basic":
            return "basic"
        try:
            import librosa  # noqa: F401 - availability probe only
            import numpy  # noqa: F401 - availability probe only
        except ImportError:
            return "basic"
        return "librosa"

    def _init_libraries(self):
        """Bind the librosa/numpy modules to the instance if the backend uses them."""
        if self.backend != "librosa":
            return
        try:
            import librosa
            import numpy as np
        except ImportError:
            # Import raced or broke after detection: degrade gracefully.
            self.backend = "basic"
            self.librosa = None
            self.numpy = None
        else:
            self.librosa = librosa
            self.numpy = np

    def analyze_sample(self, file_path):
        """
        Main entry point for audio analysis.

        Args:
            file_path: Path to audio file

        Returns:
            AudioFeatures dataclass with analysis results

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        if self.backend == "librosa":
            try:
                return self._analyze_with_librosa(file_path)
            except Exception as e:
                # Deliberate best-effort: any librosa failure (unsupported
                # codec, decode error, ...) degrades to filename analysis.
                # The reason is kept in self.last_error.
                return self._analyze_basic(file_path, error_context=str(e))
        return self._analyze_basic(file_path)

    def _analyze_with_librosa(self, file_path):
        """
        Full analysis using librosa:

        1. Load audio: librosa.load()
        2. Detect BPM: librosa.beat.beat_track()
        3. Extract spectral: centroid, rolloff, zcr, rms
        4. Detect key: chromagram + Krumhansl-Schmuckler
        5. HPSS: harmonic/percussive separation
        6. Classify type based on features
        7. Extract groove template (for drums)
        8. Suggest genres based on BPM

        All scalar results are converted to built-in Python types so that
        ``AudioFeatures.to_dict()`` can be serialized (numpy scalars such as
        ``numpy.bool_`` are rejected by ``json``).
        """
        np = self.numpy
        lr = self.librosa

        # sr=None asks librosa to keep the file's own sample rate
        y, sr = lr.load(file_path, sr=None)

        # Basic info
        duration = float(lr.get_duration(y=y, sr=sr))

        # BPM detection
        bpm = self._detect_bpm_librosa(y, sr)

        # Spectral features (mean over all frames)
        spectral_centroid = float(np.mean(lr.feature.spectral_centroid(y=y, sr=sr)))
        spectral_rolloff = float(np.mean(lr.feature.spectral_rolloff(y=y, sr=sr)))
        zero_crossing_rate = float(np.mean(lr.feature.zero_crossing_rate(y)))
        rms_energy = float(np.mean(lr.feature.rms(y=y)))

        # Key detection
        key, key_confidence = self._detect_key_librosa(y, sr)

        # HPSS separation: a component is dominant above 60% of the energy
        y_harmonic, y_percussive = lr.effects.hpss(y)
        harmonic_energy = float(np.sum(y_harmonic ** 2))
        percussive_energy = float(np.sum(y_percussive ** 2))
        total_energy = harmonic_energy + percussive_energy
        if total_energy > 0:
            is_harmonic = (harmonic_energy / total_energy) > 0.6
            is_percussive = (percussive_energy / total_energy) > 0.6
        else:
            is_harmonic = False
            is_percussive = False

        # Classify sample type
        sample_type = self._classify_sample_type(file_path, is_harmonic, is_percussive, spectral_centroid)

        # Extract groove template for drum-like material
        groove_template = None
        transients = None
        if is_percussive or sample_type in ('kick', 'snare', 'clap', 'hihat', 'perc', 'loop'):
            groove_template = self._extract_groove_template(y, sr, bpm=bpm)
            transients = groove_template.get('transient_positions', []) if groove_template else []

        # Genre suggestions
        suggested_genres = self._suggest_genres(bpm)

        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=key_confidence,
            duration=duration,
            sample_rate=int(sr),
            sample_type=sample_type,
            spectral_centroid=spectral_centroid,
            spectral_rolloff=spectral_rolloff,
            zero_crossing_rate=zero_crossing_rate,
            rms_energy=rms_energy,
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=suggested_genres,
            groove_template=groove_template,
            transients=transients
        )

    def _analyze_basic(self, file_path, error_context=None):
        """
        Filename-based analysis:

        - Extract BPM from filename patterns
        - Extract key from filename patterns
        - Read duration/sample rate from the WAV header when possible
        - Classify type by keyword matching
        - Set default spectral features based on type

        Args:
            file_path: Path to audio file
            error_context: Optional description of the failure that caused
                the fallback; stored in ``self.last_error``.
        """
        if error_context is not None:
            self.last_error = error_context

        filename = os.path.basename(file_path)

        # Extract info from filename
        bpm = self._extract_bpm_from_name(filename)
        key = self._extract_key_from_name(filename)
        sample_type = self._classify_by_filename(filename)

        # Try to get duration from wave header
        duration, sample_rate = self._get_wave_info(file_path)

        # Set default spectral features based on type
        defaults = self._get_default_features_by_type(sample_type)

        # Suggest genres based on BPM
        suggested_genres = self._suggest_genres(bpm)

        # Determine harmonic/percussive nature by type
        is_harmonic = sample_type in ('synth', 'bass', 'vocal', 'pad', 'lead', 'pluck')
        is_percussive = sample_type in ('kick', 'snare', 'clap', 'hihat', 'perc', 'tom', 'cymbal')

        return AudioFeatures(
            bpm=bpm,
            key=key,
            key_confidence=0.5 if key else 0.0,  # Moderate confidence for filename-based
            duration=duration,
            sample_rate=sample_rate,
            sample_type=sample_type,
            spectral_centroid=defaults['spectral_centroid'],
            spectral_rolloff=defaults['spectral_rolloff'],
            zero_crossing_rate=defaults['zero_crossing_rate'],
            rms_energy=defaults['rms_energy'],
            is_harmonic=is_harmonic,
            is_percussive=is_percussive,
            suggested_genres=suggested_genres,
            groove_template=None,
            transients=None
        )

    def _detect_key_librosa(self, y, sr):
        """
        Uses chromagram and Krumhansl-Schmuckler key profiles.

        Returns:
            (key, confidence); key is None (confidence 0.0) when the
            chromagram carries no usable pitch information.
        """
        chromagram = self.librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = self.numpy.mean(chromagram, axis=1)
        return self._key_from_chroma(chroma_mean)

    def _key_from_chroma(self, chroma_mean):
        """
        Pick the best-matching key for a 12-bin mean chroma vector.

        Every tonic/mode candidate is scored by the Pearson correlation
        between its Krumhansl profile and the chroma vector rotated so that
        the candidate tonic sits at index 0.

        Args:
            chroma_mean: 12 values, index 0 = C ... index 11 = B.

        Returns:
            (key_name, confidence) with confidence in [0, 1], or
            (None, 0.0) for constant/non-finite input.
        """
        np = self.numpy
        chroma = np.asarray(chroma_mean, dtype=float)

        # A flat (e.g. silent) or non-finite vector has no defined
        # correlation; report "no key" instead of failing on NaN scores.
        if not np.all(np.isfinite(chroma)) or float(np.max(chroma) - np.min(chroma)) == 0.0:
            return None, 0.0

        # Normalization is rotation-invariant, so do it once up front.
        chroma = chroma / (np.sum(chroma) + 1e-10)

        best_score = -np.inf
        best_key = None
        best_mode = None
        profiles = (('major', self.KRUMHANSL_MAJOR), ('minor', self.KRUMHANSL_MINOR))

        for tonic in range(12):
            # roll by -tonic moves bin `tonic` to index 0, lining the
            # candidate tonic up with the tonic entry of the profile.
            aligned = np.roll(chroma, -tonic)
            for mode, profile in profiles:
                score = np.corrcoef(aligned, profile)[0, 1]
                if score > best_score:
                    best_score = score
                    best_key = tonic
                    best_mode = mode

        if best_key is None:
            return None, 0.0

        key_name = self.KEY_NAMES[best_key]
        if best_mode == 'minor':
            key_name += 'm'

        # Map the correlation from [-1, 1] onto [0, 1]
        confidence = (float(best_score) + 1) / 2
        confidence = max(0.0, min(1.0, confidence))

        return key_name, confidence

    def _extract_key_from_name(self, filename):
        r"""
        Extract key from filename using regex patterns (case-insensitive).

        Patterns, tried in order:
        - separator + key, followed by a separator, "." or end of name
          (e.g. "Loop_Am_120.wav", "Loop_120_Am.wav")
        - "in <key>" (e.g. "Piano in F#")
        - "Key<key>" / "Key_<key>" (e.g. "Key_Ebm"); "Keyboard" is not a key

        Returns:
            Normalized key such as "A#m", or None when nothing matches.
        """
        for pattern in self._KEY_NAME_PATTERNS:
            match = pattern.search(filename)
            if match:
                return self._normalize_key(match.group(1))
        return None

    def _normalize_key(self, key_str):
        """
        Normalize key string to standard format.

        The result uses an upper-case root, sharps instead of flats and a
        lower-case "m" suffix for minor keys - the same format the librosa
        key detection produces ("A#", "F#m", ...).

        Examples: "am" -> "Am", "Bbmin" -> "A#m", "c#minor" -> "C#m".
        Input that is not a recognizable key token is returned stripped and
        upper-cased.
        """
        token = key_str.strip()
        match = self._KEY_TOKEN_RE.match(token)
        if not match:
            return token.upper()

        letter, accidental, minor = match.groups()
        root = letter.upper()
        if accidental == '#':
            root += '#'
        elif accidental:
            # 'b' / 'B' after the root letter denotes a flat
            root = self._FLAT_TO_SHARP.get(root + 'B', root + 'b')

        return root + ('m' if minor else '')

    def _detect_bpm_librosa(self, y, sr):
        """
        Detect BPM using librosa.beat.beat_track().

        Returns:
            Tempo as float, or None when detection fails or yields a
            non-positive/NaN value.
        """
        try:
            tempo, _ = self.librosa.beat.beat_track(y=y, sr=sr)
            # Depending on the librosa version the tempo is a scalar or an
            # array; take the first value either way.
            tempo = float(self.numpy.asarray(tempo).ravel()[0])
        except Exception:
            return None
        return tempo if tempo > 0 else None

    def _extract_bpm_from_name(self, filename):
        r"""
        Extract BPM from filename using regex patterns.

        Patterns, tried in order:
        - (\d{2,3})\s*BPM, case-insensitive, not preceded by another digit
          (explicit marker, e.g. "128bpm", "_128 BPM")
        - [_\s\-](\d{2,3}) followed by a separator (bare number)

        An explicit marker always wins over a bare number.
        Range validation: 60-200 BPM; out-of-range candidates are skipped.

        Returns:
            BPM as float, or None.
        """
        for pattern in (self._BPM_EXPLICIT_RE, self._BPM_BARE_RE):
            for digits in pattern.findall(filename):
                bpm = int(digits)
                if self._BPM_MIN <= bpm <= self._BPM_MAX:
                    return float(bpm)
        return None

    def _extract_groove_template(self, y, sr, bpm=None):
        """
        Extract groove template for drum loops.

        1. Detect transients: librosa.onset.onset_detect()
        2. Filter by RMS threshold (half the mean RMS)
        3. Categorize by velocity: kick-like, snare-like, hat-like
        4. Map to beat grid
        5. Return template dict

        Args:
            y: Audio samples as loaded by librosa.
            sr: Sample rate of ``y``.
            bpm: Optional known tempo used for the grid mapping.

        Returns:
            Template dict (see ``_build_groove_template``) or None when no
            transient passes the threshold.
        """
        np = self.numpy
        hop_length = 512

        # The same hop length is used for onsets and RMS so onset frame
        # indices address the RMS curve directly.
        onset_frames = self.librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length)
        onset_times = self.librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop_length)
        rms = self.librosa.feature.rms(y=y, hop_length=hop_length)[0]

        mean_rms = float(np.mean(rms))
        rms_threshold = mean_rms * 0.5

        transients = []
        for frame_idx, onset_time in zip(onset_frames, onset_times):
            frame_idx = int(frame_idx)
            if frame_idx >= len(rms):
                continue
            level = float(rms[frame_idx])
            if level > rms_threshold:
                transients.append({
                    'time': float(onset_time),
                    'velocity': level,
                    'category': self._categorize_transient(level, mean_rms)
                })

        return self._build_groove_template(transients, bpm)

    def _build_groove_template(self, transients, bpm=None):
        """
        Map detected transients onto a beat grid.

        Args:
            transients: List of dicts with 'time' (seconds), 'velocity'
                and 'category' ('kick' / 'snare' / 'hat').
            bpm: Known tempo.  When given, a transient's beat position is
                ``time * bpm / 60``.  When missing, the legacy heuristic is
                kept: DEFAULT_GROOVE_BPM estimates the beat count and the
                span up to the last transient is stretched over it.

        Returns:
            None for an empty list, otherwise a dict with
            'transient_positions', 'grid_positions', 'num_beats',
            'kick_positions', 'snare_positions' and 'hat_positions'.
        """
        if not transients:
            return None

        max_time = max(t['time'] for t in transients)
        tempo_known = bpm is not None and bpm > 0

        if tempo_known:
            beats_per_second = bpm / 60.0
            # +1 so the beat index of the last transient is inside the grid
            num_beats = max(4, int(max_time * beats_per_second) + 1)
        else:
            beats_per_second = None
            num_beats = max(4, int(max_time / (60.0 / self.DEFAULT_GROOVE_BPM)))

        grid_positions = []
        for t in transients:
            if tempo_known:
                beat_pos = t['time'] * beats_per_second
            elif max_time > 0:
                beat_pos = (t['time'] / max_time) * num_beats
            else:
                # Only onsets at t=0 (typical one-shot): everything is on beat 0
                beat_pos = 0.0

            # NOTE(review): 'sixteenth' is the fraction of the beat in 1/16
            # steps (0-15), not a sixteenth-note index - confirm consumers.
            grid_positions.append({
                'beat': int(beat_pos),
                'sixteenth': int((beat_pos % 1) * 16),
                'velocity': t['velocity'],
                'category': t['category']
            })

        return {
            'transient_positions': [t['time'] for t in transients],
            'grid_positions': grid_positions,
            'num_beats': num_beats,
            'kick_positions': [p for p in grid_positions if p['category'] == 'kick'],
            'snare_positions': [p for p in grid_positions if p['category'] == 'snare'],
            'hat_positions': [p for p in grid_positions if p['category'] == 'hat']
        }

    def _categorize_transient(self, velocity, mean_rms):
        """
        Categorize transient by velocity level relative to the mean RMS.

        Returns 'kick' above 1.5x the mean, 'snare' above 0.8x, else 'hat'.
        """
        ratio = velocity / (mean_rms + 1e-10)
        if ratio > 1.5:
            return 'kick'
        if ratio > 0.8:
            return 'snare'
        return 'hat'

    def _classify_sample_type(self, file_path, is_harmonic, is_percussive, spectral_centroid):
        """
        Classify sample type based on analysis and filename.

        A filename keyword match takes priority; otherwise the type is
        derived from the harmonic/percussive flags and the spectral
        centroid (Hz thresholds below).
        """
        type_by_name = self._classify_by_filename(os.path.basename(file_path))
        if type_by_name != 'unknown':
            return type_by_name

        # Fall back to spectral classification
        if is_percussive:
            if spectral_centroid < 500:
                return 'kick'
            if spectral_centroid < 2000:
                return 'snare'
            if spectral_centroid < 8000:
                return 'hihat'
            return 'cymbal'

        if is_harmonic:
            return 'bass' if spectral_centroid < 500 else 'synth'

        return 'unknown'

    def _classify_by_filename(self, filename):
        """
        Classify sample type by keywords in filename.

        Matching is a case-insensitive substring test in TYPE_KEYWORDS
        order; returns 'unknown' when no keyword occurs.
        """
        filename_lower = filename.lower()
        for sample_type, keywords in self.TYPE_KEYWORDS.items():
            if any(keyword in filename_lower for keyword in keywords):
                return sample_type
        return 'unknown'

    def _get_default_features_by_type(self, sample_type):
        """
        Return default spectral features based on sample type.

        Unrecognized types get the 'unknown' defaults.  A fresh dict is
        returned so callers may modify it freely.
        """
        table = self._DEFAULT_FEATURES
        return dict(table.get(sample_type, table['unknown']))

    def _suggest_genres(self, bpm):
        """Suggest genres whose BPM range (inclusive) contains ``bpm``; [] for None."""
        if bpm is None:
            return []
        return [
            genre
            for genre, (min_bpm, max_bpm) in self.GENRE_BPM_RANGES.items()
            if min_bpm <= bpm <= max_bpm
        ]

    def _get_wave_info(self, file_path):
        """
        Try to get duration and sample rate from wave file header.

        Args:
            file_path: str or os.PathLike path.

        Returns:
            (duration_seconds, sample_rate).  For non-.wav files the result
            is (0.0, 44100).  If the WAV header cannot be read, the duration
            is estimated from the file size and the rate stays 44100.
        """
        duration = 0.0
        sample_rate = 44100

        try:
            # fspath lets pathlib.Path inputs take the same route as strings
            path = os.fspath(file_path)
            if path.lower().endswith('.wav'):
                with wave.open(path, 'rb') as wf:
                    frame_rate = wf.getframerate()
                    n_frames = wf.getnframes()
                # Commit both values only once they are known to be usable
                duration = n_frames / frame_rate
                sample_rate = frame_rate
        except Exception:
            # Best-effort fallback (unsupported WAV flavour, truncated file,
            # ...): rough estimate assuming 16-bit stereo at 44.1kHz,
            # i.e. ~176KB per second.
            try:
                file_size = os.path.getsize(file_path)
                duration = file_size / (44100 * 2 * 2)
            except Exception:
                duration = 0.0

        return duration, sample_rate

    def get_backend_info(self):
        """Return information about current backend."""
        return {
            'backend': self.backend,
            'librosa_available': self.librosa is not None,
            'numpy_available': self.numpy is not None,
            'version': '1.0.0'
        }
|
|
|
|
|
|
# Convenience function for direct usage
|
|
def analyze_audio(file_path, backend="auto"):
    """
    One-shot helper: build an analyzer and run it on a single file.

    Args:
        file_path: Path to audio file
        backend: "auto", "librosa", or "basic"

    Returns:
        AudioFeatures dataclass
    """
    return AudioAnalyzerDual(backend=backend).analyze_sample(file_path)
|