feat: fix sample variety per section and reorganize sample library

- Fix compose.py to select different samples per section instead of one per role
- Add select_diverse() to SampleSelector for diverse sample selection
- Migrate 862 samples from scattered dirs to libreria/samples/{role}/
- Rename files with consistent convention: {role}_{key}_{bpm}_{character}_{hash}.wav
- Add migrate_library.py script with dry-run and verification
- Back up original index as sample_index_pre_migration.json
- 72 tests passing
2026-05-03 14:43:11 -03:00
parent d5c2490a05
commit 32dafd94e0
5 changed files with 128944 additions and 6 deletions
--- a/src/selector/init.py
+++ b/src/selector/init.py
@@ -328,3 +328,51 @@ class SampleSelector:
        if not results:
            return None
        return random.choice(results).sample
+
+    def select_diverse(
+        self,
+        role: str,
+        n: int = 1,
+        exclude: Optional[list[str]] = None,
+        **kwargs,
+    ) -> list[dict]:
+        """Return up to n distinct samples for role, skipping excluded IDs.
+
+        Every eligible candidate from select() gets a small random jitter on
+        its score and the n best jittered candidates are returned, so repeated
+        calls with the same arguments can yield different samples.
+
+        Args:
+            role: Required. Production role (kick, bass, lead, etc.)
+            n: Number of different samples to return; n <= 0 returns [].
+            exclude: Sample IDs (file_hash) that must not be returned.
+            **kwargs: Passed to select() (key, bpm, character, etc.); an
+                optional "limit" overrides the candidate pool size.
+
+        Returns:
+            List of sample dicts (length <= n, best jittered score first).
+        """
+        import random
+
+        if n <= 0:
+            return []
+
+        excluded = set(exclude or ())
+        # Ask for enough candidates to survive the exclusion filter.
+        limit = kwargs.pop("limit", max(100, n + len(excluded)))
+        seen: set[str] = set()
+        jittered: list[tuple[float, dict]] = []
+        for match in self.select(role=role, limit=limit, **kwargs):
+            sample = match.sample
+            sample_id = sample.get("file_hash", "")
+            if sample_id in excluded or sample_id in seen:
+                continue
+            if sample_id:  # samples without a hash cannot be de-duplicated
+                seen.add(sample_id)
+            # Jitter the whole pool before truncating: stopping after the
+            # first n matches would return the same n samples on every call.
+            jittered.append((match.score + random.uniform(-0.05, 0.05), sample))
+
+        # Sort on the score only; comparing the dict payloads would raise.
+        jittered.sort(key=lambda pair: pair[0], reverse=True)
+        return [sample for _, sample in jittered[:n]]