feat: Initial pipeline for Twitch highlight detection
- New 2-of-3 detection system (chat + audio + color) - GPU support (PyTorch ROCm/CUDA ready) - Draft mode (360p) for fast testing - HD mode (1080p) for final render - Auto download video + chat - CLI pipeline script - Documentation in Spanish
This commit is contained in:
95
detector.py
Normal file
95
detector.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import sys
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Matches the leading "[ISO-8601 timestamp]" prefix of a chat log line.
# Compiled once at module level instead of per line.
_TIMESTAMP_RE = re.compile(r'\[(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)\]')


def detect_highlights(chat_file, min_duration=10, threshold=2.0):
    """Detect highlight intervals from bursts of chat activity.

    Scans a chat log where each line starts with ``[ISO-8601 timestamp]``,
    bins messages per second, and flags seconds whose message count has a
    z-score above ``threshold``. Runs of consecutive flagged seconds are
    merged into intervals; intervals shorter than ``min_duration`` seconds
    are discarded.

    Args:
        chat_file: Path to the chat log file (UTF-8 text).
        min_duration: Minimum interval length, in seconds, to keep.
        threshold: Z-score cutoff for a second to count as a peak.

    Returns:
        List of ``(start, end)`` tuples, in whole seconds relative to the
        first message. Empty list when no messages or no peaks are found.
    """
    logger.info("Analizando picos de chat...")

    # Parse (timestamp, raw line) pairs from the log.
    messages = []
    with open(chat_file, 'r', encoding='utf-8') as f:
        for line in f:
            match = _TIMESTAMP_RE.match(line)
            if not match:
                continue
            # datetime.fromisoformat (pre-3.11) does not accept a trailing 'Z'.
            timestamp_str = match.group(1).replace('Z', '+00:00')
            try:
                timestamp = datetime.fromisoformat(timestamp_str)
            except ValueError:
                # Skip lines with malformed timestamps instead of aborting
                # (was a bare except that hid every error, incl. KeyboardInterrupt).
                continue
            messages.append((timestamp, line))

    if not messages:
        logger.error("No se encontraron mensajes")
        return []

    start_time = messages[0][0]
    end_time = messages[-1][0]
    duration = (end_time - start_time).total_seconds()

    logger.info(f"Chat: {len(messages)} mensajes, duración: {duration:.1f}s")

    # Bucket message counts per whole second since the first message.
    time_buckets = {}
    for timestamp, _ in messages:
        second = int((timestamp - start_time).total_seconds())
        time_buckets[second] = time_buckets.get(second, 0) + 1

    counts = list(time_buckets.values())
    mean_count = np.mean(counts)
    std_count = np.std(counts)

    logger.info(f"Stats: media={mean_count:.1f}, std={std_count:.1f}")

    # A second is a peak when its count's z-score exceeds the threshold.
    # std_count check hoisted out of the loop (it is loop-invariant).
    peak_seconds = []
    if std_count > 0:
        for second, count in time_buckets.items():
            z_score = (count - mean_count) / std_count
            if z_score > threshold:
                peak_seconds.append(second)

    logger.info(f"Picos encontrados: {len(peak_seconds)}")

    if not peak_seconds:
        return []

    # Merge runs of consecutive peak seconds into intervals. Sort first:
    # dict order follows insertion (message order), not numeric order.
    peak_seconds.sort()
    intervals = []
    start = prev = peak_seconds[0]
    for second in peak_seconds[1:]:
        if second - prev > 1:
            # BUG FIX: the closed interval is (start, prev), so its length
            # is prev - start. The original compared second - start, which
            # included the gap and overstated the duration.
            if prev - start >= min_duration:
                intervals.append((start, prev))
            start = second
        prev = second

    if prev - start >= min_duration:
        intervals.append((start, prev))

    return intervals
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple CLI entry point: analyze a fixed chat log, print the detected
    # highlight intervals, and persist them for downstream pipeline steps.
    chat_file = "chat.txt"

    highlights = detect_highlights(chat_file)

    print(f"\nHighlights encontrados: {len(highlights)}")
    for i, (start, end) in enumerate(highlights):
        print(f"  {i+1}. {start}s - {end}s (duración: {end-start}s)")

    # Save the (start, end) pairs as JSON; explicit encoding for portability.
    with open("highlights.json", "w", encoding="utf-8") as f:
        json.dump(highlights, f)
    # Was an f-string with no placeholders — plain literal, same output.
    print("\nGuardado en highlights.json")
|
||||
Reference in New Issue
Block a user