feat: Initial pipeline for Twitch highlight detection
- New 2-of-3 detection system (chat + audio + color) - GPU support (PyTorch ROCm/CUDA ready) - Draft mode (360p) for fast testing - HD mode (1080p) for final render - Auto download video + chat - CLI pipeline script - Documentation in Spanish
This commit is contained in:
95
detector.py
Normal file
95
detector.py
Normal file
@@ -0,0 +1,95 @@
|
||||
import sys
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Matches the leading "[ISO-8601 timestamp]" prefix of a chat log line.
# Compiled once at module level instead of per line.
_TIMESTAMP_RE = re.compile(r'\[(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)\]')


def detect_highlights(chat_file, min_duration=10, threshold=2.0):
    """Detect highlight intervals from bursts of chat activity.

    Scans a chat log where each line starts with ``[ISO-8601 timestamp]``,
    bins messages per second, and flags seconds whose message count has a
    z-score above ``threshold``. Runs of consecutive flagged seconds are
    merged into intervals; intervals shorter than ``min_duration`` seconds
    are discarded.

    Args:
        chat_file: Path to the chat log file (UTF-8 text).
        min_duration: Minimum interval length, in seconds, to keep.
        threshold: Z-score cutoff for a second to count as a peak.

    Returns:
        List of ``(start, end)`` tuples, in whole seconds relative to the
        first message. Empty list when no messages or no peaks are found.
    """
    logger.info("Analizando picos de chat...")

    # Parse (timestamp, raw line) pairs from the log.
    messages = []
    with open(chat_file, 'r', encoding='utf-8') as f:
        for line in f:
            match = _TIMESTAMP_RE.match(line)
            if not match:
                continue
            # datetime.fromisoformat (pre-3.11) does not accept a trailing 'Z'.
            timestamp_str = match.group(1).replace('Z', '+00:00')
            try:
                timestamp = datetime.fromisoformat(timestamp_str)
            except ValueError:
                # Skip lines with malformed timestamps instead of aborting
                # (was a bare except that hid every error, incl. KeyboardInterrupt).
                continue
            messages.append((timestamp, line))

    if not messages:
        logger.error("No se encontraron mensajes")
        return []

    start_time = messages[0][0]
    end_time = messages[-1][0]
    duration = (end_time - start_time).total_seconds()

    logger.info(f"Chat: {len(messages)} mensajes, duración: {duration:.1f}s")

    # Bucket message counts per whole second since the first message.
    time_buckets = {}
    for timestamp, _ in messages:
        second = int((timestamp - start_time).total_seconds())
        time_buckets[second] = time_buckets.get(second, 0) + 1

    counts = list(time_buckets.values())
    mean_count = np.mean(counts)
    std_count = np.std(counts)

    logger.info(f"Stats: media={mean_count:.1f}, std={std_count:.1f}")

    # A second is a peak when its count's z-score exceeds the threshold.
    # std_count check hoisted out of the loop (it is loop-invariant).
    peak_seconds = []
    if std_count > 0:
        for second, count in time_buckets.items():
            z_score = (count - mean_count) / std_count
            if z_score > threshold:
                peak_seconds.append(second)

    logger.info(f"Picos encontrados: {len(peak_seconds)}")

    if not peak_seconds:
        return []

    # Merge runs of consecutive peak seconds into intervals. Sort first:
    # dict order follows insertion (message order), not numeric order.
    peak_seconds.sort()
    intervals = []
    start = prev = peak_seconds[0]
    for second in peak_seconds[1:]:
        if second - prev > 1:
            # BUG FIX: the closed interval is (start, prev), so its length
            # is prev - start. The original compared second - start, which
            # included the gap and overstated the duration.
            if prev - start >= min_duration:
                intervals.append((start, prev))
            start = second
        prev = second

    if prev - start >= min_duration:
        intervals.append((start, prev))

    return intervals
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Simple CLI entry point: analyze a fixed chat log, print the detected
    # highlight intervals, and persist them for downstream pipeline steps.
    chat_file = "chat.txt"

    highlights = detect_highlights(chat_file)

    print(f"\nHighlights encontrados: {len(highlights)}")
    for i, (start, end) in enumerate(highlights):
        print(f"  {i+1}. {start}s - {end}s (duración: {end-start}s)")

    # Save the (start, end) pairs as JSON; explicit encoding for portability.
    with open("highlights.json", "w", encoding="utf-8") as f:
        json.dump(highlights, f)
    # Was an f-string with no placeholders — plain literal, same output.
    print("\nGuardado en highlights.json")
|
||||
Reference in New Issue
Block a user