Files
twitch-highlight-detector/detector_hibrido.py
renato97 00180d0b1c Sistema completo de detección de highlights con VLM y análisis de gameplay
- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
2026-02-19 17:38:14 +00:00

261 lines
7.9 KiB
Python

#!/usr/bin/env python3
"""
Hybrid highlight detector: finds chat-activity peaks, then filters the
candidate clips against the transcript using a MiniMax LLM call.
"""
import json
import logging
import os
import numpy as np
from openai import OpenAI
# Module-wide logging: INFO level, logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def get_chat_peaks(chat_data, window_seconds=5, percentile_threshold=85):
    """
    Detect activity peaks in the chat.

    Parameters
    ----------
    chat_data : dict
        Parsed chat JSON; must contain a "comments" list whose items carry
        a 'content_offset_seconds' field (seconds from stream start).
    window_seconds : int
        Width of the moving-average smoothing window, in seconds.
    percentile_threshold : float
        Percentile (over the *nonzero* smoothed activity) used as the cutoff.

    Returns
    -------
    (peak_indices, activity_smooth) : (np.ndarray, np.ndarray)
        Seconds whose smoothed message rate exceeds the threshold, and the
        full smoothed per-second activity series.
    """
    comments = chat_data.get('comments', [])
    # Guard: an empty chat would make max() raise; report no peaks instead.
    if not comments:
        logger.info("Picos de chat: 0 segundos con actividad alta")
        return np.array([], dtype=np.int64), np.zeros(0)
    duration = max(int(c['content_offset_seconds']) for c in comments) + 1
    # Messages per second of stream time.
    activity = np.zeros(duration, dtype=np.int32)
    for comment in comments:
        second = int(comment['content_offset_seconds'])
        if second < duration:
            activity[second] += 1
    # Smooth with a moving average so bursty adjacent seconds merge.
    kernel = np.ones(window_seconds) / window_seconds
    activity_smooth = np.convolve(activity, kernel, mode='same')
    # Threshold from the percentile of *active* seconds only. Guard against
    # an all-zero series: np.percentile raises on an empty array.
    nonzero = activity_smooth[activity_smooth > 0]
    if nonzero.size == 0:
        logger.info("Picos de chat: 0 segundos con actividad alta")
        return np.array([], dtype=np.int64), activity_smooth
    threshold = np.percentile(nonzero, percentile_threshold)
    # Peaks = seconds strictly above the threshold.
    peak_mask = activity_smooth > threshold
    peak_indices = np.where(peak_mask)[0]
    logger.info(f"Picos de chat: {len(peak_indices)} segundos con actividad alta")
    logger.info(f"Threshold: {threshold:.1f} mensajes/segundo (percentil {percentile_threshold})")
    return peak_indices, activity_smooth
def _clamp_duration(duration, min_duration, max_duration):
    """Clamp a raw interval length into [min_duration, max_duration]."""
    return max(min_duration, min(duration, max_duration))


def group_peaks_into_intervals(peak_indices, min_duration=15, max_duration=30, gap_seconds=8):
    """
    Group nearby peak seconds into clip intervals of min..max duration.

    Parameters
    ----------
    peak_indices : sequence of int
        Sorted second offsets flagged as peaks.
    min_duration, max_duration : int
        Bounds (in seconds) each emitted interval is clamped to.
    gap_seconds : int
        Peaks farther apart than this start a new interval.

    Returns
    -------
    list[tuple[int, int]]
        (start, end) second pairs, one per peak group.
    """
    if len(peak_indices) == 0:
        return []
    intervals = []
    start = peak_indices[0]
    prev = peak_indices[0]
    for idx in peak_indices[1:]:
        if idx - prev > gap_seconds:
            # Gap too large: close the current group, clamped to the
            # desired clip length, and start a new one at this peak.
            duration = _clamp_duration(prev - start, min_duration, max_duration)
            intervals.append((int(start), int(start + duration)))
            start = idx
        prev = idx
    # Close the final group.
    duration = _clamp_duration(prev - start, min_duration, max_duration)
    intervals.append((int(start), int(start + duration)))
    return intervals
def get_transcript_segments(transcripcion_json, start, end):
    """
    Return the transcript text overlapping the [start, end] interval.

    Reads the Whisper-style JSON at *transcripcion_json*, keeps every
    segment whose time span overlaps the interval, and joins their
    stripped texts with single spaces.
    """
    with open(transcripcion_json, 'r', encoding='utf-8') as fh:
        payload = json.load(fh)
    # A segment overlaps the interval iff it neither ends before it
    # starts nor starts after it ends.
    overlapping = [
        seg["text"].strip()
        for seg in payload.get("segments", [])
        if seg["end"] >= start and seg["start"] <= end
    ]
    return " ".join(overlapping)
def filter_intervals_with_minimax(intervals, transcripcion_json, api_key=None):
    """
    Use the MiniMax chat API to keep only "interesting" intervals.

    Parameters
    ----------
    intervals : list[tuple[int, int]]
        Candidate (start, end) second ranges.
    transcripcion_json : str
        Path to the Whisper-style transcript JSON (with "segments").
    api_key : str | None
        API key; falls back to the OPENAI_API_KEY environment variable.

    Returns
    -------
    list[tuple[int, int]]
        The intervals the model marked "SI". On any API/parsing error,
        every interval is kept (best-effort fallback).
    """
    base_url = os.environ.get("OPENAI_BASE_URL", "https://api.minimax.io/v1")
    if not api_key:
        api_key = os.environ.get("OPENAI_API_KEY")
    client = OpenAI(base_url=base_url, api_key=api_key)
    # Collect the transcript text for each candidate interval.
    interval_texts = []
    for i, (start, end) in enumerate(intervals):
        text = get_transcript_segments(transcripcion_json, start, end)
        mins = start // 60
        secs = start % 60
        interval_texts.append({
            "index": i,
            "start": start,
            "end": end,
            "timestamp": f"[{mins:02d}:{secs:02d}]",
            "text": text
        })
    # Only the first 50 clips are sent to the model; anything beyond that
    # is auto-included by the padding below. BUG FIX: the prompt used to
    # claim len(interval_texts) clips while showing at most 50 — the counts
    # now match what the model actually sees.
    shown = interval_texts[:50]
    summary_lines = []
    for it in shown:
        summary_lines.append(f"{it['timestamp']} {it['text'][:100]}")
    full_summary = "\n".join(summary_lines)
    prompt = f"""Eres un filtrador de contenido de Twitch.
CLIPS A ANALIZAR ({len(shown)} clips):
{full_summary}
TU TAREA: Para cada clip, responde SOLO "SI" o "NO".
"SI" = incluir, si el clip tiene:
- Risas, carcajadas, jajaja
- Emoción, entusiasmo, celebración
- Algo gracioso o inesperado
- Mencion de jugada épica
"NO" = excluir, si el clip tiene:
- Quejas, insultos, rage negativo
- Conversación aburrida
- Silencio o texto muy corto
- Repetición de palabras sin sentido (como "Gigi")
IMPORTANTE: Responde en una sola línea con SI/NO separados por coma.
Ejemplo: SI,NO,SI,SI,NO,SI,NO
Tu respuesta para los {len(shown)} clips:"""
    try:
        response = client.chat.completions.create(
            model="MiniMax-M2.5",
            messages=[
                {"role": "system", "content": "Eres un experto editor que identifica momentos virales."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=500
        )
        content = response.choices[0].message.content.strip().upper()
        # Parse the "SI,NO,SI,..." answer; tolerate spaces or commas.
        decisions_raw = content.replace(',', ' ').split()
        decisions = []
        for d in decisions_raw:
            if d == "SI" or d == "INCLUDE":
                decisions.append("INCLUDE")
            elif d == "NO" or d == "EXCLUDE":
                decisions.append("EXCLUDE")
        # Pad missing decisions (short answers, clips beyond the 50 shown)
        # with INCLUDE so no candidate is silently dropped.
        if len(decisions) < len(interval_texts):
            decisions.extend(["INCLUDE"] * (len(interval_texts) - len(decisions)))
        logger.info(f"Decisiones de la IA: {sum(1 for d in decisions if d == 'INCLUDE')} INCLUDE, {sum(1 for d in decisions if d == 'EXCLUDE')} EXCLUDE")
    except Exception as e:
        # Best-effort fallback: an API failure keeps every interval.
        logger.error(f"Error en API: {e}")
        decisions = ["INCLUDE"] * len(interval_texts)
    # Apply the decisions; extra decisions (model over-answered) are ignored.
    filtered = []
    for i, decision in enumerate(decisions):
        if i < len(intervals):
            if decision == "INCLUDE":
                filtered.append(intervals[i])
    logger.info(f"Intervalos después del filtro: {len(filtered)}/{len(intervals)}")
    return filtered
def main():
    """CLI entry point: chat peaks -> grouped intervals -> LLM filter -> JSON."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--transcripcion", required=True)
    parser.add_argument("--output", default="highlights_hibrido.json")
    parser.add_argument("--top", type=int, default=25)
    parser.add_argument("--min-duration", type=int, default=15)
    parser.add_argument("--max-duration", type=int, default=30)
    args = parser.parse_args()
    # Load the downloaded chat dump.
    logger.info("Cargando chat...")
    with open(args.chat, 'r') as f:
        chat_data = json.load(f)
    # Chat activity peaks -> candidate seconds.
    peak_indices, activity_smooth = get_chat_peaks(chat_data)
    # Merge nearby peak seconds into clip-sized intervals.
    intervals = group_peaks_into_intervals(
        peak_indices,
        min_duration=args.min_duration,
        max_duration=args.max_duration
    )
    logger.info(f"Intervalos de chat: {len(intervals)}")
    # Keep only the intervals the LLM judges interesting (top N candidates).
    filtered = filter_intervals_with_minimax(intervals[:args.top], args.transcripcion)
    # Persist the final interval list.
    with open(args.output, 'w') as f:
        json.dump(filtered, f)
    logger.info(f"Guardado en {args.output}")
    # Human-readable summary on stdout.
    total_seconds = sum(e - s for s, e in filtered)
    print(f"\n{'='*70}")
    print(f"HIGHLIGHTS HÍBRIDOS (Chat + IA)".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(filtered)} clips")
    print(f"Duración total: {total_seconds}s ({total_seconds/60:.1f} min)")
    print(f"{'-'*70}")
    for i, (start, end) in enumerate(filtered, 1):
        h, rem = divmod(start, 3600)
        m, sec = divmod(rem, 60)
        text = get_transcript_segments(args.transcripcion, start, end)[:60]
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {end - start}s - {text}...")
    print(f"{'='*70}")


if __name__ == "__main__":
    main()