- Implementación de detector híbrido (Whisper + Chat + Audio + VLM) - Sistema de detección de gameplay real vs hablando - Scene detection con FFmpeg - Soporte para RTX 3050 y RX 6800 XT - Guía completa en 6800xt.md para próxima IA - Scripts de filtrado visual y análisis de contexto - Pipeline automatizado de generación de videos
274 lines
8.8 KiB
Python
274 lines
8.8 KiB
Python
#!/usr/bin/env python3
"""
Chat-Video Synchronizer

Analyzes the transcription (Whisper) and the chat to detect the delay
between them.

Logic:
1. Find moments where the streamer says keywords ("joder", "puta", "no", etc.)
2. Find chat reactions to those same words
3. Compute the time difference between the audio and the chat
4. Apply the offset to every chat timestamp
"""

import bisect
import json
import logging
import re
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

# Configure the root logger at import time: INFO and above, printed as
# "LEVEL: message".
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class ChatVideoSynchronizer:
    """Estimate the chat-vs-video delay and shift chat timestamps to match.

    The delay is estimated by pairing moments where a keyword is spoken in
    the transcription with nearby chat messages containing the same (or a
    related) word, then averaging the time differences of those pairs.
    """

    # Words searched for in the transcription by ``analyze_and_sync``.
    DEFAULT_KEYWORDS = [
        "joder",
        "puta",
        "no",
        "muerto",
        "kill",
        "baron",
        "dragon",
        "mierda",
        "hostia",
        "dios",
        "omg",
        "gg",
        "nice",
    ]

    # For a spoken keyword, the chat words that also count as a reaction.
    RELATED_KEYWORDS = {
        "joder": ["joder", "hostia", "mierda", "omg", "lol"],
        "puta": ["puta", "puto", "mierda", "carajo"],
        "no": ["no", "noo", "nooo", "noooo"],
        "muerto": ["muerto", "muere", "death", "rip"],
        "kill": ["kill", "killed", "mate", "mataron"],
        "baron": ["baron", "barón", "nashor"],
        "dragon": ["dragon", "dragón", "drake"],
    }

    # ``analyze_and_sync`` does not estimate a delay from fewer pairs.
    MIN_MATCHES = 5

    # Delays whose magnitude does not exceed this (seconds) are not applied.
    MIN_SIGNIFICANT_DELAY = 1.0

    # ``logging.getLogger`` returns one shared object per name, so this is
    # the same logger as a module-level ``logging.getLogger(__name__)``.
    # Binding it here keeps the class free of module-global dependencies.
    _log = logging.getLogger(__name__)

    def __init__(self):
        """Create a synchronizer with no delay samples recorded yet."""
        # Filled by ``calculate_delay`` with the samples it averaged.
        self.delay_samples: List[float] = []

    @staticmethod
    def _word_pattern(terms: List[str]) -> "re.Pattern":
        """Compile a regex that matches any of *terms* as a whole word.

        The last character of each term may repeat, so "no" also matches
        "nooooo" (elongated chat spelling). The look-arounds prevent a term
        from matching inside a longer word, e.g. "no" in "noche" or "kill"
        in "skill". Empty terms are ignored; with no usable term the
        returned pattern never matches.
        """
        alternatives = "|".join(re.escape(term) + "+" for term in terms if term)
        if not alternatives:
            return re.compile(r"(?!)")
        return re.compile(
            r"(?<!\w)(?:" + alternatives + r")(?!\w)", re.IGNORECASE
        )

    @staticmethod
    def _index_chat(chat_data: Dict) -> List[Tuple[float, str]]:
        """Return ``(offset_seconds, lowercased_body)`` pairs sorted by offset.

        Comments lacking a numeric ``content_offset_seconds`` or a string
        ``message.body`` are skipped instead of raising.
        """
        entries = []
        for comment in chat_data.get("comments") or []:
            try:
                offset = comment["content_offset_seconds"]
                body = comment["message"]["body"]
            except (KeyError, TypeError):
                continue
            if isinstance(offset, (int, float)) and isinstance(body, str):
                entries.append((offset, body.lower()))
        # Stable sort on the offset only, so equal offsets keep chat order.
        entries.sort(key=lambda entry: entry[0])
        return entries

    def find_keyword_matches(
        self,
        transcription: Dict,
        chat_data: Dict,
        keywords: List[str],
        window_seconds: int = 30,
    ) -> List[Tuple]:
        """Pair spoken keywords with the closest chat reaction to them.

        Args:
            transcription: Dict with a ``segments`` list; each segment is
                read for its ``text`` and ``start`` keys.
            chat_data: Dict with a ``comments`` list; each comment is read
                for ``content_offset_seconds`` and ``message.body``.
            keywords: Words to look for, matched as whole words.
            window_seconds: Base window size. The search radius around each
                spoken keyword is deliberately three times this value.

        Returns:
            List of ``(audio_time, chat_time, keyword, diff)`` tuples, where
            ``diff`` is ``chat_time - audio_time``. At most one tuple is
            produced per (segment, keyword) hit: the chat message with the
            smallest absolute ``diff``.
        """
        # 1. Find the keywords in the transcription.
        spoken_patterns = [(kw, self._word_pattern([kw])) for kw in keywords]
        audio_hits = []
        for seg in transcription.get("segments", []):
            text = seg["text"].lower()
            for keyword, pattern in spoken_patterns:
                if pattern.search(text):
                    audio_hits.append((seg["start"], keyword))

        self._log.info("Keywords encontradas en audio: %d", len(audio_hits))

        # 2. Look for the same (or related) words in the chat. The chat is
        # indexed once so each hit only inspects comments inside its window.
        chat_index = self._index_chat(chat_data)
        chat_times = [chat_time for chat_time, _ in chat_index]
        radius = window_seconds * 3  # wide window, as in the original logic
        reaction_patterns = {}

        matches = []
        for audio_time, keyword in audio_hits:
            pattern = reaction_patterns.get(keyword)
            if pattern is None:
                related = self.RELATED_KEYWORDS.get(keyword, [])
                pattern = self._word_pattern([keyword, *related])
                reaction_patterns[keyword] = pattern

            # Comments with |chat_time - audio_time| < radius (exclusive).
            first = bisect.bisect_right(chat_times, audio_time - radius)
            last = bisect.bisect_left(chat_times, audio_time + radius)

            best_time = None
            best_diff = None
            for idx in range(first, last):
                chat_time, chat_text = chat_index[idx]
                if not pattern.search(chat_text):
                    continue
                diff = chat_time - audio_time
                # Strict comparison keeps the earliest comment on ties.
                if best_diff is None or abs(diff) < abs(best_diff):
                    best_time, best_diff = chat_time, diff

            if best_diff is not None:
                matches.append((audio_time, best_time, keyword, best_diff))

        return matches

    def _is_related_keyword(self, text: str, keyword: str) -> bool:
        """Return True if *text* contains a word related to *keyword*.

        Related words come from ``RELATED_KEYWORDS`` and are matched as
        whole words. Keywords without an entry there always give False.
        """
        related_terms = self.RELATED_KEYWORDS.get(keyword)
        if not related_terms:
            return False
        return self._word_pattern(related_terms).search(text) is not None

    def calculate_delay(
        self, matches: List[Tuple], max_delay: float = 30.0
    ) -> float:
        """Return the mean delay (seconds) over the given matches.

        The delay of a match is ``chat_time - audio_time``: positive when
        the chat arrives after the audio, negative when it arrives before.

        Args:
            matches: Tuples as returned by ``find_keyword_matches``.
            max_delay: Matches whose absolute delay is not below this value
                are treated as outliers and ignored.

        Returns:
            The mean of the retained delays, or 0.0 when none are retained.
            The retained delays are also stored in ``self.delay_samples``.
        """
        delays_filtered = [
            diff for _, _, _, diff in matches if abs(diff) < max_delay
        ]
        self.delay_samples = delays_filtered

        if not delays_filtered:
            return 0.0

        avg_delay = sum(delays_filtered) / len(delays_filtered)

        self._log.info("Delay calculado: %.1fs", avg_delay)
        self._log.info(
            " - Muestras usadas: %d/%d", len(delays_filtered), len(matches)
        )
        self._log.info(" - Min delay: %.1fs", min(delays_filtered))
        self._log.info(" - Max delay: %.1fs", max(delays_filtered))

        return avg_delay

    def synchronize_chat(self, chat_data: Dict, delay: float) -> Dict:
        """Return chat data with *delay* subtracted from every timestamp.

        Args:
            chat_data: Original chat data; it is not modified.
            delay: Seconds to subtract from each ``content_offset_seconds``
                (positive when the chat arrives late).

        Returns:
            ``chat_data`` itself when ``delay`` is 0. Otherwise a shallow
            copy that keeps every top-level key and replaces ``comments``
            with shifted copies. Shifted times are clamped at 0.
        """
        if delay == 0:
            return chat_data

        shifted_comments = []
        for comment in chat_data.get("comments") or []:
            new_comment = comment.copy()
            new_time = comment["content_offset_seconds"] - delay
            # Do not allow negative timestamps.
            if new_time < 0:
                new_time = 0
            new_comment["content_offset_seconds"] = new_time
            shifted_comments.append(new_comment)

        # Copy the container so other top-level keys are carried over
        # instead of being dropped.
        synchronized = dict(chat_data)
        synchronized["comments"] = shifted_comments

        self._log.info("Chat sincronizado: %d mensajes", len(shifted_comments))
        # Signed format: a negative delay reads "+5.0s" rather than "--5.0s".
        self._log.info(
            "Delay aplicado: %+.1fs a todos los timestamps", -delay
        )

        return synchronized

    def analyze_and_sync(
        self,
        transcription: Dict,
        chat_data: Dict,
        output_file: Optional[str] = None,
    ) -> Tuple[Dict, float]:
        """Detect the chat delay and return the synchronized chat.

        Args:
            transcription: Transcription dict (see ``find_keyword_matches``).
            chat_data: Chat dict (see ``find_keyword_matches``).
            output_file: Optional path. When given, the returned chat is
                written there as JSON on every return path, including the
                ones where the chat is left unchanged.

        Returns:
            ``(chat, delay)``. With fewer than ``MIN_MATCHES`` matches, or a
            delay not larger in magnitude than ``MIN_SIGNIFICANT_DELAY``,
            the original ``chat_data`` and 0.0 are returned.
        """
        log = self._log
        log.info("=" * 60)
        log.info("SINCRONIZADOR CHAT-VIDEO")
        log.info("=" * 60)

        # 1. Find coincidences.
        keywords = self.DEFAULT_KEYWORDS
        log.info("Buscando coincidencias de %d keywords...", len(keywords))
        matches = self.find_keyword_matches(transcription, chat_data, keywords)

        result, applied_delay = chat_data, 0.0
        if len(matches) < self.MIN_MATCHES:
            log.warning(
                "Pocas coincidencias (%d), usando delay = 0", len(matches)
            )
        else:
            log.info("Coincidencias encontradas: %d", len(matches))

            # 2. Compute the delay.
            delay = self.calculate_delay(matches)

            # 3. Synchronize only when the delay is significant.
            if abs(delay) > self.MIN_SIGNIFICANT_DELAY:
                result = self.synchronize_chat(chat_data, delay)
                applied_delay = delay
            else:
                log.info(
                    "Delay insignificante (< %gs), usando chat original",
                    self.MIN_SIGNIFICANT_DELAY,
                )

        if output_file:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(result, f)
            if applied_delay:
                log.info("Chat sincronizado guardado: %s", output_file)
            else:
                log.info("Chat original guardado sin cambios: %s", output_file)

        return result, applied_delay


def main() -> None:
    """Command-line entry point: synchronize a chat JSON with its video.

    Reads the transcription and chat JSON files given on the command line,
    runs ``ChatVideoSynchronizer.analyze_and_sync`` on them, writes the
    resulting chat to ``--output`` and prints a summary.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Sincroniza chat con video analizando delay"
    )
    parser.add_argument(
        "--transcription", required=True, help="JSON de transcripción Whisper"
    )
    parser.add_argument("--chat", required=True, help="JSON del chat original")
    parser.add_argument(
        "--output", default="chat_synced.json", help="Output JSON sincronizado"
    )

    args = parser.parse_args()

    # Load the inputs. JSON is read as UTF-8 explicitly so accented text
    # does not depend on the platform's default encoding.
    with open(args.transcription, "r", encoding="utf-8") as f:
        transcription = json.load(f)

    with open(args.chat, "r", encoding="utf-8") as f:
        chat_data = json.load(f)

    # Synchronize.
    synchronizer = ChatVideoSynchronizer()
    synced_chat, delay = synchronizer.analyze_and_sync(transcription, chat_data)

    # Always write the result here, including when no delay was applied,
    # so the "Archivo guardado" line below is true on every path.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(synced_chat, f)

    separator = "=" * 60
    print(f"\n{separator}")
    print("SINCRONIZACIÓN COMPLETADA")
    print(separator)
    print(f"Delay detectado: {delay:.1f}s")
    if delay > 0:
        print(f" → El chat llega {delay:.1f}s DESPUÉS del video")
        print(f" → Se restaron {delay:.1f}s a todos los timestamps")
    elif delay < 0:
        print(f" → El chat llega {abs(delay):.1f}s ANTES del video")
        print(f" → Se sumaron {abs(delay):.1f}s a todos los timestamps")
    else:
        print(" → Chat y video ya están sincronizados")
    print(f"\nArchivo guardado: {args.output}")
    print(separator)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()