- Implementación de detector híbrido (Whisper + Chat + Audio + VLM) - Sistema de detección de gameplay real vs hablando - Scene detection con FFmpeg - Soporte para RTX 3050 y RX 6800 XT - Guía completa en 6800xt.md para próxima IA - Scripts de filtrado visual y análisis de contexto - Pipeline automatizado de generación de videos
340 lines
11 KiB
Python
340 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Detector automático de intro/breaks en streams.
|
|
Analiza chat y audio para detectar cuándo termina la intro.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
import subprocess
|
|
import numpy as np
|
|
|
|
# Configure the root logger at import time so the INFO-level progress
# messages emitted by the detectors below are shown.
logging.basicConfig(level=logging.INFO)

# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Regex fragments typical of intro / waiting-room chat (Spanish and English).
_INTRO_KEYWORDS = (
    r"\b(empieza|empezar|ya|comienza)\b",
    r"\b(hola|hi|ola|hey|buenas|buenos)\b",
    r"\b(calvo|gord|prende|prendio|enciende)\b",
    r"\b(vamo|vamos|ya vamos)\b",
    r"\b(espera|esperando|waiting)\b",
    r"\b(offstream|off-stream|break|vuelta|volviste)\b",
    r"\b(merch|tienda|discord|redes|social|follow)\b",
    r"\b(intro|presento|presentaci[oó]n|inicio|comienzo)\b",
    r"\b(rrss|twitter|instagram|youtube)\b",
    r"\b(sorteo|giveaway|donar|sub|prime)\b",
)

# Regex fragments that indicate the streamer is already playing.
_GAMEPLAY_KEYWORDS = (
    r"\b(kill|muerte|mate|muero|mata|mat[oó])\b",
    r"\b(fight|pelea|teamfight|gank)\b",
    r"\b(ulti|ultimate|habilidad|spell)\b",
    r"\b(lol|gg|wp|ff|nice|good)\b",
    r"\b(bar[oó]n|drag[oó]n|nashor|inhib|torre)\b",
    r"\b(champ|campe[oó]n|top|mid|jg|jungla|adc|support)\b",
    r"\b(penta|quadra|triple|ace)\b",
    r"\b(feed|int|troll|report)\b",
    r"\b(lag|fps|ping|delay)\b",
)

# A message counts at most once per category no matter how many patterns it
# matches, so each list is folded into one alternation and compiled once.
_INTRO_RE = re.compile("|".join(_INTRO_KEYWORDS), re.IGNORECASE)
_GAMEPLAY_RE = re.compile("|".join(_GAMEPLAY_KEYWORDS), re.IGNORECASE)

# Only the first messages of each window are classified.
_MAX_SAMPLED_MESSAGES = 100

# Value returned when no window qualifies (5 minutes).
_DEFAULT_INTRO_END_SECONDS = 300


def _mmss(second):
    """Format a second offset as the ``Xm Ys`` suffix used in the log lines."""
    return f"{second // 60}m {second % 60}s"


def _index_comments_by_second(comments):
    """Map each whole second to the list positions of the comments sent in it.

    Negative offsets are skipped: they would otherwise wrap around when used
    as an index into the per-second activity vector.
    """
    by_second = {}
    for position, comment in enumerate(comments):
        second = int(comment["content_offset_seconds"])
        if second >= 0:
            by_second.setdefault(second, []).append(position)
    return by_second


def _score_chat_windows(comments, by_second, window_seconds):
    """Score 50%-overlapping chat windows by keyword ratios and message rate.

    Returns one dict per non-empty window with the keys ``start``, ``end``,
    ``intro_ratio``, ``gameplay_ratio``, ``activity`` (mean messages per
    second) and ``messages`` (number of sampled messages).
    """
    duration = max(by_second) + 1

    # Messages per second over the whole chat log.
    activity = np.zeros(duration)
    for second, positions in by_second.items():
        activity[second] = len(positions)

    # Half-window stride; never 0, otherwise range() raises for a 1 s window.
    step = max(1, window_seconds // 2)

    flags = {}  # comment position -> (matches intro, matches gameplay)
    scores = []
    for start in range(0, duration - window_seconds, step):
        end = min(start + window_seconds, duration)

        # Sorting by position keeps the sample in the order of the chat file.
        positions = sorted(
            position
            for second in range(start, end)
            for position in by_second.get(second, ())
        )
        if not positions:
            continue

        sample = positions[:_MAX_SAMPLED_MESSAGES]
        intro_hits = 0
        gameplay_hits = 0
        for position in sample:
            if position not in flags:
                # Windows overlap, so classify each message only once.
                body = comments[position]["message"]["body"].lower()
                flags[position] = (
                    _INTRO_RE.search(body) is not None,
                    _GAMEPLAY_RE.search(body) is not None,
                )
            is_intro, is_gameplay = flags[position]
            intro_hits += is_intro
            gameplay_hits += is_gameplay

        total = len(sample)
        scores.append(
            {
                "start": start,
                "end": end,
                "intro_ratio": intro_hits / total,
                "gameplay_ratio": gameplay_hits / total,
                "activity": np.mean(activity[start:end]),
                "messages": total,
            }
        )
    return scores


def detect_intro_end_chat(chat_data, min_chat_activity=5, window_seconds=60):
    """Estimate the second at which the stream intro ends, using chat only.

    The chat is cut into windows of ``window_seconds`` with 50% overlap. For
    each window the share of (sampled) messages matching intro keywords and
    gameplay keywords is computed, together with the mean message rate.
    Three heuristics are then tried in order:

    1. the first sufficiently active window where gameplay keywords outweigh
       intro keywords (and exceed 5% of messages), confirmed by at least one
       of the two following windows;
    2. the first sufficiently active window whose intro-keyword ratio dropped
       below 0.1 right after a window above 0.3;
    3. the first window whose message rate is at least twice
       ``min_chat_activity``.

    Args:
        chat_data: Mapping with a ``"comments"`` list; each comment provides
            ``"content_offset_seconds"`` and ``["message"]["body"]``.
        min_chat_activity: Minimum mean messages per second for a window to
            be eligible.
        window_seconds: Window length in whole seconds (at least 1).

    Returns:
        The start second of the detected window; ``0`` when there are no
        usable comments; ``300`` when no window qualifies (this includes
        chats that are not longer than one window).

    Raises:
        ValueError: If ``window_seconds`` is smaller than 1.
    """
    if window_seconds < 1:
        raise ValueError("window_seconds must be at least 1")

    logger.info("Analizando chat para detectar fin de intro...")

    comments = list(chat_data["comments"])
    by_second = _index_comments_by_second(comments)
    if not by_second:
        return 0

    scores = _score_chat_windows(comments, by_second, window_seconds)
    if not scores:
        return _DEFAULT_INTRO_END_SECONDS

    # 1) Transition: gameplay keywords overtake intro keywords. The last two
    #    windows are excluded because they cannot be confirmed by followers.
    for i, current in enumerate(scores[:-2]):
        if (
            current["activity"] >= min_chat_activity
            and current["gameplay_ratio"] > current["intro_ratio"]
            and current["gameplay_ratio"] > 0.05
        ):
            confirmed = any(
                following["gameplay_ratio"] > following["intro_ratio"]
                for following in scores[i + 1 : i + 3]
            )
            if confirmed:
                logger.info(
                    f"Fin de intro detectado en segundo {current['start']} "
                    f"({_mmss(current['start'])})"
                )
                logger.info(f" - Actividad: {current['activity']:.1f} msg/s")
                logger.info(
                    f" - Gameplay keywords: {current['gameplay_ratio'] * 100:.1f}%"
                )
                logger.info(
                    f" - Intro keywords: {current['intro_ratio'] * 100:.1f}%"
                )
                return current["start"]

    # 2) No clear transition: look for a sharp drop in intro keywords.
    for previous, current in zip(scores, scores[1:]):
        if (
            previous["intro_ratio"] > 0.3
            and current["intro_ratio"] < 0.1
            and current["activity"] >= min_chat_activity
        ):
            logger.info(
                f"Fin de intro por caída de keywords en segundo {current['start']} "
                f"({_mmss(current['start'])})"
            )
            return current["start"]

    # 3) Fallback: first window with clearly sustained activity.
    for current in scores:
        if current["activity"] >= min_chat_activity * 2:
            logger.info(
                f"Fin de intro por actividad sostenida en segundo {current['start']} "
                f"({_mmss(current['start'])})"
            )
            return current["start"]

    return _DEFAULT_INTRO_END_SECONDS
|
|
|
|
|
|
# Audio is decoded to 16 kHz mono signed 16-bit PCM and measured in 10 s windows.
_AUDIO_SAMPLE_RATE = 16000
_AUDIO_WINDOW_SECONDS = 10
_AUDIO_BYTES_PER_SAMPLE = 2


def _read_window_volumes(video_file):
    """Return the RMS volume of every complete 10 s audio window, or None.

    ffmpeg streams raw PCM to a pipe and the data is consumed one window at a
    time, so memory use does not grow with the length of the video. ``None``
    is returned only when ffmpeg could not be started.
    """
    cmd = [
        "ffmpeg",
        "-nostdin",  # never let ffmpeg read from the caller's stdin
        "-y",
        "-i",
        video_file,
        "-vn",
        "-acodec",
        "pcm_s16le",
        "-ar",
        str(_AUDIO_SAMPLE_RATE),
        "-ac",
        "1",
        # ffmpeg applies options to the next file named, so this has to come
        # before the output URL to take effect.
        "-threads",
        "4",
        "-f",
        "s16le",  # headerless PCM: decodable with numpy alone
        "pipe:1",
    ]
    window_bytes = (
        _AUDIO_SAMPLE_RATE * _AUDIO_WINDOW_SECONDS * _AUDIO_BYTES_PER_SAMPLE
    )

    volumes = []
    try:
        # stderr is discarded so that ffmpeg can never block on a full pipe
        # while only stdout is being drained.
        with subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
        ) as proc:
            while True:
                chunk = proc.stdout.read(window_bytes)
                if len(chunk) < window_bytes:
                    break  # end of stream; a trailing partial window is ignored
                # Scale int16 samples to [-1.0, 1.0) as float32.
                samples = np.frombuffer(chunk, dtype="<i2").astype(np.float32) / 32768.0
                volumes.append(np.sqrt(np.mean(samples**2)))
    except OSError as e:
        # Typically: the ffmpeg executable is not installed / not on PATH.
        logger.warning(f"No se pudo analizar audio: {e}")
        return None

    if proc.returncode != 0:
        logger.warning(
            f"No se pudo analizar audio: ffmpeg terminó con código {proc.returncode}"
        )
    return volumes


def _first_variance_jump(volumes):
    """Return the second where volume variance first exceeds the baseline.

    A rolling variance over 6 consecutive windows (60 s) is computed; the
    first 10 variances form the baseline. The result is the end time of the
    first rolling window whose variance is more than two standard deviations
    above the baseline mean, or ``None`` if there is none.
    """
    history = 6  # 6 windows of 10 s = 60 s
    variances = [
        np.var(volumes[i - history : i]) for i in range(history, len(volumes))
    ]

    baseline = variances[:10]
    threshold = np.mean(baseline) + 2 * np.std(baseline)

    for i, variance in enumerate(variances):
        if variance > threshold:
            return (i + history) * _AUDIO_WINDOW_SECONDS
    return None


def detect_intro_end_audio(video_file, min_volume_threshold=0.01):
    """Estimate the second at which the stream intro ends, using audio only.

    The heuristic assumes the intro has steady background music while
    gameplay is more dynamic (shouting, silences, ...): the audio track is
    reduced to one RMS volume per 10 s window and the first significant
    increase of the rolling volume variance is reported.

    Args:
        video_file: Path of the media file handed to ffmpeg.
        min_volume_threshold: Accepted for backward compatibility; the
            current heuristic does not use it.

    Returns:
        The detected second as an int, or ``None`` when ffmpeg cannot be run,
        fewer than 10 complete windows could be decoded, or no variance jump
        is found.
    """
    logger.info("Analizando audio para detectar fin de intro...")

    volumes = _read_window_volumes(video_file)
    if volumes is None or len(volumes) < 10:
        return None

    time_sec = _first_variance_jump(volumes)
    if time_sec is not None:
        logger.info(
            f"Fin de intro detectado por audio en {time_sec}s "
            f"({time_sec // 60}m {time_sec % 60}s)"
        )
    return time_sec
|
|
|
|
|
|
def detect_intro_end(chat_data, video_file=None, method="auto"):
    """Detect the end of the stream intro from chat and/or audio.

    Args:
        chat_data: Chat data; a mapping with a ``"comments"`` list.
        video_file: Optional video file, required for the audio analysis.
        method: ``"chat"``, ``"audio"`` or ``"auto"`` (both).

    Returns:
        The second at which the intro ends. When both detectors produce a
        value the integer average is returned; when neither does, the
        default of 300 seconds is returned.
    """
    logger.info("=" * 60)
    logger.info("DETECTOR AUTOMÁTICO DE INTRO")
    logger.info("=" * 60)

    results = []

    if method in ("chat", "auto"):
        chat_end = detect_intro_end_chat(chat_data)
        # detect_intro_end_chat returns 0 both for "no comments at all" and
        # for a real detection in the very first window. Only the former
        # means "no result"; a plain truthiness test would drop both.
        if chat_end is not None and (chat_end > 0 or chat_data["comments"]):
            results.append(("chat", chat_end))

    if method in ("audio", "auto"):
        if video_file:
            audio_end = detect_intro_end_audio(video_file)
            if audio_end is not None:
                results.append(("audio", audio_end))
        elif method == "audio":
            # Make the reason for the upcoming fallback visible in the log.
            logger.warning("Método 'audio' solicitado sin archivo de video")

    if not results:
        logger.warning(
            "No se pudo detectar fin de intro automáticamente. Usando default: 300s"
        )
        return 300

    if len(results) == 2:
        # Both detectors answered (chat is always appended first): average.
        (_, chat_value), (_, audio_value) = results
        avg = int((chat_value + audio_value) / 2)
        logger.info(f"Chat detectó: {chat_value}s, Audio detectó: {audio_value}s")
        logger.info(f"Usando promedio: {avg}s ({avg // 60}m {avg % 60}s)")
        return avg

    # Only one detector produced a value: use it as is.
    method_name, value = results[0]
    logger.info(
        f"Usando detección por {method_name}: {value}s ({value // 60}m {value % 60}s)"
    )
    return value
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point: detect the intro end and write it to a file.

    Args:
        argv: Optional argument list; ``None`` makes argparse read
            ``sys.argv``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True, help="Chat JSON file")
    parser.add_argument("--video", help="Video file (opcional, para análisis de audio)")
    parser.add_argument("--method", default="auto", choices=["chat", "audio", "auto"])
    parser.add_argument("--output", default="intro_end.txt", help="Output file")
    args = parser.parse_args(argv)

    # Chat logs contain accents and emoji; read them as UTF-8 explicitly
    # instead of relying on the platform's locale encoding.
    with open(args.chat, "r", encoding="utf-8") as f:
        chat_data = json.load(f)

    intro_end = detect_intro_end(chat_data, args.video, args.method)

    print(f"\n{'=' * 60}")
    print(f"FIN DE INTRO DETECTADO: {intro_end}s")
    print(f" = {intro_end // 60}m {intro_end % 60}s")
    print(f"{'=' * 60}")

    # The result is stored as a bare integer so other scripts can read it.
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(str(intro_end))

    print(f"Guardado en: {args.output}")


if __name__ == "__main__":
    main()
|