- Implementación de detector híbrido (Whisper + Chat + Audio + VLM) - Sistema de detección de gameplay real vs hablando - Scene detection con FFmpeg - Soporte para RTX 3050 y RX 6800 XT - Guía completa en 6800xt.md para próxima IA - Scripts de filtrado visual y análisis de contexto - Pipeline automatizado de generación de videos
183 lines
6.1 KiB
Python
183 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Detector simple: busca momentos con actividad de chat.
|
|
En lugar de buscar "picos", toma cualquier momento donde hubo actividad.
|
|
"""
|
|
import sys
|
|
import json
|
|
import logging
|
|
import numpy as np
|
|
from pathlib import Path
|
|
|
|
# Configure the root logger so INFO-level progress messages from this script
# are emitted (basicConfig is a no-op if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
def is_intro_like(start, end, chat_data, sample_size=50, intro_ratio_threshold=0.25):
    """Tell whether the chat inside an interval looks like an intro or a return from a break.

    The first ``sample_size`` comments whose ``content_offset_seconds`` lies in
    ``[start, end]`` (both ends inclusive) are inspected, and the interval is
    flagged when the share of them containing an intro keyword is strictly
    greater than ``intro_ratio_threshold``.

    Args:
        start: Interval start, in seconds from the beginning of the stream.
        end: Interval end, in seconds (inclusive).
        chat_data: Mapping with a ``'comments'`` list; each comment carries
            ``'content_offset_seconds'`` and, normally, ``'message'['body']``.
        sample_size: Maximum number of comments to inspect.
        intro_ratio_threshold: Fraction of keyword-bearing messages above
            which the interval is considered intro-like.

    Returns:
        True if the interval looks like an intro/break return, False
        otherwise (including when the interval holds no comments).
    """
    # Keywords that tend to show up in chat during intros / break returns.
    # Matching is by substring, so stems such as 'empez' or 'gord' also catch
    # their inflected forms.
    # NOTE(review): very short entries ('hi', 'ola', 'hey') also match inside
    # unrelated words (e.g. 'chiste', 'cola'); consider word-boundary matching
    # if false positives become a problem.
    intro_keywords = (
        'empieza', 'ya empieza', 'empiezo', 'empez',
        'hola', 'hi', 'ola', 'hey', 'buenas',
        'calvo', 'gord', 'prende', 'prendio',
        'ya vamos', 'vamo', 'vamos',
        'espera', 'esperando',
        'offstream', 'off-stream',
        'break', 'vuelta',
    )

    # Collect at most `sample_size` comments from the interval, stopping the
    # scan as soon as the sample is full instead of filtering the whole chat.
    sample = []
    for comment in chat_data['comments']:
        if len(sample) >= sample_size:
            break
        if start <= comment['content_offset_seconds'] <= end:
            sample.append(comment)

    if not sample:
        return False

    intro_like_count = 0
    for comment in sample:
        # Tolerate comments with a missing/null message or body: they simply
        # count as messages without any keyword.
        message = comment.get('message') or {}
        text = str(message.get('body') or '').lower()
        if any(keyword in text for keyword in intro_keywords):
            intro_like_count += 1

    intro_ratio = intro_like_count / len(sample)

    # Strictly greater: an interval sitting exactly on the threshold is kept.
    return intro_ratio > intro_ratio_threshold
|
|
|
|
|
|
def _count_messages_per_second(comments):
    """Return an array where index ``s`` holds the number of comments posted during second ``s``."""
    seconds = np.asarray(
        [int(c['content_offset_seconds']) for c in comments],
        dtype=np.intp,
    )
    # Negative offsets cannot be placed on the timeline (they would wrap
    # around as negative indices), so they are dropped.
    seconds = seconds[seconds >= 0]
    if seconds.size == 0:
        return np.zeros(0, dtype=np.intp)
    return np.bincount(seconds)


def _group_into_intervals(active_seconds, max_gap, min_duration):
    """Merge sorted active seconds separated by at most ``max_gap`` into (start, end) pairs."""
    gaps = active_seconds[1:] - active_seconds[:-1]
    # A new run starts right after every gap that exceeds max_gap.
    run_starts = np.where(gaps > max_gap)[0] + 1
    runs = np.split(active_seconds, run_starts)
    return [
        (int(run[0]), int(run[-1]))
        for run in runs
        if run[-1] - run[0] >= min_duration
    ]


def detect_any_activity(chat_data, min_duration=5, intro_skip=90,
                        min_activity_threshold=2, max_gap=5):
    """Find stretches of the stream where the chat was active.

    A per-second message count is built from ``chat_data['comments']``, the
    first ``intro_skip`` seconds are blanked out, seconds reaching
    ``min_activity_threshold`` messages are merged into intervals, intervals
    flagged by ``is_intro_like`` are dropped, and the rest are ranked by score.

    Args:
        chat_data: Mapping with a ``'comments'`` list; each comment carries
            ``'content_offset_seconds'``.
        min_duration: Minimum interval length (end - start) in seconds.
        intro_skip: Seconds to ignore at the start of the stream.
        min_activity_threshold: Minimum messages in a second for that second
            to count as active.
        max_gap: Largest gap, in seconds, between two active seconds that
            still belong to the same interval.

    Returns:
        List of ``(start, end)`` tuples of Python ints (seconds, end
        inclusive), sorted from highest to lowest score. Empty when the chat
        has no comments or no second reaches the threshold.
    """
    logger.info("=== Detectando momentos con actividad de chat ===")

    # Per-second activity timeline.
    activity = _count_messages_per_second(chat_data['comments'])
    if activity.size == 0:
        logger.warning("No hay actividad de chat significativa")
        return []

    # Blank out the intro; clamp so a negative value cannot turn into a
    # "from the end" slice that wipes most of the timeline.
    activity[:max(intro_skip, 0)] = 0
    logger.info(f"Intro excluida: primeros {intro_skip}s")

    active_seconds = np.where(activity >= min_activity_threshold)[0]
    if len(active_seconds) == 0:
        logger.warning("No hay actividad de chat significativa")
        return []

    candidates = _group_into_intervals(active_seconds, max_gap, min_duration)

    scored_intervals = []
    skipped_count = 0
    for start, end in candidates:
        if is_intro_like(start, end, chat_data):
            skipped_count += 1
            logger.info(f"Descartando intervalo {start}-{end}s (parece intro/vuelta de break)")
            continue
        segment = activity[start:end + 1]
        # Score = mean activity + 0.3 * peak activity + length / 10.
        score = np.mean(segment) + np.max(segment) * 0.3 + (end - start) / 10
        scored_intervals.append((start, end, score))

    logger.info(f"Intervalos descartados por parecer intro: {skipped_count}")

    # Most interesting first; the sort is stable, so ties keep timeline order.
    scored_intervals.sort(key=lambda item: item[2], reverse=True)
    intervals = [(s, e) for s, e, _ in scored_intervals]

    logger.info(f"Intervalos con actividad: {len(intervals)}")
    logger.info(f"Duración total: {sum(e-s for s,e in intervals)}s")

    return intervals
|
|
|
|
def main():
    """Command-line entry point.

    Loads the chat JSON given by ``--chat``, runs ``detect_any_activity`` with
    the CLI options, keeps the ``--top`` best intervals, writes them as JSON to
    ``--output`` and prints a human-readable summary to stdout.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--output", default="activity.json")
    parser.add_argument("--min-duration", type=int, default=10, help="Duración mínima de cada clip")
    parser.add_argument("--top", type=int, default=15, help="Número máximo de clips")
    parser.add_argument("--intro-skip", type=int, default=90, help="Segundos a saltar del inicio (intro)")
    parser.add_argument("--activity-threshold", type=int, default=2, help="Mensajes mínimos por segundo")
    args = parser.parse_args()

    # Load the chat. JSON is UTF-8 by specification; being explicit avoids
    # decode errors on platforms whose default encoding is not UTF-8.
    logger.info("Cargando chat...")
    with open(args.chat, 'r', encoding='utf-8') as f:
        chat_data = json.load(f)

    # Detect activity (intervals come back sorted best-first).
    intervals = detect_any_activity(chat_data, args.min_duration, args.intro_skip, args.activity_threshold)

    # Keep the top N.
    intervals_top = intervals[:args.top]

    # Save.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(intervals_top, f)

    logger.info(f"Guardado en {args.output}")

    # Print the summary.
    total_seconds = sum(e - s for s, e in intervals_top)
    print(f"\n{'='*70}")
    print("MOMENTOS CON ACTIVIDAD DE CHAT".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(intervals_top)} clips")
    print(f"Duración total: {total_seconds}s ({total_seconds/60:.1f} min)")
    print(f"{'-'*70}")

    for i, (start, end) in enumerate(intervals_top, 1):
        duration = end - start
        # Render the start offset as HH:MM:SS.
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s duración")

    print(f"{'='*70}")


if __name__ == "__main__":
    main()
|