#!/usr/bin/env python3
"""
Chat EXPLOSION detector.

Looks for sudden bursts of chat activity, which usually mark epic or
otherwise interesting moments.
"""
import sys
import json
import logging
import torch
import torch.nn.functional as F
import numpy as np
from pathlib import Path

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_device():
    """Return the CUDA device when one is available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


def _group_into_events(indices, max_gap=15, min_length=5):
    """Merge sorted second indices into (start, end) events.

    Indices separated by more than ``max_gap`` seconds start a new event;
    events whose ``end - start`` is below ``min_length`` are discarded.
    """
    events = []
    if not indices:
        return events

    start = prev = indices[0]
    for second in indices[1:]:
        if second - prev > max_gap:
            if prev - start >= min_length:
                events.append((start, prev))
            start = second
        prev = second

    # Flush the event that was still open when the indices ran out.
    if prev - start >= min_length:
        events.append((start, prev))
    return events


def detect_chat_explosions(chat_data, device="cuda", window_seconds=10, spike_threshold=3.0):
    """
    Detect chat EXPLOSIONS: sudden jumps in per-second message activity.

    Instead of sustained peaks, this looks for seconds whose message count
    is far above the local moving average.

    Args:
        chat_data: Mapping with a ``'comments'`` list; every comment must
            provide ``'content_offset_seconds'`` (truncated to an int second).
        device: Torch device used for the computation. The default is
            ``"cuda"``, so callers on CPU-only machines must pass ``"cpu"``.
        window_seconds: Width, in seconds, of the moving-average window.
        spike_threshold: How many standard deviations above the moving
            average a second must be to count as part of an explosion.

    Returns:
        List of ``(start, end, duration, peak, intensity)`` tuples sorted by
        descending intensity. Empty when the chat has no usable comments.
    """
    logger.info("=== Detectando EXPLOSIONES de chat ===")

    # Negative offsets are skipped: they cannot be placed on the timeline
    # (and bincount rejects them).
    seconds = [int(c['content_offset_seconds']) for c in chat_data['comments']]
    seconds = [s for s in seconds if s >= 0]
    if not seconds:
        logger.warning("El chat no contiene comentarios; no hay explosiones que detectar.")
        return []

    # Per-second activity timeline, counted in one vectorised pass on the CPU
    # and then moved to the requested device.
    timeline_length = max(seconds) + 1
    counts = torch.bincount(
        torch.tensor(seconds, dtype=torch.long), minlength=timeline_length
    )
    activity = counts.to(device=device, dtype=torch.get_default_dtype())

    # Moving average to capture the trend. Asymmetric padding keeps the
    # output exactly as long as the input for both odd and even windows;
    # each output covers activity[i - window // 2 : i - window // 2 + window].
    window_size = window_seconds
    kernel = torch.ones(1, 1, window_size, device=device) / window_size
    pad_left = window_size // 2
    pad_right = window_size - 1 - pad_left
    activity_padded = F.pad(activity.view(1, 1, -1), (pad_left, pad_right))
    activity_smooth = F.conv1d(activity_padded, kernel).view(-1)

    # How far each second is above (or below) its local trend.
    diff = activity - activity_smooth
    # std of a single sample is undefined (NaN); treat it as zero spread.
    std_diff = torch.std(diff) if diff.numel() > 1 else diff.new_zeros(())

    # Dynamic thresholds from the distribution of non-silent seconds.
    quantile_points = torch.tensor(
        [0.90, 0.95, 0.99], dtype=activity.dtype, device=activity.device
    )
    percentile_90, percentile_95, percentile_99 = torch.quantile(
        activity[activity > 0], quantile_points
    ).tolist()
    logger.info(
        "Activity stats: p90=%.0f, p95=%.0f, p99=%.0f",
        percentile_90, percentile_95, percentile_99,
    )

    # Explosion: activity above p95 AND well above the moving average.
    explosion_mask = (activity > percentile_95) & (diff > std_diff * spike_threshold)
    explosion_indices = torch.where(explosion_mask)[0]

    if len(explosion_indices) == 0:
        logger.warning("No se detectaron explosiones. Bajando threshold...")
        explosion_mask = activity > percentile_90
        explosion_indices = torch.where(explosion_mask)[0]

    # Group nearby seconds (gap <= 15 s) into events of at least 5 s.
    events = _group_into_events(explosion_indices.tolist())

    # Score every event by its peak activity and its length.
    events_with_intensity = []
    for start, end in events:
        peak = torch.max(activity[start:end + 1]).item()
        length = end - start
        # Combined score: peak * duration, damped for long events.
        intensity = (peak * length) / (1 + length / 10)
        events_with_intensity.append((start, end, length, peak, intensity))

    # Most "explosive" events first.
    events_with_intensity.sort(key=lambda x: -x[4])

    logger.info(f"Explosiones detectadas: {len(events)}")

    return events_with_intensity


def main():
    """Command-line entry point: detect explosions and save the top intervals as JSON."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--output", default="explosiones.json")
    parser.add_argument("--top", type=int, default=20, help="Número de eventos a retornar")
    parser.add_argument("--min-duration", type=int, default=8)
    parser.add_argument("--device", default="auto")
    args = parser.parse_args()

    if args.device == "auto":
        device = get_device()
    else:
        device = torch.device(args.device)

    logger.info(f"Usando device: {device}")

    # Load the chat. Explicit UTF-8 keeps emoji and accented text from
    # breaking on platforms whose default encoding is not UTF-8.
    logger.info("Cargando chat...")
    with open(args.chat, 'r', encoding="utf-8") as f:
        chat_data = json.load(f)

    # Detect explosions (already sorted by descending intensity).
    events = detect_chat_explosions(chat_data, device)

    # Keep events that are long enough, then take the top N.
    events_filtered = [event for event in events if event[2] >= args.min_duration]
    events_top = events_filtered[:args.top]

    # Convert to plain (start, end) intervals.
    intervals = [(int(s), int(e)) for s, e, _d, _p, _i in events_top]

    # Save.
    with open(args.output, 'w', encoding="utf-8") as f:
        json.dump(intervals, f)

    logger.info(f"Guardado en {args.output}")

    # Print a summary.
    total_seconds = sum(e - s for s, e in intervals)
    print(f"\n{'='*70}")
    print("EXPLOSIONES DE CHAT DETECTADAS".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(intervals)} eventos (top {args.top} por intensidad)")
    print(f"Duración total: {total_seconds}s ({total_seconds/60:.1f} min)")
    print(f"{'-'*70}")

    for i, (start, end) in enumerate(intervals, 1):
        duration = end - start
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s duración")

    print(f"{'='*70}")


if __name__ == "__main__":
    main()