import json
import logging
import re
import sys
from collections import Counter
from datetime import datetime

import numpy as np

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Matches the leading "[<ISO-8601 timestamp>]" prefix of a chat line.
_TIMESTAMP_RE = re.compile(r'\[(\d{4}-\d{2}-\d{2}T[\d:.]+Z?)\]')


def _read_timestamps(chat_file):
    """Return the parsed timestamp of every chat line that carries one."""
    timestamps = []
    with open(chat_file, 'r', encoding='utf-8') as f:
        for line in f:
            match = _TIMESTAMP_RE.match(line)
            if not match:
                continue
            # datetime.fromisoformat() only accepts a literal "Z" suffix on
            # recent Python versions, so spell the UTC offset out explicitly.
            timestamp_str = match.group(1).replace('Z', '+00:00')
            try:
                timestamps.append(datetime.fromisoformat(timestamp_str))
            except ValueError:
                # Looks like a timestamp but is not a valid one: skip the line.
                continue
    return timestamps


def _merge_consecutive(peak_seconds, min_duration):
    """Merge runs of consecutive seconds into (start, end) intervals.

    A run is kept only when ``end - start >= min_duration``.
    """
    intervals = []
    if not peak_seconds:
        return intervals

    # Sort so the merge does not depend on the order of the chat log.
    ordered = sorted(peak_seconds)
    start = prev = ordered[0]
    for second in ordered[1:]:
        if second - prev > 1:
            # The run that just ended spans start..prev; its length must not
            # include the gap up to the next peak.
            if prev - start >= min_duration:
                intervals.append((start, prev))
            start = second
        prev = second

    if prev - start >= min_duration:
        intervals.append((start, prev))
    return intervals


def detect_highlights(chat_file, min_duration=10, threshold=2.0):
    """Detect highlights from bursts of chat activity.

    Messages are counted per second (offset from the earliest message). A
    second is a peak when the z-score of its message count exceeds
    ``threshold``; runs of consecutive peak seconds are merged into intervals.

    Note: the mean and standard deviation are computed only over seconds that
    contain at least one message; silent seconds are not part of the baseline.

    Args:
        chat_file: Path to a UTF-8 chat log whose lines start with
            ``[<ISO-8601 timestamp>]``. Lines without a valid timestamp are
            ignored.
        min_duration: Minimum ``end - start`` (in seconds) for an interval to
            be reported.
        threshold: Z-score a second must exceed to count as a peak.

    Returns:
        A list of ``(start, end)`` tuples of integer second offsets, in
        ascending order. Empty when no messages or no qualifying peaks exist.
    """
    logger.info("Analizando picos de chat...")

    timestamps = _read_timestamps(chat_file)
    if not timestamps:
        logger.error("No se encontraron mensajes")
        return []

    # Use the true extremes rather than first/last line so that out-of-order
    # logs do not yield negative offsets.
    start_time = min(timestamps)
    end_time = max(timestamps)
    duration = (end_time - start_time).total_seconds()
    logger.info("Chat: %d mensajes, duración: %.1fs", len(timestamps), duration)

    # Bucket messages by whole second since the start of the chat.
    time_buckets = Counter(
        int((ts - start_time).total_seconds()) for ts in timestamps
    )

    counts = list(time_buckets.values())
    mean_count = np.mean(counts)
    std_count = np.std(counts)
    logger.info("Stats: media=%.1f, std=%.1f", mean_count, std_count)

    # With zero spread no second stands out, so there are no peaks.
    peak_seconds = []
    if std_count > 0:
        peak_seconds = [
            second
            for second, count in time_buckets.items()
            if (count - mean_count) / std_count > threshold
        ]
    logger.info("Picos encontrados: %d", len(peak_seconds))

    return _merge_consecutive(peak_seconds, min_duration)


def main():
    """Run highlight detection, print a summary and save it as JSON."""
    # Optional first CLI argument overrides the default chat log path.
    chat_file = sys.argv[1] if len(sys.argv) > 1 else "chat.txt"
    highlights = detect_highlights(chat_file)

    print(f"\nHighlights encontrados: {len(highlights)}")
    for i, (start, end) in enumerate(highlights, start=1):
        print(f" {i}. {start}s - {end}s (duración: {end - start}s)")

    # Tuples are serialized as JSON arrays: [[start, end], ...].
    with open("highlights.json", "w", encoding="utf-8") as f:
        json.dump(highlights, f)
    print("\nGuardado en highlights.json")


if __name__ == "__main__":
    main()