#!/usr/bin/env python3
"""
Hybrid highlight detector: finds chat-activity peaks, then filters the
resulting intervals with the transcript via a minimax (OpenAI-compatible)
LLM call.
"""
import json
import logging
import os
from functools import lru_cache

import numpy as np

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_chat_peaks(chat_data, window_seconds=5, percentile_threshold=85):
    """Detect seconds with unusually high chat activity.

    Args:
        chat_data: dict with a ``comments`` list; each comment carries a
            ``content_offset_seconds`` field (seconds from stream start).
        window_seconds: width of the moving-average smoothing window.
        percentile_threshold: percentile (over the non-zero smoothed
            activity) used as the peak threshold.

    Returns:
        (peak_indices, activity_smooth): indices (seconds) whose smoothed
        activity exceeds the threshold, and the full smoothed series.
    """
    comments = chat_data.get('comments', [])
    if not comments:
        # No chat at all — avoid max() on an empty sequence.
        return np.array([], dtype=np.int64), np.zeros(0)

    duration = max(int(c['content_offset_seconds']) for c in comments) + 1

    # Messages per second.
    activity = np.zeros(duration, dtype=np.int32)
    for comment in comments:
        second = int(comment['content_offset_seconds'])
        # Guard against negative offsets (would index from the end).
        if 0 <= second < duration:
            activity[second] += 1

    # Smooth with a moving average.
    kernel = np.ones(window_seconds) / window_seconds
    activity_smooth = np.convolve(activity, kernel, mode='same')

    # Percentile-based threshold over the active seconds only.
    positive = activity_smooth[activity_smooth > 0]
    if positive.size == 0:
        # Degenerate case: nothing to rank — no peaks.
        return np.array([], dtype=np.int64), activity_smooth
    threshold = np.percentile(positive, percentile_threshold)

    # Peaks are the seconds strictly above the threshold.
    peak_indices = np.where(activity_smooth > threshold)[0]

    logger.info(f"Picos de chat: {len(peak_indices)} segundos con actividad alta")
    logger.info(f"Threshold: {threshold:.1f} mensajes/segundo (percentil {percentile_threshold})")

    return peak_indices, activity_smooth


def group_peaks_into_intervals(peak_indices, min_duration=15, max_duration=30, gap_seconds=8):
    """Group nearby peak seconds into clip intervals.

    Consecutive peaks closer than ``gap_seconds`` belong to the same
    interval; each interval's duration is clamped into
    [min_duration, max_duration].

    Returns a list of ``(start, end)`` tuples in seconds.
    """
    if len(peak_indices) == 0:
        return []

    def _clamped(start, last_peak):
        # Clamp the raw peak-run length into the desired duration range.
        duration = min(max(last_peak - start, min_duration), max_duration)
        return (int(start), int(start + duration))

    intervals = []
    start = peak_indices[0]
    prev = peak_indices[0]

    for idx in peak_indices[1:]:
        if idx - prev > gap_seconds:
            # Gap too large: close the current run and start a new one.
            intervals.append(_clamped(start, prev))
            start = idx
        prev = idx

    # Close the final run.
    intervals.append(_clamped(start, prev))

    return intervals


@lru_cache(maxsize=8)
def _load_transcript(transcripcion_json):
    """Load and cache a transcript JSON file.

    Cached because callers query many intervals against the same file;
    the original re-read and re-parsed it on every call.
    NOTE: assumes the transcript file does not change during a run.
    """
    with open(transcripcion_json, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_transcript_segments(transcripcion_json, start, end):
    """Return the transcript text overlapping the [start, end] interval.

    A segment is included when it overlaps the interval at all
    (seg.end >= start and seg.start <= end).
    """
    data = _load_transcript(transcripcion_json)
    segments = data.get("segments", [])

    relevant = [
        seg["text"].strip()
        for seg in segments
        if seg["end"] >= start and seg["start"] <= end
    ]
    return " ".join(relevant)


def filter_intervals_with_minimax(intervals, transcripcion_json, api_key=None):
    """Ask the minimax LLM which intervals are interesting and keep those.

    Sends at most the first 50 intervals' transcript snippets to the model,
    expects a SI/NO answer per clip, and fails open (INCLUDE) on API errors
    or missing decisions.

    Args:
        intervals: list of (start, end) second tuples.
        transcripcion_json: path to the transcript JSON.
        api_key: optional API key; falls back to $OPENAI_API_KEY.

    Returns:
        The subset of ``intervals`` the model marked as interesting.
    """
    if not intervals:
        # Nothing to filter — skip the API round-trip entirely.
        return []

    # Lazy import: keeps the chat-analysis helpers usable without the
    # optional openai dependency installed.
    from openai import OpenAI

    base_url = os.environ.get("OPENAI_BASE_URL", "https://api.minimax.io/v1")
    if not api_key:
        api_key = os.environ.get("OPENAI_API_KEY")
    client = OpenAI(base_url=base_url, api_key=api_key)

    # Collect the transcript snippet for each interval.
    interval_texts = []
    for i, (start, end) in enumerate(intervals):
        text = get_transcript_segments(transcripcion_json, start, end)
        mins = start // 60
        secs = start % 60
        interval_texts.append({
            "index": i,
            "start": start,
            "end": end,
            "timestamp": f"[{mins:02d}:{secs:02d}]",
            "text": text
        })

    # Only the first 50 clips are shown to the model; the announced count
    # must match (bug fix: the original claimed len(interval_texts) clips
    # while showing at most 50, misaligning the model's answers).
    shown = interval_texts[:50]
    summary_lines = [f"{it['timestamp']} {it['text'][:100]}" for it in shown]
    full_summary = "\n".join(summary_lines)

    prompt = f"""Eres un filtrador de contenido de Twitch.

CLIPS A ANALIZAR ({len(shown)} clips):
{full_summary}

TU TAREA: Para cada clip, responde SOLO "SI" o "NO".

"SI" = incluir, si el clip tiene:
- Risas, carcajadas, jajaja
- Emoción, entusiasmo, celebración
- Algo gracioso o inesperado
- Mencion de jugada épica

"NO" = excluir, si el clip tiene:
- Quejas, insultos, rage negativo
- Conversación aburrida
- Silencio o texto muy corto
- Repetición de palabras sin sentido (como "Gigi")

IMPORTANTE: Responde en una sola línea con SI/NO separados por coma.
Ejemplo: SI,NO,SI,SI,NO,SI,NO

Tu respuesta para los {len(shown)} clips:"""

    try:
        response = client.chat.completions.create(
            model="MiniMax-M2.5",
            messages=[
                {"role": "system", "content": "Eres un experto editor que identifica momentos virales."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=500
        )

        content = response.choices[0].message.content.strip().upper()

        # Parse the "SI,NO,SI,..." answer; tolerate whitespace separators,
        # the accented "SÍ", and English INCLUDE/EXCLUDE tokens.
        decisions = []
        for token in content.replace(',', ' ').split():
            if token in ("SI", "SÍ", "INCLUDE"):
                decisions.append("INCLUDE")
            elif token in ("NO", "EXCLUDE"):
                decisions.append("EXCLUDE")

        # Missing decisions default to INCLUDE (fail open).
        if len(decisions) < len(interval_texts):
            decisions.extend(["INCLUDE"] * (len(interval_texts) - len(decisions)))

        logger.info(f"Decisiones de la IA: {sum(1 for d in decisions if d == 'INCLUDE')} INCLUDE, {sum(1 for d in decisions if d == 'EXCLUDE')} EXCLUDE")

    except Exception as e:
        # Best-effort: on any API failure keep every interval.
        logger.error(f"Error en API: {e}")
        decisions = ["INCLUDE"] * len(interval_texts)

    # Keep only the intervals the model accepted (ignore surplus decisions).
    filtered = [
        intervals[i]
        for i, decision in enumerate(decisions)
        if i < len(intervals) and decision == "INCLUDE"
    ]

    logger.info(f"Intervalos después del filtro: {len(filtered)}/{len(intervals)}")
    return filtered


def main():
    """CLI entry point: chat peaks -> intervals -> LLM filter -> JSON output."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--transcripcion", required=True)
    parser.add_argument("--output", default="highlights_hibrido.json")
    parser.add_argument("--top", type=int, default=25)
    parser.add_argument("--min-duration", type=int, default=15)
    parser.add_argument("--max-duration", type=int, default=30)
    args = parser.parse_args()

    # Load the chat dump.
    logger.info("Cargando chat...")
    with open(args.chat, 'r') as f:
        chat_data = json.load(f)

    # Detect chat peaks and group them into candidate intervals.
    peak_indices, activity_smooth = get_chat_peaks(chat_data)
    intervals = group_peaks_into_intervals(
        peak_indices,
        min_duration=args.min_duration,
        max_duration=args.max_duration
    )
    logger.info(f"Intervalos de chat: {len(intervals)}")

    # Keep only the intervals the LLM finds interesting (top N candidates).
    filtered = filter_intervals_with_minimax(
        intervals[:args.top],
        args.transcripcion
    )

    # Persist results.
    with open(args.output, 'w') as f:
        json.dump(filtered, f)
    logger.info(f"Guardado en {args.output}")

    # Print a human-readable summary.
    total_seconds = sum(e - s for s, e in filtered)
    print(f"\n{'='*70}")
    print("HIGHLIGHTS HÍBRIDOS (Chat + IA)".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(filtered)} clips")
    print(f"Duración total: {total_seconds}s ({total_seconds/60:.1f} min)")
    print(f"{'-'*70}")
    for i, (start, end) in enumerate(filtered, 1):
        duration = end - start
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60
        text = get_transcript_segments(args.transcripcion, start, end)[:60]
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s - {text}...")
    print(f"{'='*70}")


if __name__ == "__main__":
    main()