#!/usr/bin/env python3
"""
Detector of "moments with soul".

Looks for laughter, emotion, loss of control and strong chat reactions by
combining a chat log (per-second message counts) with a transcript
(timestamped text segments).
"""
import argparse
import json
import logging
import re

import numpy as np

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Laughter cues. They are searched against the lower-cased segment text.
_LAUGHTER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r'\b(ja){2,}\b',  # jajaja
        r'\b(je){2,}\b',  # jejeje
        r'\b(ji){2,}\b',  # jijiji
        r'\b(jo){2,}\b',  # jojojo
        # The previous pattern required two or more "sa"/"se" syllables and
        # therefore could never match the plain words "risa" / "risas".
        r'\brisas?\b',
        r'\bcarcajadas?\b',
        r'\bme (estoy|toy) muriendo\b',
        r'\bno puedo\b.*\b(reír|risa|jaja)',
        r'\b(jajaja|jejeje|jijiji)\b',
    )
)

# Emotion cues. They are searched against the original-case segment text.
_EMOTION_PATTERNS = (
    re.compile(r'!{2,}'),  # several exclamation marks
    re.compile(r'¡{2,}'),  # several inverted exclamation marks
    # Shouting: a word of 5+ capital letters. This one must stay
    # case-sensitive; with IGNORECASE it matches any word of 5+ letters.
    re.compile(r'\b[A-ZÁÉÍÓÚÜÑ]{5,}\b'),
    re.compile(r'\b(PUTA|DIOS|MIERDA|CARAJO|HOSTIA)\b', re.IGNORECASE),
    # NOTE(review): this only matches back-to-back repeats without any
    # separator (e.g. "vamosvamos"); confirm whether "vamos vamos" was meant.
    re.compile(r'\b(vamos|vamo|vale|siu){2,}\b', re.IGNORECASE),
    re.compile(r'\b(estoy|toy) (llorando|muerto)\b', re.IGNORECASE),
)


def _chat_activity_profile(chat_data):
    """Return ``(smoothed_activity, threshold)`` for the chat log.

    ``smoothed_activity`` is the number of comments per second after a
    5-second moving average; ``threshold`` is the 90th percentile of its
    non-zero values. With no usable comments the profile is empty and the
    threshold is infinite, so no segment can qualify as a chat peak.
    """
    comments = chat_data.get('comments') or []
    seconds = [int(c['content_offset_seconds']) for c in comments]
    # Negative offsets would index from the end of the array; drop them.
    seconds = [s for s in seconds if s >= 0]
    if not seconds:
        return np.zeros(0), float('inf')

    activity = np.bincount(seconds)
    smooth = np.convolve(activity, np.ones(5) / 5, mode='same')
    active = smooth[smooth > 0]
    threshold = float(np.percentile(active, 90)) if active.size else float('inf')
    return smooth, threshold


def _score_segment(seg, activity_smooth, threshold):
    """Score one transcript segment; return a moment dict, or None if below 20.

    Only the first cue that fires is recorded in ``reasons``; later cues
    still add to the score.
    """
    text = seg["text"]
    text_lower = text.lower()
    start = seg["start"]
    end = seg["end"]

    score = 0
    reasons = []

    if any(p.search(text_lower) for p in _LAUGHTER_PATTERNS):
        score += 30
        reasons.append("risa")

    if any(p.search(text) for p in _EMOTION_PATTERNS):
        score += 20
        if not reasons:
            reasons.append("emoción")

    # Mean chat activity over the segment. Segments reaching past the end of
    # the chat profile count as zero activity, as does an empty window
    # (start and end within the same second), which would otherwise be NaN.
    lo, hi = max(int(start), 0), int(end)
    if hi < len(activity_smooth) and hi > lo:
        chat_activity = float(activity_smooth[lo:hi].mean())
    else:
        chat_activity = 0.0
    if chat_activity > threshold * 1.5:
        score += 25
        if not reasons:
            reasons.append("chat loco")

    # Long text dominated by repeated words hints at loss of control.
    if len(text) > 50:
        words = text_lower.split()
        unique_ratio = len(set(words)) / len(words) if words else 1
        if unique_ratio < 0.5:
            score += 15
            if not reasons:
                reasons.append("repetición emocional")

    if score < 20:
        return None
    return {
        "start": start,
        "end": end,
        "score": score,
        "text": text.strip()[:100],
        "reasons": reasons,
    }


def _select_intervals(soul_moments, min_duration, max_duration, top):
    """Greedily pick up to ``top`` non-overlapping (start, end) intervals.

    ``soul_moments`` must already be ordered best-first. Each interval starts
    at the moment's start and lasts the segment length clamped to
    ``[min_duration, max_duration]``. The result is sorted chronologically.
    """
    intervals = []
    for moment in soul_moments:
        if len(intervals) >= top:
            break
        start = int(moment["start"])
        length = max(min_duration, min(int(moment["end"]) - start, max_duration))
        end = int(start + length)
        if any(start <= e and end >= s for s, e in intervals):
            continue
        intervals.append((start, end))
    intervals.sort()
    return intervals


def detect_moments_with_soul(chat_data, transcripcion_json, min_duration=20, max_duration=60, top=25):
    """
    Detect moments with soul: laughter, emotion, excited chat.

    Args:
        chat_data: dict with a ``'comments'`` list whose items carry
            ``'content_offset_seconds'``.
        transcripcion_json: path to a JSON file with a ``"segments"`` list;
            each segment provides ``"text"``, ``"start"`` and ``"end"``.
        min_duration: minimum clip length in seconds.
        max_duration: maximum clip length in seconds.
        top: maximum number of clips to return.

    Returns:
        A tuple ``(intervals, soul_moments)``: ``intervals`` is a
        chronologically sorted list of non-overlapping ``(start, end)``
        tuples, ``soul_moments`` the scored segments ordered best-first.
        When nothing qualifies both lists are empty, so callers can always
        unpack two values.
    """
    logger.info("=== Buscando MOMENTOS CON ALMA ===")

    with open(transcripcion_json, 'r', encoding='utf-8') as f:
        trans_data = json.load(f)
    segments = trans_data.get("segments", [])

    activity_smooth, threshold = _chat_activity_profile(chat_data)
    peak_count = int(np.count_nonzero(activity_smooth > threshold))
    logger.info("Picos de chat emocional: %s segundos", peak_count)

    scored = (_score_segment(seg, activity_smooth, threshold) for seg in segments)
    soul_moments = [moment for moment in scored if moment is not None]

    if not soul_moments:
        logger.warning("No se encontraron momentos con alma")
        return [], []

    # Stable sort: equal scores keep transcript order.
    soul_moments.sort(key=lambda moment: moment["score"], reverse=True)

    intervals = _select_intervals(soul_moments, min_duration, max_duration, top)
    logger.info("Momentos con alma detectados: %s", len(intervals))
    return intervals, soul_moments


def main():
    """Command-line entry point: detect clips, save them as JSON, print a summary."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--transcripcion", required=True)
    parser.add_argument("--output", default="highlights_alma.json")
    parser.add_argument("--top", type=int, default=25)
    parser.add_argument("--min-duration", type=int, default=20)
    parser.add_argument("--max-duration", type=int, default=60)
    args = parser.parse_args()

    # Chat logs routinely contain emoji; do not depend on the locale encoding.
    with open(args.chat, 'r', encoding='utf-8') as f:
        chat_data = json.load(f)

    intervals, moments = detect_moments_with_soul(
        chat_data,
        args.transcripcion,
        args.min_duration,
        args.max_duration,
        args.top
    )

    # Save the intervals
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(intervals, f)

    logger.info(f"Guardado en {args.output}")

    # Print the summary
    reasons_emoji = {
        "risa": "😂",
        "emoción": "🔥",
        "chat loco": "💬",
        "repetición emocional": "🤪"
    }
    total = sum(e - s for s, e in intervals)

    print(f"\n{'='*70}")
    print("MOMENTOS CON ALMA".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(intervals)} clips")
    print(f"Duración total: {total}s ({total/60:.1f} min)")
    print(f"{'-'*70}")

    for i, (start, end) in enumerate(intervals, 1):
        duration = end - start
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60

        # Show the best-scored moment that begins within 5 s of the clip start.
        moment = next((mo for mo in moments if abs(mo["start"] - start) < 5), None)
        if moment is None:
            continue
        emojis = "".join(reasons_emoji.get(r, "") for r in moment["reasons"])
        text_preview = moment["text"][:55].replace('\n', ' ')
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s {emojis} - {text_preview}...")

    print(f"{'='*70}")


if __name__ == "__main__":
    main()