#!/usr/bin/env python3
"""
RAGE EDITION detector: finds moments of fury, complaints, insults and pure
rage in a transcript.
"""
import json
import logging
import re
from collections import defaultdict

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Rage keywords. Every match of any of these adds 10 points to a segment.
# Compiled once at import time instead of once per segment.
_RAGE_KEYWORDS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # Direct insults (accent-tolerant, since transcripts usually carry
        # the accented spelling).
        r'\bretrasad[ao]s?\b', r'\bimb[ée]cil\b', r'\best[úu]pid[ao]s?\b',
        r'\bidiota\b', r'\bput[ao]\b', r'\bmaric[óo]n\b', r'\bpolla?\b',
        r'\bpinga?\b', r'\bpendej[ao]s?\b', r'\bcapullo\b',
        r'\bgilipollas\b', r'\bcabr[óo]n\b', r'\bhostia\b', r'\bcoñ[ao]\b',
        r'\bjoder\b',
        # In-game complaints
        r'\breport\b', r'\bban\b', r'\binter[bv]enido\b', r'\bafk\b',
        r'\btroll\b', r'\bfeed\b', r'\bthrow\b',
        # Expressions of frustration
        r'\bno puedo\b', r'\bimposible\b', r'\bque putada\b',
        r'\bme cago\b', r'\bqué verg[üu]enza\b',
        # Rage sounds
        r'\bargh\b', r'\bugh\b', r'\baargh\b',
    )
)

# Words that earn the "insulto" reason tag when any rage keyword matched.
_INSULT_TAG = re.compile(r'retrasad[ao]|imb[ée]cil')

# Repetition patterns (a sign of rage). Each matching pattern adds 15 points.
_REPETITION_PATTERNS = (
    # "no no no": three or more in a row. A lone "no" is not a repetition.
    re.compile(r'\bno(?:\s+no\b){2,}'),
    re.compile(r'\b(vamos\b.*){3,}'),        # "vamos vamos vamos"
    re.compile(r'\b(por favor\b.*){3,}'),    # "por favor por favor por favor"
)

# Scream patterns (runs of exclamation marks). Each matching pattern adds 5.
_SCREAM_PATTERNS = (
    re.compile(r'!{2,}'),   # multiple exclamation marks
    re.compile(r'¡{2,}'),   # multiple inverted exclamation marks
)

# Substrings signalling extreme frustration; any hit adds 20 points.
_FRUSTRATION_PHRASES = ("me la suda", "me suda", "qué putada", "putada")


def _score_segment(text, duration):
    """Return ``(score, reasons)`` for one lowercased segment text."""
    score = 0
    reasons = []

    keyword_hits = sum(len(p.findall(text)) for p in _RAGE_KEYWORDS)
    if keyword_hits:
        score += keyword_hits * 10
        if _INSULT_TAG.search(text):
            reasons.append("insulto")

    # The score grows with every matching pattern, but each reason tag is
    # listed only once.
    repetition_hits = sum(1 for p in _REPETITION_PATTERNS if p.search(text))
    if repetition_hits:
        score += repetition_hits * 15
        reasons.append("repetición")

    scream_hits = sum(1 for p in _SCREAM_PATTERNS if p.search(text))
    if scream_hits:
        score += scream_hits * 5
        reasons.append("grito")

    if any(phrase in text for phrase in _FRUSTRATION_PHRASES):
        score += 20
        reasons.append("frustración")

    # Very short duration with a lot of text = possible fast, angry speech.
    if duration < 3 and len(text) > 20:
        score += 10
        reasons.append("habla rápido")

    return score, reasons


def _select_intervals(rage_scores, min_duration, max_duration, top):
    """Greedily pick up to ``top`` non-overlapping clips, best score first."""
    intervals = []
    # Look at more candidates than needed, since overlapping ones are dropped.
    for rage in rage_scores[:top * 3]:
        start = int(rage["start"])
        # Clamp the clip length into [min_duration, max_duration].
        length = max(min_duration,
                     min(int(rage["end"]) - start, max_duration))
        end = start + length

        # Touching endpoints count as an overlap.
        if any(end >= s and start <= e for s, e in intervals):
            continue

        intervals.append((start, end))
        if len(intervals) >= top:
            break

    intervals.sort()
    return intervals


def detect_rage_moments(transcripcion_json, min_duration=15, max_duration=45,
                        top=30):
    """
    Detect rage moments by analysing a transcript.

    Args:
        transcripcion_json: Path to a UTF-8 JSON file whose top-level object
            has a "segments" list; each segment is read for its "text",
            "start" and "end" keys.
        min_duration: Minimum clip length, in the same units as the segment
            timestamps.
        max_duration: Maximum clip length taken from a segment's own
            duration.
        top: Maximum number of clips to return.

    Returns:
        A tuple ``(intervals, rage_scores)``. ``intervals`` is a list of
        ``(start, end)`` integer tuples sorted by start. ``rage_scores`` is
        the list of scored segments (dicts with "start", "end", "score",
        "text", "reasons") sorted by descending score. Both lists are empty
        when nothing is detected, so callers can always unpack the result.
    """
    logger.info("=== Buscando RAGE MOMENTS ===")

    with open(transcripcion_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    rage_scores = []
    for seg in data.get("segments", []):
        text = seg["text"].lower()
        start = seg["start"]
        end = seg["end"]

        score, reasons = _score_segment(text, end - start)
        if score > 0:
            rage_scores.append({
                "start": start,
                "end": end,
                "score": score,
                "text": text,
                "reasons": reasons,
            })

    if not rage_scores:
        logger.warning("No se encontraron rage moments")
        # Same shape as the normal return, so tuple-unpacking callers such
        # as main() do not crash.
        return [], []

    # Highest score first; the sort is stable, so ties keep transcript order.
    rage_scores.sort(key=lambda item: -item["score"])

    intervals = _select_intervals(rage_scores, min_duration, max_duration,
                                  top)

    logger.info(f"Rage moments detectados: {len(intervals)}")

    return intervals, rage_scores


def main():
    """Command-line entry point: detect clips, save them as JSON, print a summary."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--transcripcion", required=True)
    parser.add_argument("--output", default="highlights_rage.json")
    parser.add_argument("--top", type=int, default=30)
    parser.add_argument("--min-duration", type=int, default=15)
    parser.add_argument("--max-duration", type=int, default=45)
    args = parser.parse_args()

    intervals, rage_scores = detect_rage_moments(
        args.transcripcion,
        args.min_duration,
        args.max_duration,
        args.top
    )

    # Save the clip list.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(intervals, f)

    logger.info(f"Guardado en {args.output}")

    # Print the summary.
    total = sum(e - s for s, e in intervals)
    print(f"\n{'='*70}")
    print("RAGE EDITION - MOMENTOS DE FURIA".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(intervals)} clips")
    print(f"Duración total: {total}s ({total/60:.1f} min)")
    print(f"{'-'*70}")

    for i, (start, end) in enumerate(intervals, 1):
        duration = end - start
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60

        # Preview text: the best-scored segment starting within 5 time
        # units of the clip start, if there is one.
        text_preview = next(
            (rage["text"][:50].replace('\n', ' ')
             for rage in rage_scores
             if abs(rage["start"] - start) < 5),
            None,
        )
        if text_preview is not None:
            print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s - {text_preview}...")
        else:
            print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s")

    print(f"{'='*70}")


if __name__ == "__main__":
    main()