twitch-highlight-detector/extract_final.py

#!/opt/vlm_env/bin/python3
"""
EXTRACT HIGHLIGHTS FROM CONFIRMED GAMEPLAY
Extrae highlights SOLO de los segmentos de gameplay validados
"""

import json
import re
import unicodedata

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Input / output locations (absolute paths, unchanged from the original script).
BASE_DIR = "/home/ren/proyectos/editor/twitch-highlight-detector"
GAMEPLAY_SCENES_PATH = BASE_DIR + "/gameplay_scenes.json"
TRANSCRIPT_PATH = BASE_DIR + "/transcripcion_rage.json"
OUTPUT_PATH = BASE_DIR + "/final_highlights.json"

MIN_SCORE = 6            # minimum score for a transcript entry to count
TOP_PER_SEGMENT = 2      # best entries kept per gameplay segment
TOP_DISPLAYED = 15       # entries listed in the console summary
TOP_CLIPS = 12           # entries turned into clips
CLIP_PRE_ROLL = 10       # seconds kept before the highlighted moment
CLIP_POST_ROLL = 20      # seconds kept after the highlighted moment
# Hard bounds (in seconds) applied to every clip.
# NOTE(review): presumably the usable range of this particular VOD - confirm
# before reusing the script on another recording.
EARLIEST_CLIP_START = 455
LATEST_CLIP_END = 8237
MERGE_GAP = 5            # clips closer than this many seconds are merged

# (regex, points, reason) triples. Patterns are written WITHOUT accents; the
# text is accent-stripped before matching so "me mori" also catches "me morí".
rage_patterns = [
    (r"\bputa\w*", 10, "EXTREME"),
    (r"\bme mataron\b", 12, "DEATH"),
    (r"\bme mori\b", 12, "DEATH"),
    (r"\bmierda\b", 8, "RAGE"),
    (r"\bjoder\b", 8, "RAGE"),
    (r"\bretrasad\w*", 9, "INSULT"),
    (r"\bimbecil\b", 9, "INSULT"),
    (r"\bla cague\b", 8, "FAIL"),
]

# Compiled once so the scoring loop does not go through re's cache per entry.
_COMPILED_PATTERNS = [
    (re.compile(pattern, re.IGNORECASE), points, reason)
    for pattern, points, reason in rage_patterns
]


def _strip_accents(text):
    """Return *text* with combining diacritics removed (e.g. "morí" -> "mori")."""
    decomposed = unicodedata.normalize("NFD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _mmss(seconds):
    """Format a number of seconds as zero-padded ``MM:SS`` (fraction dropped)."""
    mins, secs = divmod(int(seconds), 60)
    return f"{mins:02d}:{secs:02d}"


def score_text(text):
    """Score one transcript line against ``rage_patterns``.

    Matching is case-insensitive and accent-insensitive. Each pattern that
    matches adds its points once, however many times it occurs in the text.

    Returns:
        A ``(score, reasons)`` tuple; ``reasons`` lists the distinct reason
        labels of the matching patterns, in pattern order.
    """
    normalized = _strip_accents(text)
    score = 0
    reasons = []
    for regex, points, reason in _COMPILED_PATTERNS:
        if regex.search(normalized):
            score += points
            if reason not in reasons:
                reasons.append(reason)
    return score, reasons


def find_highlights(
    gameplay_segments,
    transcript_segments,
    min_score=MIN_SCORE,
    per_segment=TOP_PER_SEGMENT,
):
    """Pick the best-scoring transcript entries inside each gameplay segment.

    Args:
        gameplay_segments: iterable of dicts with ``"start"`` and ``"end"``.
        transcript_segments: iterable of dicts with ``"start"`` and ``"text"``.
        min_score: entries scoring below this are ignored.
        per_segment: maximum number of entries kept per gameplay segment.

    Returns:
        A list of highlight dicts (``time``, ``score``, ``text``, ``reasons``,
        ``segment_start``, ``segment_end``), grouped by gameplay segment and
        sorted by descending score within each group.

    Both segment bounds are inclusive, so an entry starting exactly on a
    boundary shared by two gameplay segments is reported for both of them.
    """
    highlights = []
    for seg in gameplay_segments:
        candidates = []
        for entry in transcript_segments:
            if not seg["start"] <= entry["start"] <= seg["end"]:
                continue
            score, reasons = score_text(entry["text"])
            if score < min_score:
                continue
            candidates.append(
                {
                    "time": entry["start"],
                    "score": score,
                    "text": entry["text"][:60],
                    "reasons": reasons,
                    "segment_start": seg["start"],
                    "segment_end": seg["end"],
                }
            )
        # Stable sort: ties keep transcript order.
        candidates.sort(key=lambda h: -h["score"])
        highlights.extend(candidates[:per_segment])
    return highlights


def build_clips(
    highlights,
    pre_roll=CLIP_PRE_ROLL,
    post_roll=CLIP_POST_ROLL,
    earliest=EARLIEST_CLIP_START,
    latest=LATEST_CLIP_END,
):
    """Turn highlights into ``[start, end]`` clips (whole seconds).

    Each clip spans ``pre_roll`` seconds before to ``post_roll`` seconds after
    the highlight time, clamped to ``[earliest, latest]``. Clips that end up
    with no positive duration after clamping are dropped.
    """
    clips = []
    for h in highlights:
        moment = int(h["time"])
        start = max(earliest, moment - pre_roll)
        end = min(latest, moment + post_roll)
        if end > start:
            clips.append([start, end])
    return clips


def merge_clips(clips, max_gap=MERGE_GAP):
    """Merge clips that overlap or lie within ``max_gap`` seconds of each other.

    Returns a new list of ``[start, end]`` lists sorted by start; the input is
    not modified.
    """
    merged = []
    for start, end in sorted(clips, key=lambda c: c[0]):
        if merged and start <= merged[-1][1] + max_gap:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged


def main():
    """Load the inputs, report the top moments and write the final clip list."""
    # Explicit UTF-8: the transcript is Spanish text and must not depend on
    # the locale's default encoding.
    with open(GAMEPLAY_SCENES_PATH, "r", encoding="utf-8") as f:
        gameplay_segments = json.load(f)

    with open(TRANSCRIPT_PATH, "r", encoding="utf-8") as f:
        trans = json.load(f)

    print("=" * 70)
    print("🎯 EXTRACTOR DE HIGHLIGHTS - Solo Gameplay Confirmado")
    print("=" * 70)
    print(f"Analizando {len(gameplay_segments)} segmentos de gameplay...")
    print()

    all_highlights = find_highlights(gameplay_segments, trans["segments"])
    print(f"Momentos destacados encontrados: {len(all_highlights)}")

    # Global ranking across all gameplay segments.
    all_highlights.sort(key=lambda h: -h["score"])

    print("\nTop momentos:")
    for i, h in enumerate(all_highlights[:TOP_DISPLAYED], 1):
        print(
            f"{i:2d}. {_mmss(h['time'])} [Score: {h['score']:2d}] {'/'.join(h['reasons'])}"
        )
        print(f"     {h['text'][:50]}...")

    filtered = merge_clips(build_clips(all_highlights[:TOP_CLIPS]))

    print(f"\n{'=' * 70}")
    print(f"CLIPS FINALES: {len(filtered)}")
    total = sum(end - start for start, end in filtered)
    print(f"Duración total: {total // 60}m {total % 60}s")
    print(f"{'=' * 70}")

    for i, (start, end) in enumerate(filtered, 1):
        print(f"{i:2d}. {_mmss(start)} - {end - start}s")

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(filtered, f)

    print("\n💾 Guardado: final_highlights.json")
    print("\nEste archivo contiene SOLO highlights de gameplay confirmado.")
    print("No incluye selección de campeones ni hablando entre juegos.")


if __name__ == "__main__":
    main()