twitch-highlight-detector/vlm_analyzer.py

#!/usr/bin/env python3
"""
VLM GAMEPLAY DETECTOR usando Moondream
Analiza frames con Moondream para detectar gameplay real de LoL
Compatible con RTX 3050 (4GB VRAM)
"""

import moondream as md
from PIL import Image
import subprocess
import json
import torch
from pathlib import Path

print("🎮 VLM GAMEPLAY DETECTOR (Moondream)")
# torch.cuda.get_device_name() raises when no CUDA device is usable, so only
# query it after confirming CUDA is available; the banner is informational and
# must not abort the script.
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU: no disponible (CUDA no detectado)")
print()

# Load the Moondream model once at start-up; `model` is the module-level
# handle used by analyze_frame_vlm() for every frame.
print("📥 Cargando Moondream en GPU...")
model = md.vl(
    model="https://huggingface.co/vikhyatk/moondream2/resolve/main/moondream-2b-int8.mf"
)
print("✅ Modelo listo")
print()


def analyze_frame_vlm(image_path, timestamp):
    """Classify one extracted frame as gameplay / not gameplay with Moondream.

    Args:
        image_path: Path of the image file to open with PIL.
        timestamp: Value copied verbatim into the result so the caller can
            map the verdict back to a position in the video.

    Returns:
        A dict ``{"timestamp", "is_gameplay", "answer"}`` where ``answer`` is
        the model's reply stripped and upper-cased, and ``is_gameplay`` is
        True when that reply contains the substring ``"YES"``.
        Returns ``None`` if opening the image or querying the model fails;
        the error is printed and deliberately not re-raised so one bad frame
        does not stop the whole run.
    """
    # Question phrased to force a YES/NO reply that is easy to parse.
    question = "Is this a League of Legends gameplay screenshot showing the game map, champions, or action? Answer only YES or NO."

    try:
        # Use a context manager so the underlying file handle is released as
        # soon as the query finishes (the caller deletes the frame right after).
        with Image.open(image_path) as image:
            answer = model.query(image, question)["answer"].strip().upper()
    except Exception as e:
        print(f"Error: {e}")
        return None

    is_gameplay = "YES" in answer
    return {"timestamp": timestamp, "is_gameplay": is_gameplay, "answer": answer}


# --- Configuration ----------------------------------------------------------
VIDEO_PATH = "nuevo_stream_360p.mp4"
OUTPUT_PATH = "gameplay_vlm.json"
START_OFFSET_S = 455  # first sampled timestamp (s); earlier footage is skipped
SAMPLE_INTERVAL_S = 30  # seconds between sampled frames
MIN_SEGMENT_S = 60  # a mid-stream gameplay run must be strictly longer than this


def _probe_duration(video_path):
    """Return the duration of *video_path* in seconds, as reported by ffprobe.

    Exits the script with a readable message when ffprobe produces no numeric
    duration (missing/corrupt file, or a container without duration info).
    """
    probe = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            video_path,
        ],
        capture_output=True,
        text=True,
    )
    raw = probe.stdout.strip()
    try:
        return float(raw)
    except ValueError:
        detail = probe.stderr.strip() or repr(raw)
        raise SystemExit(
            f"❌ No se pudo leer la duración de {video_path!r} con ffprobe: {detail}"
        ) from None


def _extract_frame(video_path, ts, frame_path):
    """Write the frame at *ts* seconds of *video_path* to *frame_path*.

    Returns True when ffmpeg succeeded and the output file exists.
    """
    proc = subprocess.run(
        [
            "ffmpeg",
            "-y",
            # -ss before -i seeks on the input instead of decoding the whole
            # video up to `ts` for every single frame.
            "-ss",
            str(ts),
            "-i",
            video_path,
            "-vframes",
            "1",
            "-vf",
            "scale=640:360",  # resolution is sufficient for the VLM
            "-q:v",
            "2",
            frame_path,
        ],
        capture_output=True,
    )
    return proc.returncode == 0 and Path(frame_path).exists()


# Get the video duration
duration = _probe_duration(VIDEO_PATH)
print(f"📹 Video: {duration / 60:.1f} minutos")
print(f"🔍 Analizando cada {SAMPLE_INTERVAL_S} segundos con VLM...")
print()

# Sample one frame every SAMPLE_INTERVAL_S seconds and merge consecutive
# gameplay verdicts into segments.
timestamps = list(range(START_OFFSET_S, int(duration), SAMPLE_INTERVAL_S))
segments = []
start_ts = None  # start of the currently open gameplay run, None when closed

for i, ts in enumerate(timestamps):
    mins, secs = divmod(ts, 60)

    # Extract the frame; skip this timestamp if ffmpeg could not produce it.
    frame_path = f"/tmp/vlm_frame_{ts}.jpg"
    if not _extract_frame(VIDEO_PATH, ts, frame_path):
        # Remove any partial output so it is never mistaken for a valid frame.
        Path(frame_path).unlink(missing_ok=True)
        continue

    # Analyze with the VLM (returns None when the analysis failed).
    analysis = analyze_frame_vlm(frame_path, ts)

    if analysis:
        icon = "🎮" if analysis["is_gameplay"] else "🗣️"
        print(f"{mins:02d}:{secs:02d} {icon} {analysis['answer']}")

        # Detect transitions between gameplay and non-gameplay.
        if analysis["is_gameplay"]:
            if start_ts is None:
                start_ts = ts
        else:
            # Close the open run; runs of MIN_SEGMENT_S or less are dropped.
            if start_ts is not None and (ts - start_ts) > MIN_SEGMENT_S:
                segments.append(
                    {"start": start_ts, "end": ts, "duration": ts - start_ts}
                )
                print(
                    f"   └─ Gameplay: {start_ts // 60}m-{ts // 60}m ({(ts - start_ts) // 60}min)"
                )
            start_ts = None

    # Clean up the temporary frame
    Path(frame_path).unlink(missing_ok=True)

    # Progress report every 10 frames
    if (i + 1) % 10 == 0:
        print(f"   ({i + 1}/{len(timestamps)} frames procesados)")

# Close the last segment: a run still open at the end extends to the video end.
if start_ts is not None:
    segments.append(
        {"start": start_ts, "end": int(duration), "duration": int(duration) - start_ts}
    )

print(f"\n{'=' * 60}")
print("✅ ANÁLISIS COMPLETADO")
print(f"{'=' * 60}")
print(f"Segmentos de gameplay: {len(segments)}")
total = sum(s["duration"] for s in segments)
print(f"Tiempo total: {total // 60}m {total % 60}s")
print()

for i, seg in enumerate(segments, 1):
    mins_s, secs_s = divmod(seg["start"], 60)
    mins_e, secs_e = divmod(seg["end"], 60)
    print(
        f"{i}. {mins_s:02d}:{secs_s:02d} - {mins_e:02d}:{secs_e:02d} "
        f"({seg['duration'] // 60}m {seg['duration'] % 60}s)"
    )

# Save the segments for downstream filtering
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    json.dump(segments, f, indent=2)

print(f"\n💾 Guardado: {OUTPUT_PATH}")
print("\nUsa este archivo para filtrar highlights:")
print(
    "python3 filter_by_vlm.py --vlm gameplay_vlm.json --highlights highlights_many.json"
)