Sistema completo de detección de highlights con VLM y análisis de gameplay

- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs. hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para la próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
2026-02-19 17:38:14 +00:00
parent c1c66a7d9a
commit 00180d0b1c
45 changed files with 10636 additions and 260 deletions
--- a/vlm_analyzer.py
+++ b/vlm_analyzer.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""
+VLM GAMEPLAY DETECTOR using Moondream
+Analyzes video frames with Moondream to detect real LoL gameplay
+Compatible with RTX 3050 (4GB VRAM)
+"""
+
+import moondream as md
+from PIL import Image
+import subprocess
+import json
+import torch
+from pathlib import Path
+
+print("🎮 VLM GAMEPLAY DETECTOR (Moondream)")
+print(f"GPU: {torch.cuda.get_device_name(0)}")  # NOTE(review): unguarded query of CUDA device 0 — presumably fails when no CUDA device is present; confirm
+print()
+
+# Load the Moondream model once at module level; `model` is the handle analyze_frame_vlm() queries
+print("📥 Cargando Moondream en GPU...")
+model = md.vl(  # only `model=` is passed; no device is selected in this call
+    model="https://huggingface.co/vikhyatk/moondream2/resolve/main/moondream-2b-int8.mf"
+)
+print("✅ Modelo listo")
+print()
+
+
+def analyze_frame_vlm(image_path, timestamp):
+    """Ask Moondream whether the image at image_path shows LoL gameplay; return a dict with "timestamp", "is_gameplay" and "answer", or None if anything raises."""
+    try:
+        image = Image.open(image_path)
+
+        # Closed yes/no question so the reply can be classified by a substring check
+        question = "Is this a League of Legends gameplay screenshot showing the game map, champions, or action? Answer only YES or NO."
+
+        answer = model.query(image, question)["answer"].strip().upper()
+
+        is_gameplay = "YES" in answer  # substring match on the stripped, upper-cased reply
+
+        return {"timestamp": timestamp, "is_gameplay": is_gameplay, "answer": answer}
+    except Exception as e:
+        print(f"Error: {e}")  # best-effort: the error is printed and the caller receives None
+        return None
+
+
+# Get the video duration in seconds: ffprobe's stdout is captured as text and parsed as a float below
+result = subprocess.run(
+    [
+        "ffprobe",
+        "-v",
+        "error",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "default=noprint_wrappers=1:nokey=1",
+        "nuevo_stream_360p.mp4",
+    ],
+    capture_output=True,
+    text=True,
+)
+
+duration = float(result.stdout.strip())  # NOTE(review): ffprobe's exit status is not checked; empty stdout raises ValueError here
+print(f"📹 Video: {duration / 60:.1f} minutos")
+print("🔍 Analizando cada 30 segundos con VLM...")
+print()
+
+# Sample points: every 30 s from 455 s up to the video duration (NOTE(review): the reason for the 455 s offset is not stated here — confirm)
+timestamps = list(range(455, int(duration), 30))
+segments = []  # finished gameplay runs as {"start", "end", "duration"} dicts, values in seconds
+in_gameplay = False  # True while the most recently classified sample was gameplay
+start_ts = None  # timestamp at which the currently open gameplay run began
+
+for i, ts in enumerate(timestamps):
+    mins = ts // 60
+    secs = ts % 60
+
+    # Extract a single 640x360 JPEG frame at ts into a per-timestamp temp file
+    frame_path = f"/tmp/vlm_frame_{ts}.jpg"
+    subprocess.run(
+        [
+            "ffmpeg",
+            "-y",
+            "-ss",  # placed before -i (input seeking) so ffmpeg jumps to ts instead of decoding from the start
+            str(ts),
+            "-i",
+            "nuevo_stream_360p.mp4",
+            "-vframes",
+            "1",
+            "-vf",
+            "scale=640:360",  # Resolution is sufficient for the VLM
+            "-q:v",
+            "2",
+            frame_path,
+        ],
+        capture_output=True,
+    )
+
+    if not Path(frame_path).exists():
+        continue
+
+    # Classify the frame with the VLM (None means the analysis failed; state is left untouched)
+    analysis = analyze_frame_vlm(frame_path, ts)
+
+    if analysis:
+        icon = "🎮" if analysis["is_gameplay"] else "🗣️"
+        print(f"{mins:02d}:{secs:02d} {icon} {analysis['answer']}")
+
+        # Track gameplay/non-gameplay transitions; a run is recorded only if it lasted more than 60 s
+        if analysis["is_gameplay"]:
+            if not in_gameplay:
+                start_ts = ts
+                in_gameplay = True
+        else:
+            if in_gameplay and start_ts is not None and (ts - start_ts) > 60:
+                segments.append(
+                    {"start": start_ts, "end": ts, "duration": ts - start_ts}
+                )
+                print(
+                    f"   └─ Gameplay: {start_ts // 60}m-{ts // 60}m ({(ts - start_ts) // 60}min)"
+                )
+            in_gameplay = False
+            start_ts = None
+
+    # Remove the temporary frame
+    Path(frame_path).unlink(missing_ok=True)
+
+    # Progress report every 10 samples
+    if (i + 1) % 10 == 0:
+        print(f"   ({i + 1}/{len(timestamps)} frames procesados)")
+
+# Close a gameplay run still open at the end of the video (no 60 s minimum is applied on this path)
+if in_gameplay and start_ts:
+    segments.append(
+        {"start": start_ts, "end": int(duration), "duration": int(duration) - start_ts}
+    )
+
+print(f"\n{'=' * 60}")
+print(f"✅ ANÁLISIS COMPLETADO")
+print(f"{'=' * 60}")
+print(f"Segmentos de gameplay: {len(segments)}")
+total = sum(s["duration"] for s in segments)  # total gameplay time in seconds
+print(f"Tiempo total: {total // 60}m {total % 60}s")
+print()
+
+for i, seg in enumerate(segments, 1):  # numbered mm:ss listing of every segment
+    mins_s, secs_s = divmod(seg["start"], 60)
+    mins_e, secs_e = divmod(seg["end"], 60)
+    print(
+        f"{i}. {mins_s:02d}:{secs_s:02d} - {mins_e:02d}:{secs_e:02d} "
+        f"({seg['duration'] // 60}m {seg['duration'] % 60}s)"
+    )
+
+# Save the segment list as JSON in the working directory
+with open("gameplay_vlm.json", "w") as f:
+    json.dump(segments, f, indent=2)
+
+print(f"\n💾 Guardado: gameplay_vlm.json")
+print("\nUsa este archivo para filtrar highlights:")
+print(
+    "python3 filter_by_vlm.py --vlm gameplay_vlm.json --highlights highlights_many.json"
+)