Sistema completo de detección de highlights con VLM y análisis de gameplay

- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs. hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para la próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
2026-02-19 17:38:14 +00:00
parent c1c66a7d9a
commit 00180d0b1c
45 changed files with 10636 additions and 260 deletions
--- a/run_vlm_analysis.py
+++ b/run_vlm_analysis.py
@@ -0,0 +1,209 @@
+#!/opt/vlm_env/bin/python3
+"""
+VLM GAMEPLAY DETECTOR - Senior Level
+Uses a local Moondream 2B model on the GPU to detect real LoL gameplay
+"""
+
+import sys
+
+sys.path.insert(0, "/opt/vlm_env/lib/python3.13/site-packages")
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from PIL import Image
+import subprocess
+import json
+from pathlib import Path
+import time
+
+# Startup banner, followed by the name and total memory of CUDA device 0.
+banner_rule = "=" * 70
+print(banner_rule)
+print("🎮 VLM GAMEPLAY DETECTOR - Moondream 2B (Local GPU)")
+print(banner_rule)
+gpu_name = torch.cuda.get_device_name(0)
+print(f"GPU: {gpu_name}")
+total_vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
+print(f"VRAM: {total_vram_gb:.1f} GB")
+print()
+
+# Load the Moondream tokenizer and model once at import time; analyze_frame()
+# reads both as module-level globals.
+print("📥 Cargando Moondream 2B en GPU...")
+model_id = "vikhyatk/moondream2"
+
+# NOTE: trust_remote_code=True lets transformers run the Python code shipped
+# in the model repository.
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+load_options = {
+    "trust_remote_code": True,
+    "torch_dtype": torch.float16,  # half-precision weights
+    "device_map": {"": "cuda"},  # place the whole model on the CUDA device
+}
+model = AutoModelForCausalLM.from_pretrained(model_id, **load_options)
+print("✅ Modelo cargado y listo")
+print()
+
+
+def analyze_frame(image_path, timestamp):
+    """Classify a single video frame with the Moondream VLM.
+
+    The frame is encoded with the module-level ``model`` and the model is asked
+    to label it as GAMEPLAY, SELECT, TALKING or MENU.
+
+    Args:
+        image_path: Path of the image file to analyse.
+        timestamp: Position of the frame in the video, in seconds. It is copied
+            into the result and used in the error message.
+
+    Returns:
+        A dict with the keys ``timestamp``, ``is_gameplay`` (True when the
+        upper-cased answer contains "GAMEPLAY"), ``classification`` (the
+        stripped, upper-cased answer) and ``confidence``; or ``None`` when
+        anything fails (the error is printed, not raised).
+    """
+    try:
+        # Prompt tailored to League of Legends
+        prompt = """Look at this image from a gaming stream. Is this showing:
+1. ACTIVE GAMEPLAY - League of Legends match in progress (map visible, champions fighting, abilities being used)
+2. CHAMPION SELECT - Lobby or selection screen
+3. STREAMER TALKING - Just the streamer face/webcam without game visible
+4. MENU/WAITING - Game menus, loading screens, or waiting
+
+Answer with ONLY ONE word: GAMEPLAY, SELECT, TALKING, or MENU"""
+
+        # Open the frame in a context manager so its file handle is released
+        # right away; this function runs once per sampled frame and would
+        # otherwise leave every handle open until garbage collection.
+        with Image.open(image_path) as image:
+            # Encode image
+            enc_image = model.encode_image(image)
+
+            # Query
+            answer = model.answer_question(enc_image, prompt, tokenizer)
+
+        result = answer.strip().upper()
+
+        # Substring match: any answer containing "GAMEPLAY" counts as gameplay
+        is_gameplay = "GAMEPLAY" in result
+
+        return {
+            "timestamp": timestamp,
+            "is_gameplay": is_gameplay,
+            "classification": result,
+            # Not a model score: simply mirrors is_gameplay.
+            "confidence": "HIGH" if is_gameplay else "LOW",
+        }
+    except Exception as e:
+        # Best effort: a frame that cannot be analysed is reported and skipped
+        # by the caller instead of aborting the whole run.
+        print(f"  Error en {timestamp}s: {e}")
+        return None
+
+
+# Video to analyse
+video_path = (
+    "/home/ren/proyectos/editor/twitch-highlight-detector/nuevo_stream_360p.mp4"
+)
+
+# Ask ffprobe for the container duration; with these -of options it prints
+# only the bare value (seconds) on stdout.
+result = subprocess.run(
+    [
+        "ffprobe",
+        "-v",
+        "error",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "default=noprint_wrappers=1:nokey=1",
+        video_path,
+    ],
+    capture_output=True,
+    text=True,
+)
+
+# When the file is missing or unreadable ffprobe prints nothing (or "N/A") on
+# stdout; stop with its own error message instead of an opaque ValueError.
+try:
+    duration = float(result.stdout.strip())
+except ValueError:
+    probe_detail = result.stderr.strip() or repr(result.stdout.strip())
+    sys.exit(f"❌ No se pudo obtener la duración de {video_path}: {probe_detail}")
+
+print(f"📹 Video: {duration / 60:.1f} minutos ({duration / 3600:.1f} horas)")
+print("🔍 Analizando cada 30 segundos con VLM...")
+print("   (Esto tomará ~10-15 minutos)")
+print()
+
+# Sample one frame every 30 seconds.
+check_interval = 30
+# Sampling starts at 455 s instead of 0.
+# NOTE(review): presumably this skips an intro specific to this recording —
+# confirm before reusing the script on another video.
+timestamps = list(range(455, int(duration), check_interval))
+
+segments = []  # finished gameplay segments: {"start", "end", "duration"} in seconds
+in_gameplay = False  # True while the current run of frames is gameplay
+start_ts = None  # timestamp at which the current gameplay run began
+start_time = time.time()
+
+for i, ts in enumerate(timestamps):
+    mins = ts // 60
+    secs = ts % 60
+
+    # Extract a single frame at `ts`
+    frame_path = f"/tmp/vlm_frame_{ts}.jpg"
+    extraction = subprocess.run(
+        [
+            "ffmpeg",
+            "-y",
+            # -ss must come BEFORE -i: as an input option ffmpeg seeks straight
+            # to the position, whereas as an output option it decodes and
+            # discards everything from the start of the video for every frame.
+            "-ss",
+            str(ts),
+            "-i",
+            video_path,
+            "-vframes",
+            "1",
+            "-vf",
+            "scale=512:288",  # Large enough for the VLM
+            "-q:v",
+            "3",
+            frame_path,
+        ],
+        capture_output=True,
+    )
+
+    # Skip the sample when extraction failed. Checking the return code as well
+    # as the file keeps a stale frame left by an aborted run from being
+    # analysed as if it were fresh.
+    if extraction.returncode != 0 or not Path(frame_path).exists():
+        Path(frame_path).unlink(missing_ok=True)
+        continue
+
+    # Classify the frame with the VLM (None means the analysis failed)
+    analysis = analyze_frame(frame_path, ts)
+
+    if analysis:
+        icon = "🎮" if analysis["is_gameplay"] else "🗣️"
+        print(f"{mins:02d}:{secs:02d} {icon} {analysis['classification']}")
+
+        # Track transitions between gameplay and non-gameplay frames
+        if analysis["is_gameplay"]:
+            if not in_gameplay:
+                start_ts = ts
+                in_gameplay = True
+                print(f"   └─ INICIO gameplay")
+        elif in_gameplay and start_ts is not None:
+            # `is not None` rather than truthiness, so a run starting at
+            # timestamp 0 would still be closed correctly.
+            seg_duration = ts - start_ts
+            if seg_duration > 60:  # Keep only runs longer than 1 minute
+                segments.append(
+                    {"start": start_ts, "end": ts, "duration": seg_duration}
+                )
+                print(
+                    f"   └─ FIN gameplay ({seg_duration // 60}m {seg_duration % 60}s)"
+                )
+            in_gameplay = False
+            start_ts = None
+
+    # Clean up the temporary frame
+    Path(frame_path).unlink(missing_ok=True)
+
+    # Progress report every 10 frames, with a linear estimate of time left
+    if (i + 1) % 10 == 0:
+        elapsed = time.time() - start_time
+        remaining = (elapsed / (i + 1)) * (len(timestamps) - i - 1)
+        print(
+            f"\n   Progreso: {i + 1}/{len(timestamps)} frames | "
+            f"Tiempo restante: {remaining // 60:.0f}m {remaining % 60:.0f}s\n"
+        )
+
+# Close a gameplay run that is still open when the video ends.
+# NOTE(review): unlike the runs closed inside the loop, this trailing segment
+# is kept even if it is shorter than the 60 s minimum — confirm that is intended.
+if in_gameplay and start_ts is not None:
+    segments.append(
+        {"start": start_ts, "end": int(duration), "duration": int(duration) - start_ts}
+    )
+
+# Summary of the run
+summary_rule = "=" * 70
+print(f"\n{summary_rule}")
+print("✅ ANÁLISIS VLM COMPLETADO")
+print(summary_rule)
+print(f"Segmentos de gameplay: {len(segments)}")
+total_gameplay = sum(s["duration"] for s in segments)
+gameplay_minutes, gameplay_seconds = divmod(total_gameplay, 60)
+print(f"Tiempo total gameplay: {gameplay_minutes}m {gameplay_seconds}s")
+# 455 is the offset (in seconds) at which sampling starts, so the analysed
+# span is int(duration) - 455; whatever is not gameplay counts as "other".
+other_minutes = (int(duration) - 455 - total_gameplay) // 60
+print(f"Tiempo total hablando/otros: {other_minutes}m")
+print()
+
+# One line per segment: start and end as hours/minutes, then the length
+for position, zone in enumerate(segments, start=1):
+    start_hours, start_rest = divmod(zone["start"], 3600)
+    end_hours, end_rest = divmod(zone["end"], 3600)
+    length_minutes, length_seconds = divmod(zone["duration"], 60)
+    print(
+        f"{position}. {start_hours}h{start_rest // 60:02d}m - "
+        f"{end_hours}h{end_rest // 60:02d}m "
+        f"({length_minutes}m {length_seconds}s)"
+    )
+
+# Persist the segments as JSON
+output_file = (
+    "/home/ren/proyectos/editor/twitch-highlight-detector/gameplay_vlm_zones.json"
+)
+with open(output_file, "w") as zones_file:
+    json.dump(segments, zones_file, indent=2)
+
+print(f"\n💾 Guardado: {output_file}")
+print("\nAhora puedes filtrar highlights usando estos rangos exactos.")