Sistema completo de detección de highlights con VLM y análisis de gameplay

- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
2026-02-19 17:38:14 +00:00
parent c1c66a7d9a
commit 00180d0b1c
45 changed files with 10636 additions and 260 deletions
--- a/gpu_detector.py
+++ b/gpu_detector.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+"""
+GPU GAMEPLAY DETECTOR
+Usa PyTorch + OpenCV en GPU para detectar gameplay en tiempo real
+"""
+
+import torch
+import cv2
+import numpy as np
+import json
+import subprocess
+from pathlib import Path
+
+# Startup banner. The device queries are guarded so that importing this
+# module on a host without a usable CUDA device prints a notice instead of
+# raising from torch.cuda.get_device_name / get_device_properties.
+print("🎮 GPU Gameplay Detector")
+if torch.cuda.is_available():
+    print(f"Dispositivo: {torch.cuda.get_device_name(0)}")
+    print(
+        f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB"
+    )
+else:
+    print("Dispositivo: CPU (no se detectó GPU CUDA)")
+
+
+def extract_frame_batch_gpu(video_path, timestamps):
+    """Extract one decoded frame per timestamp by shelling out to ffmpeg.
+
+    A separate ``ffmpeg`` process (with ``-hwaccel cuda`` requested) is
+    spawned for every timestamp. It writes a single PNG to stdout, which is
+    then decoded with ``cv2.imdecode``.
+
+    Args:
+        video_path: Path of the video file to read.
+        timestamps: Iterable of positions, in seconds, to grab frames at.
+
+    Returns:
+        A list of ``(timestamp, frame)`` tuples in input order, where
+        ``frame`` is whatever ``cv2.imdecode(..., cv2.IMREAD_COLOR)``
+        returned. Timestamps for which ffmpeg fails, produces no output, or
+        produces undecodable output are skipped (best effort).
+    """
+    frames = []
+
+    for ts in timestamps:
+        result = subprocess.run(
+            [
+                "ffmpeg",
+                "-hwaccel",
+                "cuda",
+                # -ss placed BEFORE -i makes ffmpeg seek in the input instead
+                # of decoding and discarding everything up to ``ts``.
+                "-ss",
+                str(ts),
+                "-i",
+                str(video_path),
+                "-vframes",
+                "1",
+                "-f",
+                "image2pipe",
+                "-vcodec",
+                "png",
+                "pipe:1",
+            ],
+            capture_output=True,
+        )
+
+        # A zero exit code with empty stdout means no frame was written
+        # (e.g. the timestamp is at/after the end of the stream); there is
+        # nothing to decode in that case.
+        if result.returncode != 0 or not result.stdout:
+            continue
+
+        # Wrap the PNG bytes as a uint8 array and decode to an image.
+        png_bytes = np.frombuffer(result.stdout, np.uint8)
+        frame = cv2.imdecode(png_bytes, cv2.IMREAD_COLOR)
+        if frame is not None:
+            frames.append((ts, frame))
+
+    return frames
+
+
+def analyze_gameplay_gpu(frames):
+    """Score frames with simple per-frame colour heuristics.
+
+    Each frame is resized to 320x180 and three checks are made on it:
+
+    - colour variation: mean of the per-channel standard deviations > 40
+    - green presence: mean of channel index 1 > 80
+    - not dark: overall mean intensity > 30
+
+    The checks are worth 0.4, 0.4 and 0.2 respectively; a frame counts as
+    gameplay only when the total is strictly above 0.6. No motion or edge
+    analysis is performed; every frame is judged on its own.
+
+    Args:
+        frames: Iterable of ``(timestamp, frame)`` pairs, as produced by
+            ``extract_frame_batch_gpu``. ``frame`` must be accepted by
+            ``cv2.resize`` (assumed to be an H x W x 3 image array).
+
+    Returns:
+        A list of dicts, one per input frame, with keys ``timestamp``,
+        ``is_gameplay`` (bool), ``score`` (float in [0, 1]), ``color_std``
+        and ``green_mean``. An empty input yields an empty list.
+    """
+    if not frames:
+        return []
+
+    # Run on the GPU when one is available; otherwise do the same
+    # arithmetic on the CPU instead of failing in ``.cuda()``.
+    use_cuda = torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    results = []
+
+    for ts, frame in frames:
+        # Downscale so the statistics are cheap to compute.
+        frame_resized = cv2.resize(frame, (320, 180))
+        frame_tensor = torch.from_numpy(frame_resized).float().to(device)
+
+        # Per-channel statistics over the two spatial dimensions.
+        mean_color = frame_tensor.mean(dim=(0, 1))
+        std_color = frame_tensor.std(dim=(0, 1))
+
+        # Pull the scalars to the host once and reuse them below.
+        color_std = float(std_color.mean())
+        # Index 1 is the green channel in both BGR and RGB layouts.
+        green_mean = float(mean_color[1])
+        brightness = float(frame_tensor.mean())
+
+        # Accumulate the score in integer tenths. Summing 0.4 + 0.2 as
+        # floats gives 0.6000000000000001, which would wrongly pass the
+        # strict "> 0.6" threshold.
+        points = 0
+        if color_std > 40:  # noticeable colour variation
+            points += 4
+        if green_mean > 80:  # green channel present
+            points += 4
+        if brightness > 30:  # not a black/very dark screen
+            points += 2
+
+        results.append(
+            {
+                "timestamp": ts,
+                "is_gameplay": points > 6,
+                "score": points / 10,
+                "color_std": color_std,
+                "green_mean": green_mean,
+            }
+        )
+
+        # Drop the reference so the buffer can be reused promptly.
+        del frame_tensor
+
+    if use_cuda:
+        torch.cuda.empty_cache()
+    return results
+
+
+def scan_video_gpu(video_path, interval=30, start=455):
+    """Sample a video at fixed intervals and merge gameplay hits into segments.
+
+    The duration is read with ``ffprobe``. Frames are then extracted and
+    scored in batches of 10 timestamps, and progress is printed per sample.
+
+    Args:
+        video_path: Path of the video file to scan.
+        interval: Seconds between sampled frames.
+        start: Position, in seconds, of the first sampled frame. Defaults to
+            455, the offset that used to be hard-coded here.
+
+    Returns:
+        A list of ``{"start", "end", "duration"}`` dicts (seconds). Each one
+        spans a run of consecutive samples classified as gameplay. A run
+        still open after the last sample is closed at ``int(duration)``.
+
+    Raises:
+        ValueError: If ffprobe does not report a numeric duration.
+    """
+    probe = subprocess.run(
+        [
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "format=duration",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            video_path,
+        ],
+        capture_output=True,
+        text=True,
+    )
+
+    try:
+        duration = float(probe.stdout.strip())
+    except ValueError as err:
+        # Empty or non-numeric output (missing file, unreadable container):
+        # surface ffprobe's own message instead of a bare float() error.
+        detail = probe.stderr.strip() or probe.stdout.strip()
+        raise ValueError(
+            f"ffprobe did not report a duration for {video_path!r}: {detail!r}"
+        ) from err
+
+    print(f"\n📹 Video: {duration / 60:.1f} minutos")
+    print(f"🔍 Analizando cada {interval}s con GPU...")
+    print()
+
+    timestamps = list(range(start, int(duration), interval))
+
+    # Work in small batches so only a few decoded frames are held at once.
+    batch_size = 10
+    total_batches = (len(timestamps) - 1) // batch_size + 1
+    all_results = []
+
+    for batch_no, offset in enumerate(range(0, len(timestamps), batch_size), 1):
+        batch_ts = timestamps[offset : offset + batch_size]
+        print(f"Procesando batch {batch_no}/{total_batches}...")
+
+        frames = extract_frame_batch_gpu(video_path, batch_ts)
+        results = analyze_gameplay_gpu(frames)
+        all_results.extend(results)
+
+        for r in results:
+            status = "🎮" if r["is_gameplay"] else "🗣️"
+            mins, secs = divmod(r["timestamp"], 60)
+            print(f"  {mins:02d}:{secs:02d} {status} Score: {r['score']:.2f}")
+
+    return _results_to_segments(all_results, int(duration))
+
+
+def _results_to_segments(results, video_end):
+    """Collapse runs of consecutive gameplay samples into segment dicts."""
+    segments = []
+    current_start = None
+
+    for r in results:
+        if r["is_gameplay"]:
+            if current_start is None:
+                current_start = r["timestamp"]
+        elif current_start is not None:
+            # First non-gameplay sample after a run closes the segment.
+            segments.append(
+                {
+                    "start": current_start,
+                    "end": r["timestamp"],
+                    "duration": r["timestamp"] - current_start,
+                }
+            )
+            current_start = None
+
+    # A run that reaches the last sample is closed at the end of the video.
+    if current_start is not None:
+        segments.append(
+            {
+                "start": current_start,
+                "end": video_end,
+                "duration": video_end - current_start,
+            }
+        )
+
+    return segments
+
+
+def main():
+    """Scan the fixed input video, save the segments as JSON and print a report.
+
+    Reads ``nuevo_stream_360p.mp4`` from the working directory, samples it
+    every 30 seconds via ``scan_video_gpu`` and writes the resulting segment
+    list to ``gameplay_segments_gpu.json``.
+    """
+    source_video = "nuevo_stream_360p.mp4"
+    rule = "=" * 60
+
+    print(rule)
+    print("GPU GAMEPLAY DETECTOR")
+    print(rule)
+
+    segments = scan_video_gpu(source_video, interval=30)
+
+    with open("gameplay_segments_gpu.json", "w") as out_file:
+        json.dump(segments, out_file, indent=2)
+
+    print(f"\n{rule}")
+    print("RESULTADO")
+    print(rule)
+    print(f"Segmentos de gameplay: {len(segments)}")
+
+    # Total gameplay time, split into whole minutes and leftover seconds.
+    total_min, total_sec = divmod(sum(seg["duration"] for seg in segments), 60)
+    print(f"Tiempo total gameplay: {total_min}m {total_sec}s")
+
+    number = 0
+    for seg in segments:
+        number += 1
+        start_min, start_sec = divmod(seg["start"], 60)
+        end_min, end_sec = divmod(seg["end"], 60)
+        len_min, len_sec = divmod(seg["duration"], 60)
+        print(
+            f"{number}. {start_min:02d}:{start_sec:02d} - {end_min:02d}:{end_sec:02d} "
+            f"({len_min}m {len_sec}s)"
+        )
+
+    print("\n💾 Guardado en: gameplay_segments_gpu.json")
+
+
+# Run the detector only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()