- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
210 lines
5.9 KiB
Python
Executable File
210 lines
5.9 KiB
Python
Executable File
#!/opt/vlm_env/bin/python3
|
|
"""
|
|
VLM GAMEPLAY DETECTOR - Nivel Senior
|
|
Usa Moondream 2B local en GPU para detectar gameplay real de LoL
|
|
"""
|
|
|
|
import sys
|
|
|
|
sys.path.insert(0, "/opt/vlm_env/lib/python3.13/site-packages")
|
|
|
|
import torch
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
from PIL import Image
|
|
import subprocess
|
|
import json
|
|
from pathlib import Path
|
|
import time
|
|
|
|
# --- Startup banner ----------------------------------------------------------
banner = "=" * 70
print(banner)
print("🎮 VLM GAMEPLAY DETECTOR - Moondream 2B (Local GPU)")
print(banner)
gpu_props = torch.cuda.get_device_properties(0)
print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"VRAM: {gpu_props.total_memory / 1024**3:.1f} GB")
print()

# --- Load the Moondream 2B vision-language model onto the GPU ----------------
print("📥 Cargando Moondream 2B en GPU...")
model_id = "vikhyatk/moondream2"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,   # fp16 halves VRAM use vs fp32
    device_map={"": "cuda"},     # place the whole model on the first CUDA device
)
print("✅ Modelo cargado y listo")
print()
|
|
|
|
|
def analyze_frame(image_path, timestamp):
    """Classify one video frame with the Moondream VLM.

    Parameters
    ----------
    image_path : str
        Path to the extracted JPEG frame on disk.
    timestamp : int
        Position of the frame in the video, in seconds (reporting only).

    Returns
    -------
    dict | None
        ``{"timestamp", "is_gameplay", "classification", "confidence"}`` on
        success, or ``None`` on any failure.  Errors are printed rather than
        raised on purpose: one bad frame must not abort the whole scan.
    """
    # Prompt tailored to League of Legends stream footage.  The model is asked
    # for a single keyword so the reply can be matched by substring below.
    prompt = """Look at this image from a gaming stream. Is this showing:
1. ACTIVE GAMEPLAY - League of Legends match in progress (map visible, champions fighting, abilities being used)
2. CHAMPION SELECT - Lobby or selection screen
3. STREAMER TALKING - Just the streamer face/webcam without game visible
4. MENU/WAITING - Game menus, loading screens, or waiting

Answer with ONLY ONE word: GAMEPLAY, SELECT, TALKING, or MENU"""

    try:
        # Context manager guarantees the image file handle is released even if
        # encode_image/answer_question raises (the original leaked it).
        with Image.open(image_path) as image:
            enc_image = model.encode_image(image)
            answer = model.answer_question(enc_image, prompt, tokenizer)

        result = answer.strip().upper()

        # Substring match tolerates extra words in the model's reply.
        is_gameplay = "GAMEPLAY" in result

        return {
            "timestamp": timestamp,
            "is_gameplay": is_gameplay,
            "classification": result,
            "confidence": "HIGH" if is_gameplay else "LOW",
        }
    except Exception as e:
        # Best-effort: report and let the caller skip this frame.
        print(f" Error en {timestamp}s: {e}")
        return None
|
|
|
|
|
|
# --- Input video and duration probe ------------------------------------------
video_path = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/nuevo_stream_360p.mp4"
)

# Ask ffprobe for the container duration: a bare float in seconds on stdout.
result = subprocess.run(
    [
        "ffprobe",
        "-v",
        "error",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        video_path,
    ],
    capture_output=True,
    text=True,
)

# Fail fast with a readable message instead of the opaque ValueError that
# float("") would raise when ffprobe is missing or the file is unreadable.
if result.returncode != 0:
    sys.exit(f"ffprobe falló para {video_path}: {result.stderr.strip()}")

duration = float(result.stdout.strip())
print(f"📹 Video: {duration / 60:.1f} minutos ({duration / 3600:.1f} horas)")
print("🔍 Analizando cada 30 segundos con VLM...")
print(" (Esto tomará ~10-15 minutos)")
print()
|
|
|
|
# --- Scan configuration ------------------------------------------------------
check_interval = 30   # seconds between sampled frames
# Offset where analysis begins — presumably skips this VOD's pre-game intro
# (~7.5 minutes); confirm/adjust per stream instead of leaving it magic.
ANALYSIS_START = 455

timestamps = list(range(ANALYSIS_START, int(duration), check_interval))

segments = []         # finished gameplay segments: {"start", "end", "duration"}
in_gameplay = False   # are we currently inside a gameplay run?
start_ts = None       # timestamp (seconds) where the current run began
start_time = time.time()
|
|
|
|
# --- Main scan loop ----------------------------------------------------------
for i, ts in enumerate(timestamps):
    mins, secs = divmod(ts, 60)

    # Extract one downscaled frame at ts.  NOTE: `-ss` goes BEFORE `-i`
    # (input seeking), which jumps via keyframes instead of decoding the whole
    # stream up to ts — the original output-seek form re-decoded the video for
    # every sample and is dramatically slower on multi-hour VODs.
    frame_path = f"/tmp/vlm_frame_{ts}.jpg"
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-ss",
            str(ts),
            "-i",
            video_path,
            "-vframes",
            "1",
            "-vf",
            "scale=512:288",  # small, but sufficient resolution for the VLM
            "-q:v",
            "3",
            frame_path,
        ],
        capture_output=True,
    )

    # ffmpeg failed (seek past EOF, corrupt section, ...) — skip this sample.
    if not Path(frame_path).exists():
        continue

    # Classify the frame with the VLM (returns None on failure).
    analysis = analyze_frame(frame_path, ts)

    if analysis:
        icon = "🎮" if analysis["is_gameplay"] else "🗣️"
        print(f"{mins:02d}:{secs:02d} {icon} {analysis['classification']}")

        # Segment state machine: open a run on the first GAMEPLAY frame,
        # close it on the first non-GAMEPLAY frame afterwards.
        if analysis["is_gameplay"]:
            if not in_gameplay:
                start_ts = ts
                in_gameplay = True
                print(" └─ INICIO gameplay")
        else:
            if in_gameplay and start_ts:
                seg_duration = ts - start_ts
                if seg_duration > 60:  # ignore blips shorter than a minute
                    segments.append(
                        {"start": start_ts, "end": ts, "duration": seg_duration}
                    )
                    print(
                        f" └─ FIN gameplay ({seg_duration // 60}m {seg_duration % 60}s)"
                    )
                in_gameplay = False
                start_ts = None

    # Remove the temp frame whether or not analysis succeeded.
    Path(frame_path).unlink(missing_ok=True)

    # Progress / ETA report every 10 frames.
    if (i + 1) % 10 == 0:
        elapsed = time.time() - start_time
        remaining = (elapsed / (i + 1)) * (len(timestamps) - i - 1)
        print(
            f"\n Progreso: {i + 1}/{len(timestamps)} frames | "
            f"Tiempo restante: {remaining // 60:.0f}m {remaining % 60:.0f}s\n"
        )
|
|
|
|
# Flush a gameplay run that was still open when the video ended.
if in_gameplay and start_ts:
    end = int(duration)
    segments.append({"start": start_ts, "end": end, "duration": end - start_ts})
|
|
|
|
# --- Summary -----------------------------------------------------------------
divider = "=" * 70
print(f"\n{divider}")
print("✅ ANÁLISIS VLM COMPLETADO")
print(divider)
print(f"Segmentos de gameplay: {len(segments)}")
total_gameplay = sum(s["duration"] for s in segments)
gp_min, gp_sec = divmod(total_gameplay, 60)
print(f"Tiempo total gameplay: {gp_min}m {gp_sec}s")
# Non-gameplay time excludes the skipped 455-second intro.
print(f"Tiempo total hablando/otros: {(int(duration) - 455 - total_gameplay) // 60}m")
print()
|
|
|
|
# List each detected segment as h:mm ranges with its length.
# (The original computed unused seconds variables; dropped here.)
for i, seg in enumerate(segments, 1):
    hours_s, mins_s = divmod(seg["start"] // 60, 60)
    hours_e, mins_e = divmod(seg["end"] // 60, 60)
    dur_m, dur_s = divmod(seg["duration"], 60)
    print(
        f"{i}. {hours_s}h{mins_s:02d}m - {hours_e}h{mins_e:02d}m "
        f"({dur_m}m {dur_s}s)"
    )
|
|
|
|
# Persist the detected zones for the downstream highlight-filter step.
output_file = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/gameplay_vlm_zones.json"
)
Path(output_file).write_text(json.dumps(segments, indent=2))

print(f"\n💾 Guardado: {output_file}")
print("\nAhora puedes filtrar highlights usando estos rangos exactos.")
|