#!/opt/vlm_env/bin/python3
"""VLM GAMEPLAY DETECTOR - Nivel Senior.

Uses a locally hosted Moondream 2B vision-language model on GPU to classify
frames sampled from a stream VOD and detect the time ranges that contain
real League of Legends gameplay (as opposed to champion select, the
streamer's webcam, or menus/loading screens).

Writes the detected segments to ``gameplay_vlm_zones.json`` as a list of
``{"start": s, "end": s, "duration": s}`` dicts (all values in seconds).
"""
import sys

# The VLM virtualenv is not on the default interpreter path.
sys.path.insert(0, "/opt/vlm_env/lib/python3.13/site-packages")

import json
import subprocess
import time
from pathlib import Path

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Configuration ----------------------------------------------------------
MODEL_ID = "vikhyatk/moondream2"
VIDEO_PATH = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/nuevo_stream_360p.mp4"
)
OUTPUT_FILE = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/gameplay_vlm_zones.json"
)
START_OFFSET = 455   # seconds of intro to skip before sampling starts
CHECK_INTERVAL = 30  # sample one frame every N seconds
MIN_SEGMENT = 60     # drop gameplay segments shorter than this (seconds)

# Classification prompt for League of Legends streams; the model is asked to
# answer with exactly one word so the result can be string-matched.
PROMPT = """Look at this image from a gaming stream. Is this showing:
1. ACTIVE GAMEPLAY - League of Legends match in progress (map visible, champions fighting, abilities being used)
2. CHAMPION SELECT - Lobby or selection screen
3. STREAMER TALKING - Just the streamer face/webcam without game visible
4. MENU/WAITING - Game menus, loading screens, or waiting
Answer with ONLY ONE word: GAMEPLAY, SELECT, TALKING, or MENU"""


def load_model():
    """Load Moondream 2B in fp16 onto the GPU.

    Returns:
        (model, tokenizer) tuple ready for encode_image / answer_question.
    """
    print("📥 Cargando Moondream 2B en GPU...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.float16,
        device_map={"": "cuda"},
    )
    print("✅ Modelo cargado y listo")
    print()
    return model, tokenizer


def _video_duration(video_path):
    """Return the video duration in seconds using ffprobe.

    Raises:
        RuntimeError: if ffprobe fails or produces no output (previously this
            crashed with a cryptic ValueError from ``float("")``).
    """
    result = subprocess.run(
        [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            video_path,
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0 or not result.stdout.strip():
        raise RuntimeError(
            f"ffprobe failed for {video_path}: {result.stderr.strip()}"
        )
    return float(result.stdout.strip())


def _extract_frame(video_path, ts, frame_path):
    """Extract one downscaled JPEG frame at second ``ts``.

    Returns True if the frame file was produced.

    Note: ``-ss`` is placed BEFORE ``-i`` for fast input seeking (accurate on
    modern ffmpeg). The original placed it after ``-i``, which decodes the
    whole video up to ``ts`` for every sampled frame — accidentally quadratic
    over a multi-hour VOD.
    """
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-ss", str(ts),
            "-i", video_path,
            "-vframes", "1",
            "-vf", "scale=512:288",  # small but sufficient for the VLM
            "-q:v", "3",
            frame_path,
        ],
        capture_output=True,
    )
    return Path(frame_path).exists()


def analyze_frame(image_path, timestamp, model, tokenizer):
    """Classify a single frame with the Moondream VLM.

    Args:
        image_path: path to the extracted JPEG frame.
        timestamp: position of the frame in the video, in seconds.
        model, tokenizer: as returned by :func:`load_model`.

    Returns:
        dict with timestamp / is_gameplay / classification / confidence,
        or None if the model call failed (best-effort: one bad frame must
        not abort a multi-hour analysis run).
    """
    try:
        # Context manager closes the file handle; the original leaked one
        # handle per analyzed frame.
        with Image.open(image_path) as image:
            enc_image = model.encode_image(image)
        answer = model.answer_question(enc_image, PROMPT, tokenizer)
        result = answer.strip().upper()
        is_gameplay = "GAMEPLAY" in result
        return {
            "timestamp": timestamp,
            "is_gameplay": is_gameplay,
            "classification": result,
            "confidence": "HIGH" if is_gameplay else "LOW",
        }
    except Exception as e:  # best-effort per frame; log and continue
        print(f" Error en {timestamp}s: {e}")
        return None


def main():
    """Run the full VOD analysis and write the gameplay zones JSON."""
    print("=" * 70)
    print("🎮 VLM GAMEPLAY DETECTOR - Moondream 2B (Local GPU)")
    print("=" * 70)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(
        f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB"
    )
    print()

    model, tokenizer = load_model()

    duration = _video_duration(VIDEO_PATH)
    print(f"📹 Video: {duration / 60:.1f} minutos ({duration / 3600:.1f} horas)")
    print("🔍 Analizando cada 30 segundos con VLM...")
    print(" (Esto tomará ~10-15 minutos)")
    print()

    timestamps = list(range(START_OFFSET, int(duration), CHECK_INTERVAL))
    segments = []
    in_gameplay = False
    start_ts = None
    start_time = time.time()

    for i, ts in enumerate(timestamps):
        mins, secs = divmod(ts, 60)

        frame_path = f"/tmp/vlm_frame_{ts}.jpg"
        if not _extract_frame(VIDEO_PATH, ts, frame_path):
            continue

        analysis = analyze_frame(frame_path, ts, model, tokenizer)
        if analysis:
            icon = "🎮" if analysis["is_gameplay"] else "🗣️"
            print(f"{mins:02d}:{secs:02d} {icon} {analysis['classification']}")

            # Segment state machine: open on first GAMEPLAY frame, close on
            # the first non-gameplay frame after one.
            if analysis["is_gameplay"]:
                if not in_gameplay:
                    start_ts = ts
                    in_gameplay = True
                    print(" └─ INICIO gameplay")
            elif in_gameplay and start_ts:
                seg_duration = ts - start_ts
                if seg_duration > MIN_SEGMENT:
                    segments.append(
                        {"start": start_ts, "end": ts, "duration": seg_duration}
                    )
                    print(
                        f" └─ FIN gameplay ({seg_duration // 60}m {seg_duration % 60}s)"
                    )
                # Reset even for too-short segments (same as original).
                in_gameplay = False
                start_ts = None

        Path(frame_path).unlink(missing_ok=True)

        # Progress estimate every 10 frames.
        if (i + 1) % 10 == 0:
            elapsed = time.time() - start_time
            remaining = (elapsed / (i + 1)) * (len(timestamps) - i - 1)
            print(
                f"\n Progreso: {i + 1}/{len(timestamps)} frames | "
                f"Tiempo restante: {remaining // 60:.0f}m {remaining % 60:.0f}s\n"
            )

    # Close a segment still open at end of video.
    if in_gameplay and start_ts:
        segments.append(
            {
                "start": start_ts,
                "end": int(duration),
                "duration": int(duration) - start_ts,
            }
        )

    # --- Report -------------------------------------------------------------
    print(f"\n{'=' * 70}")
    print("✅ ANÁLISIS VLM COMPLETADO")
    print(f"{'=' * 70}")
    print(f"Segmentos de gameplay: {len(segments)}")
    total_gameplay = sum(s["duration"] for s in segments)
    print(f"Tiempo total gameplay: {total_gameplay // 60}m {total_gameplay % 60}s")
    print(
        f"Tiempo total hablando/otros: "
        f"{(int(duration) - START_OFFSET - total_gameplay) // 60}m"
    )
    print()

    for i, seg in enumerate(segments, 1):
        mins_s = seg["start"] // 60
        mins_e = seg["end"] // 60
        hours_s = mins_s // 60
        hours_e = mins_e // 60
        print(
            f"{i}. {hours_s}h{mins_s % 60:02d}m - {hours_e}h{mins_e % 60:02d}m "
            f"({seg['duration'] // 60}m {seg['duration'] % 60}s)"
        )

    with open(OUTPUT_FILE, "w") as f:
        json.dump(segments, f, indent=2)

    print(f"\n💾 Guardado: {OUTPUT_FILE}")
    print("\nAhora puedes filtrar highlights usando estos rangos exactos.")


if __name__ == "__main__":
    main()