- Implementación de detector híbrido (Whisper + Chat + Audio + VLM)
- Sistema de detección de gameplay real vs hablando
- Scene detection con FFmpeg
- Soporte para RTX 3050 y RX 6800 XT
- Guía completa en 6800xt.md para próxima IA
- Scripts de filtrado visual y análisis de contexto
- Pipeline automatizado de generación de videos
133 lines · 3.2 KiB · Python
#!/opt/vlm_env/bin/python3
import json
import subprocess
from pathlib import Path

import cv2
import numpy as np
import torch
|
# ---------------------------------------------------------------------------
# GPU gameplay detector: samples one frame every SAMPLE_STEP seconds from a
# stream VOD, scores each frame with cheap colour/edge heuristics (colour
# statistics on the GPU via torch, Canny edges on the CPU via OpenCV), and
# writes the detected gameplay segments to a JSON file.
# ---------------------------------------------------------------------------

VIDEO_PATH = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/nuevo_stream_360p.mp4"
)
OUTPUT_PATH = (
    "/home/ren/proyectos/editor/twitch-highlight-detector/gameplay_zones_final.json"
)

SAMPLE_START = 455  # first sampled timestamp (s) — presumably skips the intro; confirm
SAMPLE_STEP = 30    # seconds between sampled frames
MIN_SEGMENT = 60    # a gameplay run must exceed this many seconds to be recorded


def probe_duration(video_path):
    """Return the duration of *video_path* in seconds via ffprobe.

    Raises:
        RuntimeError: if ffprobe exits non-zero or prints nothing, with
            ffprobe's stderr in the message (the original code crashed later
            with an opaque ``float('')`` ValueError instead).
    """
    result = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            video_path,
        ],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0 or not result.stdout.strip():
        raise RuntimeError(
            f"ffprobe failed for {video_path}: {result.stderr.strip()}"
        )
    return float(result.stdout.strip())


def extract_frame(video_path, ts, frame_path):
    """Extract one 320x180 frame at second *ts* of *video_path* to *frame_path*.

    ``-ss`` is placed BEFORE ``-i`` (input seeking): the original placed it
    after, which makes ffmpeg decode from the start of the stream for every
    sample — quadratic over the video length. Errors are deliberately
    swallowed (best effort); the caller checks whether the file appeared.
    """
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-ss",
            str(ts),
            "-i",
            video_path,
            "-vframes",
            "1",
            "-vf",
            "scale=320:180",
            frame_path,
        ],
        capture_output=True,
    )


def gameplay_score(variance, green_ratio, green_mean, edge_density):
    """Pure heuristic: combine frame statistics into a score in [0, 1].

    High pixel variance, green dominance and dense edges are typical of
    gameplay footage; a webcam/talking-head shot scores low on all of them.

    Args:
        variance: per-pixel std-dev over all RGB channels.
        green_ratio: fraction of pixels where green exceeds red.
        green_mean: mean of the green channel.
        edge_density: fraction of Canny edge pixels.
    """
    score = 0.0
    if variance > 30:
        score += 0.3
    if variance > 40:
        score += 0.2  # extra weight for very busy frames
    if green_ratio > 0.4:
        score += 0.2
    if green_mean > 90:
        score += 0.1
    if edge_density > 0.05:
        score += 0.2
    return score


def frame_metrics(frame):
    """Compute (variance, green_ratio, green_mean, edge_density) for a BGR frame.

    Colour statistics run on the GPU; the tensor is released in ``finally``
    so a failure mid-computation cannot strand GPU memory.
    """
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    tensor = torch.from_numpy(frame_rgb).float().cuda()
    try:
        variance = tensor.std().item()
        green_ratio = (tensor[:, :, 1] > tensor[:, :, 0]).float().mean().item()
        green_mean = tensor[:, :, 1].mean().item()
    finally:
        del tensor
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    edge_density = (edges > 0).sum() / (edges.shape[0] * edges.shape[1])
    return variance, green_ratio, green_mean, edge_density


def main():
    """Scan the VOD, print a per-sample verdict, and dump gameplay segments."""
    print("=" * 70)
    print("GPU GAMEPLAY DETECTOR - RTX 3050")
    print("=" * 70)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print()

    duration = probe_duration(VIDEO_PATH)
    print(f"Video: {duration / 60:.1f} min")
    print("Analizando frames en GPU...")
    print()

    segments = []
    in_gameplay = False
    start_ts = None

    # Iterate the range lazily (the original materialized it into a list).
    for i, ts in enumerate(range(SAMPLE_START, int(duration), SAMPLE_STEP)):
        frame_path = f"/tmp/frame_{ts}.jpg"
        extract_frame(VIDEO_PATH, ts, frame_path)
        if not Path(frame_path).exists():
            continue

        frame = cv2.imread(frame_path)
        # Unlink immediately after reading: the original only cleaned up at
        # the end of the loop body, leaking the file when imread failed.
        Path(frame_path).unlink(missing_ok=True)
        if frame is None:
            continue

        score = gameplay_score(*frame_metrics(frame))
        is_gameplay = score >= 0.5

        icon = "🎮" if is_gameplay else "🗣️"
        print(f"{ts // 60:02d}:{ts % 60:02d} {icon} score={score:.2f}")

        if is_gameplay:
            if not in_gameplay:
                start_ts = ts
                in_gameplay = True
        # A non-gameplay sample only closes the segment once it is longer
        # than MIN_SEGMENT; shorter runs keep the state open (hysteresis
        # across brief talking breaks), matching the original behaviour.
        elif in_gameplay and start_ts is not None and (ts - start_ts) > MIN_SEGMENT:
            segments.append({"start": start_ts, "end": ts, "duration": ts - start_ts})
            print(f" Gameplay: {start_ts // 60}m-{ts // 60}m")
            in_gameplay = False
            start_ts = None

        if i % 10 == 0:
            torch.cuda.empty_cache()

    # Flush a segment still open when the video ends.
    if in_gameplay and start_ts is not None:
        segments.append(
            {"start": start_ts, "end": int(duration), "duration": int(duration) - start_ts}
        )

    print(f"\nGameplays: {len(segments)}")
    for s in segments:
        print(f" {s['start'] // 60}m-{s['end'] // 60}m ({s['duration'] // 60}m)")

    with open(OUTPUT_PATH, "w") as f:
        json.dump(segments, f)

    print("\nGuardado: gameplay_zones_final.json")


if __name__ == "__main__":
    main()