- Implementación de detector híbrido (Whisper + Chat + Audio + VLM) - Sistema de detección de gameplay real vs hablando - Scene detection con FFmpeg - Soporte para RTX 3050 y RX 6800 XT - Guía completa en 6800xt.md para próxima IA - Scripts de filtrado visual y análisis de contexto - Pipeline automatizado de generación de videos
261 lines
7.9 KiB
Python
261 lines
7.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Detector híbrido: usa picos del chat + filtra con transcripción (minimax)
|
|
"""
|
|
import json
|
|
import logging
|
|
import os
|
|
import numpy as np
|
|
from openai import OpenAI
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_chat_peaks(chat_data, window_seconds=5, percentile_threshold=85):
    """Detect the seconds in which chat activity is unusually high.

    Messages are counted per second, smoothed with a moving average of
    ``window_seconds`` samples, and every second whose smoothed value is
    strictly above the ``percentile_threshold`` percentile of the non-zero
    smoothed values is reported as a peak.

    Args:
        chat_data: Mapping with a ``'comments'`` list; every comment must
            carry a numeric ``'content_offset_seconds'``. Fractional offsets
            are truncated to whole seconds and negative offsets are ignored.
        window_seconds: Length of the moving-average window, in samples.
        percentile_threshold: Percentile (0-100) used as the peak cut-off.

    Returns:
        A tuple ``(peak_indices, activity_smooth)``. ``peak_indices`` is a
        1-D integer array with the seconds classified as peaks and
        ``activity_smooth`` is a float array with one smoothed value per
        second of chat. Both arrays are empty when there is no usable
        comment.
    """
    seconds = np.array(
        [int(comment['content_offset_seconds']) for comment in chat_data['comments']],
        dtype=np.intp,
    )
    # A negative offset would be counted from the END of the activity array
    # (Python negative indexing), so such comments are discarded.
    seconds = seconds[seconds >= 0]

    if seconds.size == 0:
        # Without this guard max() / np.percentile would fail on empty input.
        logger.warning("El chat no contiene comentarios; no hay picos que detectar")
        return np.array([], dtype=np.intp), np.zeros(0, dtype=np.float64)

    duration = int(seconds.max()) + 1

    # Messages per second.
    activity = np.bincount(seconds, minlength=duration).astype(np.int32)

    # Moving-average smoothing. mode='same' yields max(len(activity),
    # len(kernel)) samples, which is longer than the chat itself when the chat
    # is shorter than the window. The centred slice of the full convolution
    # equals mode='same' for normal inputs and always has `duration` samples.
    kernel = np.ones(window_seconds) / window_seconds
    offset = (window_seconds - 1) // 2
    activity_smooth = np.convolve(activity, kernel, mode='full')[offset:offset + duration]

    # Percentile is taken over active seconds only, so long silent stretches
    # do not drag the threshold down.
    threshold = np.percentile(activity_smooth[activity_smooth > 0], percentile_threshold)

    peak_indices = np.where(activity_smooth > threshold)[0]

    logger.info(f"Picos de chat: {len(peak_indices)} segundos con actividad alta")
    logger.info(f"Threshold: {threshold:.1f} mensajes/segundo (percentil {percentile_threshold})")

    return peak_indices, activity_smooth
|
|
|
|
|
|
def group_peaks_into_intervals(peak_indices, min_duration=15, max_duration=30, gap_seconds=8):
    """Merge neighbouring peak seconds into clip intervals.

    Consecutive peaks that are at most ``gap_seconds`` apart belong to the
    same run. Each run becomes one ``(start, end)`` tuple that begins at the
    run's first peak and whose length is the run's span, raised to
    ``min_duration`` or capped at ``max_duration`` when it falls outside that
    range.

    Args:
        peak_indices: Ordered sequence (list or array) of peak seconds.
        min_duration: Shortest interval length to emit, in seconds.
        max_duration: Longest interval length to emit, in seconds.
        gap_seconds: Largest distance between two peaks of the same run.

    Returns:
        List of ``(start, end)`` tuples of Python ints, one per run; an empty
        list when there are no peaks.
    """
    if len(peak_indices) == 0:
        return []

    def _close_run(first, last):
        # Length of the run, forced into [min_duration, max_duration].
        span = last - first
        if span < min_duration:
            span = min_duration
        elif span > max_duration:
            span = max_duration
        return int(first), int(first + span)

    result = []
    run_first = run_last = peak_indices[0]

    for current in peak_indices[1:]:
        if current - run_last > gap_seconds:
            # Gap too wide: the running group ends at the previous peak.
            result.append(_close_run(run_first, run_last))
            run_first = current
        run_last = current

    # The final run is still open once the loop finishes.
    result.append(_close_run(run_first, run_last))

    return result
|
|
|
|
|
|
def get_transcript_segments(transcripcion_json, start, end):
    """Return the transcript text that overlaps a time window.

    Args:
        transcripcion_json: Path to a UTF-8 JSON file whose optional
            ``"segments"`` list holds objects with ``"start"``, ``"end"``
            and ``"text"`` entries.
        start: Beginning of the window, on the same time scale as the
            segment bounds.
        end: End of the window (inclusive).

    Returns:
        The stripped texts of every segment touching ``[start, end]``, in
        file order, joined with single spaces; ``""`` when none match.
    """
    with open(transcripcion_json, 'r', encoding='utf-8') as handle:
        transcript = json.load(handle)

    bounded = (
        (entry["start"], entry["end"], entry)
        for entry in transcript.get("segments", [])
    )
    # A segment is kept when it overlaps the window, bounds included.
    pieces = [
        entry["text"].strip()
        for seg_begin, seg_finish, entry in bounded
        if seg_finish >= start and seg_begin <= end
    ]

    return " ".join(pieces)
|
|
|
|
|
|
def _parse_si_no_reply(content, expected):
    """Convert the model's "SI,NO,..." reply into ``expected`` INCLUDE/EXCLUDE labels."""
    # If the reply carries a <think>...</think> preamble, keep only what
    # follows it so words inside the reasoning are not read as answers.
    answer = content.strip().upper().rpartition("</THINK>")[2]

    decisions = []
    for token in answer.replace(',', ' ').split():
        # Tolerate punctuation glued to an answer ("SI.", "NO;") and the
        # accented spelling; dropping such tokens would shift every later
        # decision onto the wrong clip.
        token = token.strip(".;:!?¡¿\"'()[]*")
        if token in ("SI", "SÍ", "INCLUDE"):
            decisions.append("INCLUDE")
        elif token in ("NO", "EXCLUDE"):
            decisions.append("EXCLUDE")

    # Surplus answers are ignored; missing ones default to INCLUDE so an
    # incomplete reply never discards a clip.
    decisions = decisions[:expected]
    decisions.extend(["INCLUDE"] * (expected - len(decisions)))
    return decisions


def filter_intervals_with_minimax(intervals, transcripcion_json, api_key=None):
    """Ask a chat model which intervals are worth keeping.

    The transcript text of each interval is summarised in a prompt and the
    model answers SI/NO per clip. The call goes through the OpenAI-compatible
    client, using ``OPENAI_BASE_URL`` (default ``https://api.minimax.io/v1``).

    Args:
        intervals: Sequence of ``(start, end)`` tuples in whole seconds.
        transcripcion_json: Path to the transcript JSON consumed by
            ``get_transcript_segments``.
        api_key: API key; falls back to ``OPENAI_API_KEY`` when falsy.

    Returns:
        List with the intervals marked as interesting, in their original
        order. Only the first 50 intervals are sent to the model; any further
        intervals are kept unfiltered. If the request or the parsing fails,
        every interval is kept (best effort).
    """
    if len(intervals) == 0:
        # Nothing to classify: avoid a pointless API request.
        return []

    base_url = os.environ.get("OPENAI_BASE_URL", "https://api.minimax.io/v1")
    if not api_key:
        api_key = os.environ.get("OPENAI_API_KEY")

    client = OpenAI(base_url=base_url, api_key=api_key)

    # Cap the prompt size; the model is asked only about the clips it sees.
    max_clips_in_prompt = 50
    shown = intervals[:max_clips_in_prompt]
    unseen = intervals[max_clips_in_prompt:]

    # One "[mm:ss] text" line per clip, text truncated to 100 characters.
    summary_lines = []
    for start, end in shown:
        text = get_transcript_segments(transcripcion_json, start, end)
        mins = start // 60
        secs = start % 60
        summary_lines.append(f"[{mins:02d}:{secs:02d}] {text[:100]}")

    full_summary = "\n".join(summary_lines)

    prompt = f"""Eres un filtrador de contenido de Twitch.

CLIPS A ANALIZAR ({len(shown)} clips):
{full_summary}

TU TAREA: Para cada clip, responde SOLO "SI" o "NO".

"SI" = incluir, si el clip tiene:
- Risas, carcajadas, jajaja
- Emoción, entusiasmo, celebración
- Algo gracioso o inesperado
- Mencion de jugada épica

"NO" = excluir, si el clip tiene:
- Quejas, insultos, rage negativo
- Conversación aburrida
- Silencio o texto muy corto
- Repetición de palabras sin sentido (como "Gigi")

IMPORTANTE: Responde en una sola línea con SI/NO separados por coma.

Ejemplo: SI,NO,SI,SI,NO,SI,NO

Tu respuesta para los {len(shown)} clips:"""

    try:
        response = client.chat.completions.create(
            model="MiniMax-M2.5",
            messages=[
                {"role": "system", "content": "Eres un experto editor que identifica momentos virales."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
            max_tokens=500
        )

        decisions = _parse_si_no_reply(response.choices[0].message.content, len(shown))

        logger.info(f"Decisiones de la IA: {sum(1 for d in decisions if d == 'INCLUDE')} INCLUDE, {sum(1 for d in decisions if d == 'EXCLUDE')} EXCLUDE")

    except Exception as e:
        # Deliberate best effort: any API or parsing failure keeps every clip
        # instead of aborting the pipeline.
        logger.error(f"Error en API: {e}")
        decisions = ["INCLUDE"] * len(shown)

    filtered = [
        interval
        for interval, decision in zip(shown, decisions)
        if decision == "INCLUDE"
    ]
    # Clips that were never shown to the model are kept as they are.
    filtered.extend(unseen)

    logger.info(f"Intervalos después del filtro: {len(filtered)}/{len(intervals)}")

    return filtered
|
|
|
|
|
|
def main():
    """Command-line entry point for the hybrid highlight detector.

    Loads the chat JSON given by ``--chat``, detects activity peaks, groups
    them into intervals bounded by ``--min-duration`` / ``--max-duration``,
    filters the first ``--top`` intervals with the language model using the
    transcript given by ``--transcripcion``, writes the surviving intervals
    as JSON to ``--output`` and prints a summary table.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--chat", required=True)
    parser.add_argument("--transcripcion", required=True)
    parser.add_argument("--output", default="highlights_hibrido.json")
    parser.add_argument("--top", type=int, default=25)
    parser.add_argument("--min-duration", type=int, default=15)
    parser.add_argument("--max-duration", type=int, default=30)
    args = parser.parse_args()

    # Load the chat. The encoding is explicit (as for the transcript) so that
    # non-ASCII chat text does not depend on the platform default encoding.
    logger.info("Cargando chat...")
    with open(args.chat, 'r', encoding='utf-8') as f:
        chat_data = json.load(f)

    # Detect chat peaks; the smoothed activity curve is not needed here.
    peak_indices, _activity_smooth = get_chat_peaks(chat_data)

    # Group peaks into intervals.
    intervals = group_peaks_into_intervals(
        peak_indices,
        min_duration=args.min_duration,
        max_duration=args.max_duration
    )

    logger.info(f"Intervalos de chat: {len(intervals)}")

    # NOTE(review): this keeps the FIRST --top intervals in chronological
    # order, not the most active ones -- confirm that is the intent.
    filtered = filter_intervals_with_minimax(
        intervals[:args.top],
        args.transcripcion
    )

    # Save the result.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(filtered, f)

    logger.info(f"Guardado en {args.output}")

    # Print the summary.
    total_seconds = sum(e - s for s, e in filtered)

    print(f"\n{'='*70}")
    print("HIGHLIGHTS HÍBRIDOS (Chat + IA)".center(70))
    print(f"{'='*70}")
    print(f"Total: {len(filtered)} clips")
    print(f"Duración total: {total_seconds}s ({total_seconds/60:.1f} min)")
    print(f"{'-'*70}")

    for i, (start, end) in enumerate(filtered, 1):
        duration = end - start
        h = start // 3600
        m = (start % 3600) // 60
        sec = start % 60
        # Only the first 60 characters of the transcript are shown per clip.
        text = get_transcript_segments(args.transcripcion, start, end)[:60]
        print(f"{i:2d}. {h:02d}:{m:02d}:{sec:02d} - {duration}s - {text}...")

    print(f"{'='*70}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|