- Implementación de detector híbrido (Whisper + Chat + Audio + VLM) - Sistema de detección de gameplay real vs hablando - Scene detection con FFmpeg - Soporte para RTX 3050 y RX 6800 XT - Guía completa en 6800xt.md para próxima IA - Scripts de filtrado visual y análisis de contexto - Pipeline automatizado de generación de videos
155 lines
4.7 KiB
Python
155 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Second pass: remove dead time from the existing clips.

Uses the transcription to detect silences and irrelevant content.
|
|
"""
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
import tempfile
|
|
import os
|
|
|
|
# Configure the root logger once at import time so INFO records and above
# are emitted.
logging.basicConfig(level=logging.INFO)

# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Filler expressions that, on their own, do not count as relevant content.
_FILLERS = ('eh', 'ah', 'um', 'ehm', 'está', 'va', 'o sea', 'bueno')
_FILLER_WORDS = frozenset(f for f in _FILLERS if ' ' not in f)
# Multi-word fillers are kept as token tuples so they can be matched against
# consecutive words (a whitespace-split word can never equal "o sea").
_FILLER_PHRASES = tuple(tuple(f.split()) for f in _FILLERS if ' ' in f)
# Punctuation stripped from each word before comparing it with the fillers.
_STRIP_CHARS = '.,;:!?¡¿…"\'()[]-'

# Segments separated by at most this many seconds are merged into one group.
_MERGE_GAP_SECONDS = 2
# Padding, in seconds, added before and after every merged group.
_MARGIN_SECONDS = 1


def _is_only_fillers(text):
    """Return True when *text* is made up solely of filler words/phrases."""
    tokens = [word.strip(_STRIP_CHARS) for word in text.lower().split()]
    i = 0
    while i < len(tokens):
        # Try multi-word fillers first so that "o sea" is consumed as a unit.
        phrase_len = next(
            (
                len(phrase)
                for phrase in _FILLER_PHRASES
                if tuple(tokens[i:i + len(phrase)]) == phrase
            ),
            0,
        )
        if phrase_len:
            i += phrase_len
            continue
        token = tokens[i]
        # Tokens of one character or less are ignored, as in the original
        # single-word check.
        if len(token) > 1 and token not in _FILLER_WORDS:
            return False
        i += 1
    return True


def analyze_clip_content(transcripcion_json, start, end):
    """
    Find which parts of a clip contain relevant spoken content.

    Args:
        transcripcion_json: Path to a UTF-8 JSON file holding a top-level
            "segments" list; every segment must provide "start", "end" and
            "text".
        start: Clip start time, on the same timeline as the segment times.
        end: Clip end time, on the same timeline as the segment times.

    Returns:
        A list of ``(start, end)`` tuples expressed relative to the clip
        start, sorted by start time. Segments whose text is shorter than two
        characters or consists only of filler words are ignored; the rest
        are merged when they are at most 2 seconds apart and padded by
        1 second on each side (clamped to the clip). When no relevant
        segment overlaps the clip, the whole clip ``[(0, end - start)]`` is
        returned.

    Raises:
        OSError: If the transcription file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a segment lacks "start", "end" or "text".
    """
    with open(transcripcion_json, 'r', encoding='utf-8') as f:
        data = json.load(f)

    clip_length = end - start

    # Collect the clip-relative span of every relevant overlapping segment.
    spoken = []
    for seg in data.get("segments", []):
        seg_start = seg["start"]
        seg_end = seg["end"]

        # Skip segments that do not overlap the clip at all.
        if seg_end < start or seg_start > end:
            continue

        text = seg["text"].strip()
        if len(text) < 2 or _is_only_fillers(text):
            continue

        rel_start = max(0, seg_start - start)
        rel_end = min(clip_length, seg_end - start)
        # Segments that merely touch the clip boundary have zero length.
        if rel_end > rel_start:
            spoken.append((rel_start, rel_end))

    if not spoken:
        # Nothing relevant was found: keep the whole clip.
        return [(0, clip_length)]

    spoken.sort()

    # Merge segments that are close together. The end is the maximum of both
    # ends so a segment nested inside the current group cannot shrink it.
    merged = [list(spoken[0])]
    for seg_start, seg_end in spoken[1:]:
        last = merged[-1]
        if seg_start - last[1] <= _MERGE_GAP_SECONDS:
            last[1] = max(last[1], seg_end)
        else:
            merged.append([seg_start, seg_end])

    # Pad each group, keeping the result inside the clip.
    return [
        (max(0, s - _MARGIN_SECONDS), min(clip_length, e + _MARGIN_SECONDS))
        for s, e in merged
    ]
|
|
|
|
|
|
def refine_intervals(intervals_json, transcripcion_json, output_json):
    """
    Refine existing clip intervals by trimming dead time at their edges.

    Each original interval is narrowed to span from the first to the last
    content interval reported by ``analyze_clip_content``. Results shorter
    than 5 seconds are widened to 5 seconds around their midpoint, shifted
    forward if needed so the start never becomes negative.

    Args:
        intervals_json: Path to a JSON file holding a list of
            ``[start, end]`` pairs.
        transcripcion_json: Path to the transcription JSON file, passed
            through to ``analyze_clip_content``.
        output_json: Path where the refined list is written as JSON.

    Returns:
        The refined intervals as a list of ``[start, end]`` lists with
        integer values (fractions are truncated).
    """
    min_duration = 5

    logger.info("=== SEGUNDA PASADA: Eliminando tiempos muertos ===")

    # Explicit encoding keeps reading independent of the platform default.
    with open(intervals_json, 'r', encoding='utf-8') as f:
        original_intervals = json.load(f)

    refined = []
    total_original = sum(e - s for s, e in original_intervals)

    # NOTE: analyze_clip_content re-reads the transcription file on each call.
    for start, end in original_intervals:
        content_intervals = analyze_clip_content(transcripcion_json, start, end)

        if not content_intervals:
            continue

        # The first content interval gives the new start and the last one
        # gives the new end; both are relative to the clip start.
        new_start = start + content_intervals[0][0]
        new_end = start + content_intervals[-1][1]

        # Enforce the minimum duration around the midpoint.
        if new_end - new_start < min_duration:
            mid = (new_start + new_end) / 2
            new_start = mid - min_duration / 2
            new_end = mid + min_duration / 2
            # A window centred near the beginning of the video would start
            # at a negative timestamp; slide it forward instead.
            if new_start < 0:
                new_end -= new_start
                new_start = 0

        refined.append([int(new_start), int(new_end)])

    # Persist the refined intervals.
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(refined, f)

    total_refined = sum(e - s for s, e in refined)
    time_saved = total_original - total_refined

    logger.info(f"Intervalos originales: {len(original_intervals)}")
    logger.info(f"Intervalos refinados: {len(refined)}")
    logger.info(f"Tiempo original: {total_original}s ({total_original/60:.1f} min)")
    logger.info(f"Tiempo refinado: {total_refined}s ({total_refined/60:.1f} min)")
    logger.info(f"Tiempo ahorrado: {time_saved}s ({time_saved/60:.1f} min)")

    return refined
|
|
|
|
|
|
def main():
    """Parse the command-line options, run the second pass and print a banner."""
    import argparse

    # Each entry is (flag, keyword arguments for add_argument).
    option_specs = (
        ("--intervals", {"required": True}),
        ("--transcripcion", {"required": True}),
        ("--output", {"default": "highlights_refined.json"}),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    refine_intervals(options.intervals, options.transcripcion, options.output)

    # Summary banner framed by two 70-character rules.
    rule = "=" * 70
    print(f"\n{rule}")
    print("SEGUNDA PASADA COMPLETADA".center(70))
    print(f"Guardado en: {options.output}")
    print(rule)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|