commit 9fb01d5c2258343c382c6ed7f79bf8d912425ba5
Author: renato97 <renato97@gitea.cbcren.online>
Date:   Tue Dec 16 22:32:27 2025 +0000

    Initial commit

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..7320942
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,7 @@
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.m4a filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.blob filter=lfs diff=lfs merge=lfs -text
+pdf_test/imperio5.pdf !text !filter !merge !diff
+pdf_test/imperio5_ocr.pdf !text !filter !merge !diff
+downloads/* !text !filter !merge !diff
diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..2d820fa
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,71 @@
+# Local environment files
+.env
+
+# Python cache
+__pycache__/
+*.pyc
+.venv/
+
+# Application-generated data
+downloads/
+resumenes/
+resumenes_docx/
+processed_files.txt
+*_unificado.docx
+resumen_*.md
+downloads/**/*.md
+downloads/**/*.docx
+resumenes_docx/**/*.docx
+resumenes_docx/**/*.md
+resumenes/**/*.md
+resumenes/**/*.docx
+
+# Ollama data
+ollama_data/
+ollama_data/models/blobs/
+
+# Node.js
+.npm/
+
+# Logs
+logs/
+*.log
+
+# Test files
+pdf_test/
+cereal*.txt
+test_*.py
+docker-compose.test.yml
+Dockerfile.test
+requirements_summaries.txt
+
+# Runtime state
+.main_service.lock
+cbc-main.pid
+*.pid
+*.db
+
+# System files
+.docker/buildx/
+.dotnet/
+.gemini/
+.ssh/
+.sudo_as_admin_successful
+resumenes/
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+.aider*
diff --git a/ARQUITECTURA.md b/ARQUITECTURA.md
new file mode 100644
index 0000000..10fddec
--- /dev/null
+++ b/ARQUITECTURA.md
@@ -0,0 +1,240 @@
+# 🏗️ Arquitectura del Sistema Integrado
+
+## 📊 Diagrama General
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           SISTEMA COMPLETO                                  │
+│                                                                             │
+│  ┌──────────────┐                         ┌──────────────┐                  │
+│  │   SERVICIO   │                         │  DASHBOARD   │                  │
+│  │   PRINCIPAL  │◄────── Comparte ───────►│     WEB      │                  │
+│  │   (main.py)  │        contexto         │ (dashboard)  │                  │
+│  └──────┬───────┘                         └──────┬───────┘                  │
+│         │                                         │                          │
+│         │ Polling cada 5s                        │ API REST                 │
+│         ▼                                         ▼                          │
+│  ┌──────────────────────────────────────────────────────────┐               │
+│  │            PROCESAMIENTO AUTOMÁTICO                      │               │
+│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐   │               │
+│  │  │   AUDIO  │ │    PDF   │ │   TXT    │ │  WEBDAV  │   │               │
+│  │  │Whisper+IA│ │  OCR+IA  │ │   IA     │ │ Sync     │   │               │
+│  │  └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘   │               │
+│  └──────┼─────────────┼─────────────┼────────────┼─────────┘               │
+│         │             │             │            │                          │
+│         └─────────────┴─────────────┴────────────┘                          │
+│                           │                                              │
+│                           ▼                                              │
+│                  ┌────────────────┐                                     │
+│                  │  NEXTCLOUD     │                                     │
+│                  │     (WebDAV)   │                                     │
+│                  └────────────────┘                                     │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## 🔄 Flujo de Datos
+
+### 1. Servicio Principal (main.py)
+```
+Inicio → start_dashboard() → main() [Bucle infinito]
+  ↓              ↓              ↓
+Dashboard    Hilo separado   Polling 5s
+Web (5000)    (daemon)       ↓
+                           Check Archivos
+                           ↓
+                           Procesar
+                           ↓
+                           Subir a Nextcloud
+```
+
+### 2. Dashboard Web (dashboard.py)
+```
+HTTP Request → Flask App → API Endpoints
+     ↓              ↓           ↓
+localhost:5000   Routing    file_manager
+     ↓              ↓           ↓
+  Browser      Python Code  Operar archivos
+```
+
+## 📡 Comunicación entre Componentes
+
+### Imports Compartidos
+```python
+# main.py importa del dashboard
+import dashboard
+dashboard.app.run()  # En hilo separado
+
+# dashboard.py importa de main
+from main import (
+    AUDIO_EXTENSIONS,
+    LOCAL_DOWNLOADS_PATH,
+    load_processed_files,
+    process_audio_file
+)
+```
+
+### Estado Compartido
+- ✅ Variables de configuración
+- ✅ Funciones de procesamiento
+- ✅ Registro de archivos procesados
+- ✅ Conexión WebDAV a Nextcloud
+
+## 🎯 Modos de Ejecución
+
+### Modo 1: Servicio Completo
+```bash
+python3 main.py
+```
+```
+┌─────────────────────────────────────┐
+│  THREAD PRINCIPAL                   │
+│  ┌──────────────────────────────┐  │
+│  │  main() - Bucle principal   │  │
+│  │  - Polling Nextcloud        │  │
+│  │  - Procesar archivos        │  │
+│  └──────────────────────────────┘  │
+└─────────────────────────────────────┘
+              │
+              │ threading.Thread
+              ▼
+┌─────────────────────────────────────┐
+│  DASHBOARD THREAD (daemon)          │
+│  ┌──────────────────────────────┐  │
+│  │  Flask Web Server           │  │
+│  │  - Puerto 5000              │  │
+│  │  - API REST                 │  │
+│  │  - Interfaz web             │  │
+│  └──────────────────────────────┘  │
+└─────────────────────────────────────┘
+```
+
+### Modo 2: Solo Dashboard
+```bash
+python3 main.py dashboard-only
+```
+```
+┌─────────────────────────────────────┐
+│  MAIN THREAD                        │
+│  ┌──────────────────────────────┐  │
+│  │  Flask Web Server ONLY      │  │
+│  │  - Puerto 5000              │  │
+│  │  - Sin bucle principal      │  │
+│  └──────────────────────────────┘  │
+└─────────────────────────────────────┘
+```
+
+## 🗂️ Estructura de Archivos
+
+```
+/home/ren/cbc/
+├── main.py                 # Servicio principal + integra dashboard
+├── dashboard.py            # Aplicación Flask independiente
+├── test_dashboard.py       # Script de pruebas
+├── templates/
+│   └── index.html         # Interfaz web del dashboard
+├── downloads/             # Archivos temporales locales
+├── processed_files.txt    # Registro de procesados
+├── QUICKSTART.md          # Guía de inicio rápido
+├── DASHBOARD_INSTRUCTIONS.md  # Manual detallado
+└── ARQUITECTURA.md        # Este archivo
+```
+
+## 🔌 API Endpoints del Dashboard
+
+| Método | Endpoint | Función |
+|--------|----------|---------|
+| GET | `/` | Página principal |
+| GET | `/api/files` | Obtener lista de archivos |
+| POST | `/api/reprocess` | Reprocesar archivo |
+| POST | `/api/mark-unprocessed` | Marcar como no procesado |
+| GET | `/api/refresh` | Refrescar lista |
+| GET | `/health` | Health check |
+| GET | `/downloads/<archivo>` | Descargar archivo |
+
+## 🚀 Proceso de Inicio
+
+```
+1. Usuario ejecuta: python3 main.py
+   │
+   ├─► main.py inicia
+   │   │
+   │   ├─► acquire_lock()  [Evitar múltiples instancias]
+   │   │
+   │   ├─► start_dashboard()  [INICIA DASHBOARD]
+   │   │   │
+   │   │   ├─► import dashboard
+   │   │   │
+   │   │   ├─► threading.Thread(target=run_dashboard)
+   │   │   │
+   │   │   ├─► dashboard_thread.start()
+   │   │   │
+   │   │   └─► ✅ Dashboard en http://localhost:5000
+   │   │
+   │   ├─► time.sleep(2)  [Pausa para dashboard]
+   │   │
+   │   └─► main()  [INICIA BUCLE PRINCIPAL]
+   │       │
+   │       └─► while True:
+   │           │
+   │           ├─► Polling Nextcloud
+   │           ├─► Verificar archivos nuevos
+   │           ├─► Procesar automáticamente
+   │           └─► sleep(5)  [esperar 5s]
+   │
+   └─► Servicio corriendo indefinidamente
+       │
+       ├─► Dashboard accesible
+       └─► Procesamiento activo
+```
+
+## 🔐 Características de Seguridad
+
+1. **Threading Daemon**
+   - Dashboard se cierra automáticamente con main.py
+   - No impide el cierre del programa
+
+2. **Lock File**
+   - Evita múltiples instancias de main.py
+   - Protección automática via `fcntl`
+
+3. **Error Handling**
+   - Dashboard puede fallar sin afectar main
+   - Logging detallado de errores
+
+4. **CORS Enabled**
+   - Flask-CORS configurado
+   - Acceso desde cualquier origen (cómodo en desarrollo; en producción conviene restringir los orígenes permitidos)
+
+## 💡 Ventajas de la Integración
+
+### ✅ Beneficios
+- **Un solo comando** para todo
+- **Contexto compartido** (config, funciones)
+- **Cierre automático** (hilo daemon)
+- **Logs unificados** (consola única)
+- **Sin dependencias externas** (todo en main.py)
+
+### ⚡ Rendimiento
+- **Dashboard**: ~5-10MB RAM
+- **Main**: Variable según procesamiento
+- **Comunicación**: Directa (mismo proceso)
+- **Latencia**: Mínima (sin red)
+
+### 🛠️ Mantenimiento
+- **Código unificado** en main.py
+- **Menos archivos** de configuración
+- **Debugging simplificado** (una sola consola)
+- **Actualización fácil** (un solo archivo)
+
+## 🎉 Resumen
+
+El sistema está **completamente integrado**:
+- ✅ Un solo comando: `python3 main.py`
+- ✅ Dashboard automático en puerto 5000
+- ✅ Servicio principal procesando 24/7
+- ✅ Interfaz web moderna y responsive
+- ✅ API REST completa
+- ✅ Gestión de archivos en tiempo real
+
+**¡Simplemente ejecuta `python3 main.py` y visita http://localhost:5000!** 🚀
diff --git a/DASHBOARD_INSTRUCTIONS.md b/DASHBOARD_INSTRUCTIONS.md
new file mode 100644
index 0000000..8a255d3
--- /dev/null
+++ b/DASHBOARD_INSTRUCTIONS.md
@@ -0,0 +1,206 @@
+# 🎛️ Dashboard Integrado - Instrucciones de Uso
+
+## 🚀 Inicio Rápido
+
+### Ejecutar servicio completo con dashboard
+
+```bash
+python3 main.py
+```
+
+**¡Listo!** Tendrás:
+- ✅ Servicio principal procesando archivos automáticamente
+- ✅ Dashboard web accesible en **http://localhost:5000**
+
+---
+
+## 📊 ¿Qué es el Dashboard?
+
+El dashboard es una **interfaz web moderna** que te permite:
+
+### 🔍 Monitoreo
+- Ver todos los archivos de audio en tiempo real
+- Filtrar por origen (Local/WebDAV)
+- Buscar archivos por nombre
+- Ver estadísticas: total, procesados, pendientes
+
+### ⚡ Control
+- **Reprocesar archivos** con un solo click
+- **Resetear archivos** para forzar reprocesamiento
+- **Descargar resultados** en múltiples formatos
+
+### 📁 Gestión de Archivos
+Ver formatos disponibles para cada archivo:
+- 📝 TXT (transcripción)
+- 📋 MD (Markdown)
+- 📄 DOCX (documento editable)
+- 📑 PDF (documento PDF)
+
+---
+
+## 🌐 Uso del Dashboard
+
+### 1. Acceder
+Abre tu navegador en: **http://localhost:5000**
+
+### 2. Ver Archivos
+La página principal muestra:
+- **Header**: Título del dashboard
+- **Estadísticas**: Cards con total, procesados, pendientes
+- **Controles**: Botones para refrescar y reprocesar
+- **Lista**: Grid de archivos con información
+
+### 3. Filtrar
+Usa los filtros en la parte superior:
+- ☑️ **Local**: Mostrar archivos de la carpeta downloads
+- ☑️ **WebDAV**: Mostrar archivos de Nextcloud
+- 🔍 **Búsqueda**: Escribir para filtrar por nombre
+
+### 4. Acciones por Archivo
+
+#### Para archivos **Pendientes**:
+1. Click en botón **🚀 Procesar**
+2. Confirmar si hay archivos existentes
+3. El archivo se encola para procesamiento
+4. El estado se actualiza automáticamente
+
+#### Para archivos **Procesados**:
+1. Click en botón **🔄 Resetear**
+2. Confirmar la acción
+3. El archivo se marca como no procesado
+4. Podrás reprocesarlo cuando quieras
+
+### 5. Descargar Resultados
+Si un archivo tiene formatos disponibles, verás enlaces:
+- 📝 TXT, 📋 MD, 📄 DOCX, 📑 PDF
+- Click directo para descargar
+
+---
+
+## 🔧 Comandos Alternativos
+
+### Solo Dashboard (sin servicio principal)
+```bash
+python3 main.py dashboard-only
+```
+
+### Otros comandos disponibles
+```bash
+# Servicio completo
+python3 main.py
+
+# Procesar audio individual
+python3 main.py whisper audio.mp3 salida.txt
+
+# Procesar PDF individual
+python3 main.py pdf documento.pdf editable.docx
+
+# Convertir texto a resumen
+python3 main.py txt2docx texto.txt resumen.docx
+
+# Generar quiz
+python3 main.py quiz "texto del quiz" quiz.docx
+```
+
+---
+
+## 📱 Interfaz Responsive
+
+El dashboard funciona en:
+- 💻 **Desktop**: Interfaz completa con todas las funciones
+- 📱 **Móvil**: Adaptado para pantallas pequeñas
+- 📲 **Tablet**: Experiencia optimizada para tablets
+
+---
+
+## 🎨 Características de la UI
+
+- **Diseño moderno**: Gradientes y efectos glassmorphism
+- **Animaciones suaves**: Transiciones y hover effects
+- **Feedback visual**: Estados de carga y confirmación
+- **Tema oscuro**: Colores elegantes y profesionales
+- **Iconos**: Emojis para mejor identificación visual
+
+---
+
+## ⚙️ API Endpoints
+
+El dashboard expone una API REST:
+
+- `GET /api/files` - Obtener lista de archivos
+- `POST /api/reprocess` - Reprocesar archivo
+- `POST /api/mark-unprocessed` - Marcar como no procesado
+- `GET /api/refresh` - Refrescar lista
+- `GET /health` - Health check
+- `GET /downloads/<archivo>` - Descargar archivo
+
+---
+
+## 🚨 Notas Importantes
+
+### Logs
+Los logs del dashboard aparecen en la consola donde ejecutaste `main.py`
+
+### Errores Comunes
+1. **Puerto 5000 en uso**
+   - Solución: Cambiar puerto en `dashboard.py` o terminar proceso anterior
+
+2. **No aparecen archivos**
+   - Verificar conexión a Nextcloud
+   - Revisar variables de entorno NEXTCLOUD_*
+   - Comprobar que hay archivos en la carpeta Audios
+
+3. **Error al procesar**
+   - Revisar logs en la consola
+   - Verificar dependencias (Flask, etc.)
+   - Comprobar espacio en disco
+
+### Rendimiento
+- El dashboard se ejecuta en **hilo separado**
+- **No bloquea** el procesamiento principal
+- Se inicia **automáticamente** con main.py
+- **Cierre seguro** al terminar main.py
+
+---
+
+## 🎯 Casos de Uso
+
+### 📚 Estudio Personal
+1. Sube audios de clases a Nextcloud
+2. El sistema los procesa automáticamente
+3. Usa el dashboard para descargar resúmenes
+4. Estudia con los documentos DOCX generados
+
+### 🏢 Oficina
+1. Configura Nextcloud empresarial
+2. Comparte carpeta Audios con el equipo
+3. Todos pueden subir archivos para procesar
+4. Gestiona todo desde el dashboard web
+
+### 🔄 Reprocesamiento
+1. Un archivo falló al procesarse
+2. Usa el dashboard para resetearlo
+3. Reprocesa con un click
+4. Descarga el resultado actualizado
+
+---
+
+## 🎉 ¡Disfruta!
+
+**Simplemente ejecuta:**
+```bash
+python3 main.py
+```
+
+**Y visita:** http://localhost:5000
+
+---
+
+### ¿Necesitas Ayuda?
+
+1. Revisa los logs en la consola
+2. Verifica que Flask esté instalado: `pip3 install flask flask-cors`
+3. Comprueba que el puerto 5000 esté libre
+4. Asegúrate de tener conexión a Internet para APIs de IA
+
+**¡El dashboard está listo para usar!** 🚀
diff --git a/Dockerfile b/Dockerfile
new file mode 100755
index 0000000..8b3ec08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,43 @@
+# Base image: NVIDIA CUDA 12.1.1 runtime on Ubuntu 22.04 (Python is installed below)
+FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
+
+# Keep apt from blocking on interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install Python, pip, ffmpeg, the PDF/OCR tools and curl
+RUN apt-get update && apt-get install -y python3.10 python3-pip ffmpeg poppler-utils tesseract-ocr tesseract-ocr-spa curl && rm -rf /var/lib/apt/lists/*
+
+# Install Node.js 20 from the NodeSource repository
+RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
+    apt-get install -y nodejs && \
+    rm -rf /var/lib/apt/lists/*
+
+# Make `python` resolve to python3 (-f keeps the build from failing if the link already exists)
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+
+# Set the working directory
+WORKDIR /app
+
+# Copy only the requirements file here; the rest of the source is copied at the end
+COPY requirements.txt .
+
+# Upgrade pip, then install the PyTorch packages built for CUDA 12.1
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+RUN python3 -m pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+
+# Install the remaining dependencies (tesseract is already installed above; apt lists are removed to keep the layer small)
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 && rm -rf /var/lib/apt/lists/*
+RUN python3 -m pip install --no-cache-dir easyocr pytesseract opencv-python-headless pdf2image transformers
+
+# Install the Claude CLI
+RUN npm install -g @anthropic-ai/claude-code
+
+# Install the Gemini CLI as root
+RUN npm install -g @google/gemini-cli
+
+# Copy the whole application source into the image
+COPY . .
+
+# Default command: start the unified main service
+CMD ["python3", "main.py"]
diff --git a/Dockerfile.dashboard b/Dockerfile.dashboard
new file mode 100644
index 0000000..a0e2e8a
--- /dev/null
+++ b/Dockerfile.dashboard
@@ -0,0 +1,32 @@
+FROM python:3.11-slim
+
+# Set the working directory
+WORKDIR /app
+
+# Install system dependencies (C/C++ compilers; presumably needed by pip packages that build native extensions — TODO confirm)
+RUN apt-get update && apt-get install -y \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy both requirements files (only requirements-dashboard.txt is installed below)
+COPY requirements.txt requirements-dashboard.txt ./
+
+# Install the Python dependencies
+RUN pip install --no-cache-dir -r requirements-dashboard.txt
+
+# Copy the application files
+COPY main.py dashboard.py ./
+COPY templates/ ./templates/
+
+# Create the directories the application needs
+RUN mkdir -p downloads resumenes_docx
+
+# Mark dashboard.py as executable
+RUN chmod +x dashboard.py
+
+# Expose the dashboard port
+EXPOSE 5000
+
+# Command that runs the dashboard
+CMD ["python", "dashboard.py"]
\ No newline at end of file
diff --git a/QUICKSTART.md b/QUICKSTART.md
new file mode 100644
index 0000000..b2df375
--- /dev/null
+++ b/QUICKSTART.md
@@ -0,0 +1,88 @@
+# 🚀 Quick Start - Dashboard Integrado
+
+## ✅ Estado Actual
+- ✅ Flask instalado y configurado
+- ✅ Dashboard funcionando correctamente
+- ✅ 210 archivos procesados detectados
+- ✅ Todas las pruebas pasaron
+
+## 🎯 Uso Inmediato
+
+### Opción 1: Servicio Completo (Recomendado)
+```bash
+python3 main.py
+```
+**Resultado:**
+- ✅ Servicio principal procesando archivos automáticamente
+- ✅ Dashboard web disponible en http://localhost:5000
+
+### Opción 2: Solo Dashboard
+```bash
+python3 main.py dashboard-only
+```
+
+### Opción 3: Probar Dashboard
+```bash
+python3 test_dashboard.py
+```
+
+## 📊 Dashboard Web
+
+**URL:** http://localhost:5000
+
+**Funciones:**
+- 🔍 Ver archivos de audio en tiempo real
+- 🔎 Filtrar por origen (Local/WebDAV)
+- 🚀 Reprocesar archivos con 1 click
+- 🔄 Resetear archivos procesados
+- 📥 Descargar resultados (TXT, MD, DOCX, PDF)
+- 📱 Interfaz responsive (móvil/tablet/desktop)
+
+## 📁 Ubicaciones Importantes
+
+- **Servicio Principal:** `/home/ren/cbc/main.py`
+- **Dashboard:** `/home/ren/cbc/dashboard.py`
+- **Interfaz Web:** `/home/ren/cbc/templates/index.html`
+- **Pruebas:** `/home/ren/cbc/test_dashboard.py`
+- **Instrucciones:** `/home/ren/cbc/DASHBOARD_INSTRUCTIONS.md`
+
+## 🔧 Comandos Disponibles
+
+```bash
+# Servicio completo (main + dashboard)
+python3 main.py
+
+# Solo dashboard
+python3 main.py dashboard-only
+
+# Transcribir audio
+python3 main.py whisper audio.mp3 salida.txt
+
+# Procesar PDF
+python3 main.py pdf documento.pdf editable.docx
+
+# Texto a resumen
+python3 main.py txt2docx texto.txt resumen.docx
+
+# Generar quiz
+python3 main.py quiz "texto" quiz.docx
+
+# Marcar archivos como procesados
+python3 main.py seed-processed
+```
+
+## 📞 Soporte
+
+Si algo no funciona:
+1. Ejecutar: `python3 test_dashboard.py`
+2. Verificar logs en consola
+3. Revisar: `/home/ren/cbc/DASHBOARD_INSTRUCTIONS.md`
+
+## 🎉 ¡Listo para Usar!
+
+**Simplemente ejecuta:**
+```bash
+python3 main.py
+```
+
+**Y visita:** http://localhost:5000
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4dc1e82
--- /dev/null
+++ b/README.md
@@ -0,0 +1,194 @@
+# Nextcloud AI Service v8 Final
+
+Sistema completo de procesamiento de audio, PDF y generación de resúmenes colaborativos mediante 3 modelos de IA.
+
+## 🚀 Características Principales
+
+### 🤖 Sistema Unificado de 3 IAs
+- **GPT-OSS-120B**: Extracción de puntos clave mediante DeepInfra API
+- **Claude CLI**: Desarrollo de resúmenes integrales mediante Z.ai API
+- **Gemini CLI**: Formato y estilo final mediante CLI de Google
+
+### 📁 Procesamiento de Archivos
+- **Audio**: Transcripción con Whisper y generación de resúmenes
+- **PDF**: OCR avanzado y conversión a documentos editables
+- **Documentos**: Generación de resúmenes automáticos
+- **Sincronización**: Integración completa con Nextcloud via WebDAV
+
+### 🎯 Características Técnicas
+- **Docker Multi-etapa**: Optimizado para producción
+- **GPU CUDA 12.1**: Aceleración por hardware NVIDIA
+- **CLI Tools**: Claude CLI y Gemini CLI para máxima compatibilidad
+- **Unificación**: Sistema colaborativo que genera un único documento final
+
+## 🛠️ Instalación
+
+### Requisitos
+- Docker y Docker Compose
+- NVIDIA GPU con drivers CUDA 12.1+
+- 16GB+ RAM recomendado
+- 20GB+ espacio en disco
+
+### Configuración
+
+1. **Clonar el repositorio**
+   ```bash
+   git clone https://gitea.cbcren.online/ren/nextcloud-ai-v8-final.git
+   cd nextcloud-ai-v8-final
+   ```
+
+2. **Configurar variables de entorno**
+   ```bash
+   cp .env.example .env
+   # Editar .env con tus credenciales
+   ```
+
+3. **Iniciar servicios**
+   ```bash
+   docker-compose up -d
+   ```
+
+## ⚙️ Configuración de Variables de Entorno
+
+### Nextcloud
+```env
+NEXTCLOUD_URL=https://tu-nextcloud.com
+NEXTCLOUD_USER=tu_usuario
+NEXTCLOUD_PASS=tu_contraseña
+```
+
+### APIs de IA
+```env
+GEMINI_API_KEY=tu_gemini_key
+DEEPINFRA_API_KEY=tu_deepinfra_key
+ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic
+ANTHROPIC_AUTH_TOKEN=tu_z_ai_token
+```
+
+### Notificaciones (Opcional)
+```env
+TELEGRAM_TOKEN=tu_bot_token
+TELEGRAM_CHAT_ID=tu_chat_id
+```
+
+## 📊 Arquitectura del Sistema
+
+```
+┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
+│   GPT-OSS-120B  │───▶│    Claude CLI   │───▶│   Gemini CLI    │
+│  (DeepInfra)    │    │     (Z.ai)      │    │    (Google)     │
+└─────────────────┘    └─────────────────┘    └─────────────────┘
+         │                       │                       │
+         ▼                       ▼                       ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                    Sistema Unificado                            │
+│                 (Documento Final Único)                      │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                     Nextcloud Service                          │
+│              (Sincronización WebDAV)                           │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+## 🔧 Uso
+
+### Procesamiento Automático
+1. Sube archivos de audio a la carpeta `Audios` en Nextcloud
+2. El sistema detecta automáticamente y genera resúmenes
+3. Los documentos finales se guardan en `Documentos`
+4. Las versiones Markdown se sincronizan con `Notes`
+
+### Monitoreo
+- **Logs**: `docker-compose logs -f app`
+- **Estado**: `docker-compose ps`
+- **Telegram**: Notificaciones automáticas (si está configurado)
+
+## 🎨 Salida del Sistema
+
+El sistema genera documentos unificados con:
+
+1. **Puntos Clave**: Extraídos por GPT-OSS-120B
+2. **Resumen Integral**: Desarrollado por Claude CLI (500+ palabras)
+3. **Quiz de Evaluación**: 10 preguntas de opción múltiple
+4. **Metadatos**: Información del proceso colaborativo
+
+## 🐳 Docker
+
+### Estructura de Contenedores
+- **app**: Servicio principal con soporte GPU
+- **ollama**: Servidor Ollama para modelos locales
+
+### Personalización
+```bash
+# Reconstruir contenedores
+docker-compose build
+
+# Reiniciar servicios
+docker-compose restart
+
+# Ver logs en tiempo real
+docker-compose logs -f app
+```
+
+## 🚨 Troubleshooting
+
+### Problemas Comunes
+
+1. **Error de permisos Claude CLI**
+   - Solución: `CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS=1` en docker-compose.yml
+
+2. **Timeout en procesamiento**
+   - Aumentar `MODEL_TIMEOUT_SECONDS` en .env
+
+3. **Problemas de GPU**
+   - Verificar drivers NVIDIA y CUDA 12.1+
+   - Comprobar `nvidia-smi`
+
+4. **Error de APIs**
+   - Verificar keys y endpoints en .env
+   - Comprobar límites de las APIs
+
+## 📈 Métricas y Optimización
+
+### Rendimiento
+- **Tiempo de procesamiento**: ~2-5 minutos por audio de 5min
+- **Uso de VRAM**: ~8-12GB con modelos GPU
+- **Calidad de resúmenes**: Formato académico con análisis profundo
+
+### Optimización
+- **VRAM Management**: Limpieza automática cada 5 minutos
+- **Error Handling**: Reintentos automáticos con backoff exponencial
+- **Timeout Configurable**: Adaptarse a diferentes cargas de trabajo
+
+## 🔐 Seguridad
+
+- **API Keys**: Almacenadas como variables de entorno
+- **WebDAV**: Autenticación básica con HTTPS
+- **CLI Tools**: Configuración segura sin permisos de root
+- **Redes**: Aislamiento de contenedores Docker
+
+## 🤝 Contribuciones
+
+1. Fork del repositorio
+2. Crear rama feature
+3. Commit con cambios descriptivos
+4. Pull Request para revisión
+
+## 📄 Licencia
+
+MIT License - Ver archivo LICENSE para detalles
+
+## 📞 Soporte
+
+Para problemas o preguntas:
+- Crear issue en el repositorio
+- Revisar logs del sistema
+- Verificar documentación de variables de entorno
+
+---
+
+- **Versión**: 8.0 Final
+- **Última Actualización**: Septiembre 2024
+- **Estado**: Producción estable
\ No newline at end of file
diff --git a/README_AI_ASSISTANT.md b/README_AI_ASSISTANT.md
new file mode 100644
index 0000000..23953c6
--- /dev/null
+++ b/README_AI_ASSISTANT.md
@@ -0,0 +1,285 @@
+# 🤖 CBC Nextcloud AI Service - Documentación para Asistentes IA
+
+## 📋 Resumen Ejecutivo
+
+Este es un sistema automatizado de procesamiento de contenido académico que utiliza inteligencia artificial para transcribir audios, procesar PDFs y generar resúmenes colaborativos mediante la integración de 3 modelos de IA diferentes.
+
+**Estado Actual**: ✅ ACTIVO y FUNCIONANDO
+- **Servicio**: `cbc-main.service` (systemd)
+- **Proceso Principal**: PID 49696 (única instancia)
+- **GPU**: NVIDIA RTX 3050 (1.5GB/8GB en uso)
+- **Última Actualización**: 2025-09-26
+
+---
+
+## 🖥️ Especificaciones del Sistema
+
+### Hardware
+- **CPU**: AMD Ryzen 5 5600X 6-Core Processor (12 hilos)
+- **RAM**: 15GB DDR4 (9.7GB disponible)
+- **GPU**: NVIDIA GeForce RTX 3050 (8GB VRAM)
+- **Almacenamiento**: 439GB SSD (330GB libre)
+- **Sistema Operativo**: Linux x86_64
+
+### Software
+- **Python**: 3.10.12
+- **CUDA**: 12.2 (Driver 535.247.01)
+- **Gestor de Servicios**: systemd
+- **Directorio Principal**: `/home/ren/cbc/`
+
+### Librerías Clave
+- `torch` 2.8.0 (PyTorch con soporte CUDA)
+- `transformers` 4.56.1 (Hugging Face)
+- `whisper` 20240930 (OpenAI)
+- `easyocr` 1.7.2 (OCR)
+- `openai` 1.107.0 (API clientes)
+- `python-docx` 1.2.0 (manipulación Word)
+- `pdf2image` 1.17.0 (procesamiento PDF)
+
+---
+
+## 🚀 Funcionalidades Principales
+
+### 1. Procesamiento de Audio
+- **Formatos Soportados**: MP3, WAV, M4A, OGG
+- **Transcripción**: Whisper (medium) con aceleración por GPU
+- **Salida**: Archivos de texto con timestamps
+- **Ubicación**: Carpeta `Audios` en Nextcloud
+
+### 2. Procesamiento de PDF
+- **OCR Múltiple**: EasyOCR + Tesseract + TrOCR
+- **Procesamiento por Lotes**: 3 páginas por chunk
+- **Corrección IA**: Gemini para limpieza de texto
+- **Salida**: Documentos DOCX editables
+- **Ubicación**: Carpeta `Pdf` en Nextcloud
+
+### 3. Sistema de Resúmenes Colaborativos
+**3 Modelos de IA trabajando en secuencia**:
+
+1. **GPT-OSS-120B** (DeepInfra API):
+   - Genera bullet points clave
+   - Análisis inicial del contenido
+
+2. **Claude/Z.ai** (CLI - API Z.ai):
+   - Desarrolla resumen integral
+   - 400-500 palabras estructuradas
+
+3. **Gemini** (CLI - Google):
+   - Aplica formato final
+   - Optimiza presentación
+
+### 4. Clasificación Inteligente de Contenido
+El sistema clasifica automáticamente en 4 categorías temáticas:
+- `historia` - Eventos históricos, cronologías
+- `analisis_contable` - Contabilidad, finanzas, balances
+- `instituciones_gobierno` - Política, gobierno, leyes
+- `otras_clases` - Ciencias, tecnología, literatura, etc.
+
+### 5. Generación de Quizzes
+- **10 preguntas** de opción múltiple por documento
+- **4 opciones** (A, B, C, D) por pregunta
+- **Respuestas incluidas** en documentos separados
+
+---
+
+## ⚙️ Configuración Actual
+
+### Variables de Entorno
+```bash
+CUDA_LAUNCH_BLOCKING=1          # Depuración CUDA síncrona
+PYTHONPATH=/home/ren/cbc        # Ruta del proyecto
+HOME=/home/ren                  # Directorio home
+```
+
+### Rutas del Sistema
+- **Descargas**: `/app/downloads/`
+- **Resúmenes**: `./downloads/`
+- **Documentos DOCX**: `./resumenes_docx/`
+- **Archivos Procesados**: `/app/processed_files.txt`
+
+### Límites de Recursos
+- **Memoria RAM**: 8GB máximo
+- **CPU**: 90% de cuota
+- **Archivos Abiertos**: 65536
+- **Reinicio Automático**: Siempre (10 segundos de espera)
+
+---
+
+## 📁 Estructura de Archivos Principal
+
+```
+/home/ren/cbc/
+├── main.py                    # Servicio principal (72,405 líneas)
+├── .env                       # Variables de entorno
+├── requirements.txt           # Dependencias Python
+├── README_AI_ASSISTANT.md    # Este documento
+├── MEGA_HISTORIA_parte*.txt   # Historiales académicos
+├── resumen_*.py              # Scripts de resumen
+├── procesador_academico.py   # Procesamiento especializado
+├── analizador_*.py           # Análisis de contenido
+├── config_telegram.txt       # Configuración Telegram
+└── txt/                      # Directorio de textos
+```
+
+---
+
+## 🔧 Gestión del Servicio
+
+### Comandos Esenciales
+```bash
+# Ver estado del servicio
+sudo systemctl status cbc-main
+
+# Ver logs en tiempo real
+sudo journalctl -u cbc-main -f
+
+# Detener servicio
+sudo systemctl stop cbc-main
+
+# Iniciar servicio
+sudo systemctl start cbc-main
+
+# Reiniciar servicio
+sudo systemctl restart cbc-main
+
+# Ver uso de GPU
+nvidia-smi
+
+# Ver uso de memoria del sistema
+free -h
+```
+
+### Monitoreo de Salud
+```bash
+# Verificar proceso único
+ps aux | grep "python3.*main.py" | grep -v grep
+
+# Verificar memoria GPU
+nvidia-smi --query-gpu=memory.used,memory.total --format=csv
+
+# Verificar espacio en disco
+df -h /
+
+# Verificar actividad de red
+sudo netstat -tlnp | grep :80
+```
+
+---
+
+## 🚨 Problemas Comunes y Soluciones
+
+### Error CUDA: "CUDA-capable device(s) is/are busy or unavailable"
+- **Causa**: Múltiples procesos compitiendo por recursos GPU
+- **Solución**:
+```bash
+# Verificar procesos múltiples
+ps aux | grep "python3.*main.py"
+
+# Si hay múltiples, reiniciar servicio
+sudo systemctl restart cbc-main
+```
+
+### Error de Permiso: "Permission denied"
+- **Causa**: El servicio no tiene permisos para escribir en `/app/`
+- **Solución**:
+```bash
+# Crear directorios con permisos correctos
+sudo mkdir -p /app/downloads /app/resumenes
+sudo chown -R ren:ren /app/
+```
+
+### Alto Uso de Memoria GPU
+**Configuración actual optimizada**:
+- `MAX_PAGES_PER_CHUNK = 3` (reducido de 5)
+- `PDF_DPI = 200` (reducido de 300)
+- `batch_size = 1` (procesamiento individual)
+- `_MODEL_TIMEOUT_SECONDS = 300` (liberación rápida)
+
+---
+
+## 🔄 Flujo de Trabajo Automático
+
+### 1. Detección de Archivos
+- Monitorea carpetas Nextcloud cada 5 segundos
+- Verifica archivos no procesados en `processed_files.txt`
+
+### 2. Procesamiento según Tipo
+- **Audio** → Transcripción → Resumen Colaborativo → Clasificación → Subida
+- **PDF** → OCR → Corrección IA → Documento Editable → Resumen → Subida
+
+### 3. Clasificación y Organización
+- Análisis de contenido con Gemini
+- Clasificación en categorías temáticas
+- Renombrado automático con temas extraídos
+- Organización en carpetas específicas
+
+### 4. Notificaciones
+- Envío de alertas por Telegram
+- Reportes de progreso y errores
+- Confirmación de procesamiento completado
+
+---
+
+## 📊 Métricas de Rendimiento
+
+### Rendimiento Actual
+- **Procesamiento PDF**: ~3-5 minutos por documento (12 páginas)
+- **Transcripción Audio**: ~2-3 minutos por 10 minutos de audio
+- **Generación Resúmenes**: ~1-2 minutos por documento
+- **Uso GPU**: 1.5GB (estable)
+- **Uso CPU**: 44% promedio
+- **Memoria RAM**: 1.1GB utilizada
+
+### Optimizaciones Aplicadas
+- ✅ Procesamiento único (sin duplicados)
+- ✅ Gestión agresiva de VRAM
+- ✅ Límites de recursos controlados
+- ✅ Reintentos automáticos para errores CUDA
+- ✅ Sistema de clasificación inteligente
+
+---
+
+## 🔮 Estado y Próximos Pasos
+
+### Estado Actual: SISTEMA ESTABLE
+- ✅ Servicio systemd funcionando correctamente
+- ✅ GPU operativa sin errores
+- ✅ Procesamiento de PDF activo
+- ✅ Clasificación automática funcionando
+- ✅ Notificaciones Telegram operativas
+
+### Posibles Mejoras Futuras
+- Implementar cola de procesamiento para manejar carga pesada
+- Añadir interfaz web para monitoreo
+- Optimizar tiempos de procesamiento con modelos más eficientes
+- Implementar sistema de backup automático
+- Añadir métricas detalladas de rendimiento
+
+---
+
+## 📞 Información de Contacto y Soporte
+
+### Para Emergencias del Sistema
+```bash
+# Reinicio completo del sistema
+sudo systemctl restart cbc-main
+
+# Liberación forzada de VRAM
+sudo pkill -f python3
+sudo systemctl start cbc-main
+```
+
+### Archivos de Log Importantes
+- **Journal Systemd**: `sudo journalctl -u cbc-main`
+- **Log GPU**: `nvidia-smi --query-gpu=timestamp,memory.used --format=csv -l 1`
+- **Log Memoria**: `free -h -s 5`
+
+### Puntos Críticos del Sistema
+1. **Disponibilidad GPU**: NVIDIA RTX 3050 (8GB)
+2. **Espacio en Disco**: 330GB disponibles
+3. **Conexión Nextcloud**: WebDAV funcional
+4. **API Keys**: Configuradas y operativas
+
+---
+
+*Este documento fue generado automáticamente el 2025-09-26 para proporcionar a asistentes IA una comprensión completa del estado y funcionalidad del sistema CBC Nextcloud AI Service.*
\ No newline at end of file
diff --git a/claude_code_zai_env.sh b/claude_code_zai_env.sh
new file mode 100644
index 0000000..00ee0b9
--- /dev/null
+++ b/claude_code_zai_env.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# ========================
+#       Define Constants
+# ========================
+SCRIPT_NAME=$(basename "$0")
+NODE_MIN_VERSION=18
+NODE_INSTALL_VERSION=22
+NVM_VERSION="v0.40.3"
+CLAUDE_PACKAGE="@anthropic-ai/claude-code"
+CONFIG_DIR="$HOME/.claude"
+CONFIG_FILE="$CONFIG_DIR/settings.json"
+API_BASE_URL="https://api.z.ai/api/anthropic"
+API_KEY_URL="https://z.ai/manage-apikey/apikey-list"
+API_TIMEOUT_MS=3000000
+
+# ========================
+#       Functions
+# ========================
+
+log_info() {
+    echo "🔹 $*"
+}
+
+log_success() {
+    echo "✅ $*"
+}
+
+log_error() {
+    echo "❌ $*" >&2
+}
+
+ensure_dir_exists() {
+    local dir="$1"
+    if [ ! -d "$dir" ]; then
+        mkdir -p "$dir" || {
+            log_error "Failed to create directory: $dir"
+            exit 1
+        }
+    fi
+}
+
+# ========================
+#     Node.js Installation
+# ========================
+
+install_nodejs() {
+    # Install nvm, then Node.js $NODE_INSTALL_VERSION through it (Linux and macOS only).
+    local platform; platform=$(uname -s)  # declared separately so `local` cannot mask a uname failure
+    case "$platform" in
+        Linux|Darwin)
+            log_info "Installing Node.js on $platform..."
+
+            # Install nvm; -f makes curl fail on HTTP errors instead of piping an error page into bash
+            log_info "Installing nvm ($NVM_VERSION)..."
+            curl -fsSL https://raw.githubusercontent.com/nvm-sh/nvm/"$NVM_VERSION"/install.sh | bash
+
+            # Load nvm (the installer honours a pre-set NVM_DIR, so look there before ~/.nvm)
+            log_info "Loading nvm environment..."
+            \. "${NVM_DIR:-$HOME/.nvm}/nvm.sh"
+
+            # Install Node.js
+            log_info "Installing Node.js $NODE_INSTALL_VERSION..."
+            nvm install "$NODE_INSTALL_VERSION"
+
+            # Verify installation
+            node -v &>/dev/null || {
+                log_error "Node.js installation failed"
+                exit 1
+            }
+            log_success "Node.js installed: $(node -v)"
+            log_success "npm version: $(npm -v)"
+            ;;
+        *)
+            log_error "Unsupported platform: $platform"
+            exit 1
+            ;;
+    esac
+}
+
+# ========================
+#     Node.js Check
+# ========================
+
+check_nodejs() {
+    local current_version major_version  # Node >= NODE_MIN_VERSION check; locals keep these out of the global scope
+    if command -v node &>/dev/null; then
+        current_version=$(node -v | sed 's/v//')
+        major_version=$(echo "$current_version" | cut -d. -f1)
+        if [ "$major_version" -ge "$NODE_MIN_VERSION" ]; then
+            log_success "Node.js is already installed: v$current_version"
+            return 0
+        else
+            log_info "Node.js v$current_version is installed but version < $NODE_MIN_VERSION. Upgrading..."
+            install_nodejs
+        fi
+    else
+        log_info "Node.js not found. Installing..."
+        install_nodejs
+    fi
+}
+
+# ========================
+#     Claude Code Installation
+# ========================
+
+install_claude_code() {
+    if command -v claude &>/dev/null; then
+        log_success "Claude Code is already installed: $(claude --version)"
+    else
+        log_info "Installing Claude Code..."
+        npm install -g "$CLAUDE_PACKAGE" || {
+            log_error "Failed to install claude-code"
+            exit 1
+        }
+        log_success "Claude Code installed successfully"
+    fi
+}
+
+configure_claude_json(){
+  node --eval '
+      const os = require("os");
+      const fs = require("fs");
+      const path = require("path");
+
+      const homeDir = os.homedir();
+      const filePath = path.join(homeDir, ".claude.json");
+      if (fs.existsSync(filePath)) {
+          const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
+          fs.writeFileSync(filePath, JSON.stringify({ ...content, hasCompletedOnboarding: true }, null, 2), "utf-8");
+      } else {
+          fs.writeFileSync(filePath, JSON.stringify({ hasCompletedOnboarding: true }, null, 2), "utf-8");
+      }'
+}
+
+# ========================
+#     API Key Configuration
+# ========================
+
+configure_claude() {
+    # Prompt for the Z.AI API key and write it, with the endpoint settings, into ~/.claude/settings.json.
+    log_info "Configuring Claude Code..."
+    echo "   You can get your API key from: $API_KEY_URL"
+    read -r -s -p "🔑 Please enter your Z.AI API key: " api_key  # -r keeps backslashes literal
+    echo
+
+    if [ -z "$api_key" ]; then
+        log_error "API key cannot be empty. Please run the script again."
+        exit 1
+    fi
+
+    ensure_dir_exists "$CONFIG_DIR"
+
+    # Write settings.json. Values reach node through its environment instead of being spliced into
+    # the JS source: quotes/backslashes in the key cannot break or inject code, and the key stays out of argv.
+    ZAI_API_KEY="$api_key" ZAI_BASE_URL="$API_BASE_URL" ZAI_TIMEOUT_MS="$API_TIMEOUT_MS" node --eval '
+        const os = require("os");
+        const fs = require("fs");
+        const path = require("path");
+
+        const homeDir = os.homedir();
+        const filePath = path.join(homeDir, ".claude", "settings.json");
+        const content = fs.existsSync(filePath)
+            ? JSON.parse(fs.readFileSync(filePath, "utf-8"))
+            : {};
+
+        fs.writeFileSync(filePath, JSON.stringify({
+            ...content,
+            env: {
+                ANTHROPIC_AUTH_TOKEN: process.env.ZAI_API_KEY,
+                ANTHROPIC_BASE_URL: process.env.ZAI_BASE_URL,
+                API_TIMEOUT_MS: process.env.ZAI_TIMEOUT_MS,
+                CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC: 1
+            }
+        }, null, 2), "utf-8");
+    ' || {
+        log_error "Failed to write settings.json"
+        exit 1
+    }
+
+    log_success "Claude Code configured successfully"
+}
+
+# ========================
+#        Main
+# ========================
+
+main() {
+    echo "🚀 Starting $SCRIPT_NAME"
+
+    check_nodejs
+    install_claude_code
+    configure_claude_json
+    configure_claude
+
+    echo ""
+    log_success "🎉 Installation completed successfully!"
+    echo ""
+    echo "🚀 You can now start using Claude Code with:"
+    echo "   claude"
+}
+
+main "$@"
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..c1ec30a
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,21 @@
+cache_ttl: 30
+dashboard:
+  enabled: true
+  host: 0.0.0.0
+  port: 5000
+http_timeout: 30
+max_workers: 2
+models:
+  claude: claude-3-haiku
+  gemini: gemini-1.5-flash
+  whisper: base
+notifications:
+  telegram_enabled: true
+  verbose_logging: true
+poll_interval: 5
+processing:
+  batch_size: 3
+  parallel_uploads: 3
+  retry_attempts: 3
+vram_threshold: 0.8
+webdav_retries: 3
diff --git a/corregir_gallego.py b/corregir_gallego.py
new file mode 100644
index 0000000..b3fc27f
--- /dev/null
+++ b/corregir_gallego.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+import re
+
+def corregir_texto_gallego(input_file, output_file):
+    """Apply the Galician-to-Spanish word corrections to a transcript and write the result to output_file."""
+
+    # Most common Galician-to-Spanish corrections (identity entries mark words that must stay untouched)
+    correcciones = {
+        "xeo": "yo",
+        "non": "no",
+        "hai": "hay",
+        "entóns": "entonces",
+        "máis": "más",
+        "tamén": "también",
+        "sempre": "siempre",
+        "verdade": "verdad",
+        "cousa": "cosa",
+        "xente": "gente",
+        "tempo": "tiempo",
+        "lingua": "lengua",
+        "pode": "puede",
+        "xamón": "shogun",
+        "xomón": "shogun",
+        "unha": "una",
+        "dunha": "de una",
+        "nunha": "en una",
+        "xeral": "general",
+        "xeraria": "jerarquía",
+        "ximéas": "temas",
+        "ximeas": "temas",
+        "ronquera": "reunión",
+        "xocalizar": "juntar",
+        "oanxacular": "juntar",
+        "xocal": "junto",
+        "lúmulo": "grupo",
+        "lúmido": "grupo",
+        "lúmada": "grupos",
+        "nulunxación": "reunificación",
+        "xotalipa": "capitalista",
+        "crente": "gente",
+        "enxucar": "juntar",
+        "agora": "ahora",
+        "cando": "cuando",
+        "temos": "tenemos",
+        "habíamos": "habíamos",
+        "era": "era",
+        "había": "había",
+        "existía": "existía",
+        "también": "también",
+        "vamos": "vamos",
+        "teníamos": "teníamos",
+        "vimos": "vimos",
+        "estaba": "estaba",
+        "estaban": "estaban",
+        "podía": "podía",
+        "podemos": "podemos",
+        "somos": "somos"
+    }
+
+    # Match whole words only: plain str.replace() also rewrote substrings (e.g. "podemos" became
+    # "puedemos" through the "pode" rule), which defeated the identity entries in the table.
+    patron = re.compile(r'(?<!\w)(?:' + '|'.join(map(re.escape, correcciones)) + r')(?!\w)')
+    # Lines holding only a timestamp followed by filler ("e e e" or "¿no?") are dropped.
+    relleno = re.compile(r'^\[?\d+:\d+:\d+\]\s+(?:e(?:\s+e)*|¿no\?)$')
+
+    with open(input_file, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    corrected_lines = []
+    for line in lines:
+        corrected_line = patron.sub(lambda m: correcciones[m.group(0)], line)
+        # Collapse whitespace runs and strip; the newline is re-added below (it used to become a space).
+        corrected_line = re.sub(r'\s+', ' ', corrected_line).strip()
+        if corrected_line and not relleno.match(corrected_line):
+            corrected_lines.append(corrected_line + '\n')
+
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.writelines(corrected_lines)
+
+    print(f"Archivo corregido guardado en: {output_file}")
+    print(f"Líneas procesadas: {len(lines)}")
+    print(f"Líneas finales: {len(corrected_lines)}")
+
+if __name__ == "__main__":
+    input_file = "downloads/1_5134218813469886295.txt"
+    output_file = "downloads/1_5134218813469886295_corregido.txt"
+    corregir_texto_gallego(input_file, output_file)
\ No newline at end of file
diff --git a/dashboard.py b/dashboard.py
new file mode 100644
index 0000000..6ae6126
--- /dev/null
+++ b/dashboard.py
@@ -0,0 +1,417 @@
+#!/usr/bin/env python3
+"""
+Dashboard Flask para gestión de archivos de audio
+Interfaz web simple para reprocesar archivos MP3 con 1 click
+"""
+
+import os
+import json
+import logging
+import subprocess
+from datetime import datetime
+from pathlib import Path
+from typing import List, Dict, Any
+
+from flask import Flask, render_template, request, jsonify, send_from_directory
+from flask_cors import CORS
+
+# Importar configuraciones del main.py
+import sys
+sys.path.append('/home/ren/cbc')
+from main import (
+    AUDIO_EXTENSIONS, LOCAL_DOWNLOADS_PATH, PROCESSED_FILES_PATH,
+    load_processed_files, save_processed_file, process_audio_file,
+    REMOTE_AUDIOS_FOLDER, webdav_list, normalize_remote_path
+)
+
+app = Flask(__name__)
+CORS(app)
+
+# Configuración
+app.config['SECRET_KEY'] = os.getenv('DASHBOARD_SECRET_KEY', 'dashboard-secret-key-change-in-production')
+app.config['DOWNLOADS_FOLDER'] = LOCAL_DOWNLOADS_PATH
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class FileManager:
+    """Gestor de archivos para el dashboard"""
+
+    def __init__(self):
+        self.processed_files = set()
+        self.load_processed_files()
+
+    def load_processed_files(self):
+        """Cargar archivos procesados desde el registro"""
+        try:
+            self.processed_files = load_processed_files()
+            logger.info(f"Cargados {len(self.processed_files)} archivos procesados")
+        except Exception as e:
+            logger.error(f"Error cargando archivos procesados: {e}")
+            self.processed_files = set()
+
+    def get_audio_files(self) -> List[Dict[str, Any]]:
+        """Obtener lista de archivos de audio disponibles"""
+        files = []
+
+        # Obtener archivos de WebDAV
+        try:
+            webdav_files = webdav_list(REMOTE_AUDIOS_FOLDER)
+            for file_path in webdav_files:
+                normalized_path = normalize_remote_path(file_path)
+                base_name = os.path.basename(normalized_path)
+
+                if any(normalized_path.lower().endswith(ext) for ext in AUDIO_EXTENSIONS):
+                    available_formats = self._get_available_formats(base_name)
+                    # Considerar procesado si está en el registro O si tiene archivos de salida
+                    is_processed = (normalized_path in self.processed_files or
+                                  base_name in self.processed_files or
+                                  any(available_formats.values()))
+
+                    files.append({
+                        'filename': base_name,
+                        'path': normalized_path,
+                        'source': 'webdav',
+                        'processed': is_processed,
+                        'size': 'Unknown',
+                        'last_modified': 'Unknown',
+                        'available_formats': available_formats
+                    })
+        except Exception as e:
+            logger.error(f"Error obteniendo archivos WebDAV: {e}")
+
+        # Obtener archivos locales
+        try:
+            if os.path.exists(LOCAL_DOWNLOADS_PATH):
+                local_files = []
+                for ext in AUDIO_EXTENSIONS:
+                    local_files.extend(Path(LOCAL_DOWNLOADS_PATH).glob(f"*{ext}"))
+
+                for file_path in local_files:
+                    stat = file_path.stat()
+                    available_formats = self._get_available_formats(file_path.name)
+                    # Considerar procesado si está en el registro O si tiene archivos de salida
+                    is_processed = (file_path.name in self.processed_files or
+                                  any(available_formats.values()))
+
+                    files.append({
+                        'filename': file_path.name,
+                        'path': str(file_path),
+                        'source': 'local',
+                        'processed': is_processed,
+                        'size': self._format_size(stat.st_size),
+                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
+                        'available_formats': available_formats
+                    })
+        except Exception as e:
+            logger.error(f"Error obteniendo archivos locales: {e}")
+
+        # Eliminar duplicados y ordenar
+        unique_files = {}
+        for file in files:
+            key = file['filename']
+            if key not in unique_files or file['source'] == 'webdav':
+                unique_files[key] = file
+
+        return sorted(unique_files.values(), key=lambda x: x['filename'])
+
+    def _get_available_formats(self, audio_filename: str) -> Dict[str, bool]:
+        """Verificar qué formatos de salida existen para un archivo de audio"""
+        # Obtener el nombre base sin extensión
+        base_name = Path(audio_filename).stem
+
+        # Extensiones a verificar
+        formats = {
+            'txt': False,
+            'md': False,
+            'pdf': False,
+            'docx': False
+        }
+
+        # Verificar en directorio local y resumenes_docx
+        directories_to_check = [LOCAL_DOWNLOADS_PATH, './resumenes_docx']
+
+        for directory in directories_to_check:
+            if not os.path.exists(directory):
+                continue
+
+            for ext in formats.keys():
+                # Buscar variaciones del nombre del archivo
+                name_variants = [
+                    base_name,  # Nombre exacto
+                    f"{base_name}_unificado",  # Con sufijo _unificado
+                    f"{base_name.replace(' ', '_')}",  # Espacios reemplazados por guiones bajos
+                    f"{base_name.replace(' ', '_')}_unificado",  # Ambas variaciones
+                ]
+
+                # También verificar variantes con espacios originales pero _unificado
+                if ' ' in base_name:
+                    name_variants.append(f"{base_name}_unificado")
+
+                # Para cada variante, verificar si existe el archivo
+                for name_variant in name_variants:
+                    file_path = os.path.join(directory, f"{name_variant}.{ext}")
+                    if os.path.exists(file_path):
+                        formats[ext] = True
+                        break  # Encontrado, pasar al siguiente formato
+
+        return formats
+
+    def _format_size(self, size_bytes: int) -> str:
+        """Formatear tamaño de archivo"""
+        for unit in ['B', 'KB', 'MB', 'GB']:
+            if size_bytes < 1024.0:
+                return f"{size_bytes:.1f} {unit}"
+            size_bytes /= 1024.0
+        return f"{size_bytes:.1f} TB"
+
+    def reprocess_file(self, file_path: str, source: str) -> Dict[str, Any]:
+        """Reprocesar un archivo específico"""
+        try:
+            # Verificar formatos existentes antes de reprocesar
+            filename = os.path.basename(file_path)
+            existing_formats = self._get_available_formats(filename)
+            has_existing_files = any(existing_formats.values())
+
+            if source == 'webdav':
+                # Para archivos WebDAV, llamar directamente a process_audio_file
+                logger.info(f"Iniciando reprocesamiento de WebDAV: {file_path}")
+                process_audio_file(file_path)
+            else:
+                # Para archivos locales, procesar directamente
+                logger.info(f"Iniciando reprocesamiento local: {file_path}")
+                # Aquí podrías agregar lógica adicional para archivos locales
+
+            return {
+                'success': True,
+                'message': f"Archivo {os.path.basename(file_path)} enviado a reprocesamiento",
+                'had_existing_files': has_existing_files,
+                'existing_formats': existing_formats
+            }
+        except Exception as e:
+            logger.error(f"Error reprocesando {file_path}: {e}")
+            return {
+                'success': False,
+                'message': f"Error: {str(e)}"
+            }
+
+    def mark_as_unprocessed(self, file_path: str) -> bool:
+        """Remove a file from the processed registry so it can be reprocessed; True on success, False on error."""
+        try:
+            load_processed_files()  # NOTE(review): result was never used; call kept in case it has side effects
+            normalized_path = normalize_remote_path(file_path)
+            base_name = os.path.basename(normalized_path)
+            targets = {normalized_path, file_path}
+
+            # Read through a context manager: the original iterated a bare open() and leaked the handle.
+            with open(PROCESSED_FILES_PATH, 'r', encoding='utf-8') as src:
+                kept = [line for line in src
+                        if line.strip() not in targets
+                        and os.path.basename(line.strip()) != base_name]
+
+            # Write the surviving entries to a sibling temp file, then swap it in with os.replace.
+            temp_path = PROCESSED_FILES_PATH + '.temp'
+            with open(temp_path, 'w', encoding='utf-8') as f:
+                f.writelines(kept)
+            os.replace(temp_path, PROCESSED_FILES_PATH)
+            self.load_processed_files()  # refresh the in-memory set
+
+            return True
+        except Exception as e:
+            logger.error(f"Error marcando como no procesado {file_path}: {e}")
+            return False
+
+# Instancia global del gestor de archivos
+file_manager = FileManager()
+
+@app.route('/')
+def index():
+    """Página principal del dashboard"""
+    return render_template('index.html')
+
+@app.route('/api/files')
+def get_files():
+    """API endpoint para obtener lista de archivos"""
+    try:
+        files = file_manager.get_audio_files()
+        return jsonify({
+            'success': True,
+            'files': files,
+            'total': len(files),
+            'processed': sum(1 for f in files if f['processed']),
+            'pending': sum(1 for f in files if not f['processed'])
+        })
+    except Exception as e:
+        logger.error(f"Error obteniendo archivos: {e}")
+        return jsonify({
+            'success': False,
+            'message': f"Error: {str(e)}"
+        }), 500
+
+@app.route('/api/reprocess', methods=['POST'])
+def reprocess_file():
+    """API endpoint para reprocesar un archivo"""
+    try:
+        data = request.get_json()
+        file_path = data.get('path')
+        source = data.get('source', 'local')
+
+        if not file_path:
+            return jsonify({
+                'success': False,
+                'message': "Path del archivo es requerido"
+            }), 400
+
+        result = file_manager.reprocess_file(file_path, source)
+        return jsonify(result)
+
+    except Exception as e:
+        logger.error(f"Error en endpoint reprocesar: {e}")
+        return jsonify({
+            'success': False,
+            'message': f"Error: {str(e)}"
+        }), 500
+
+@app.route('/api/mark-unprocessed', methods=['POST'])
+def mark_unprocessed():
+    """API endpoint para marcar archivo como no procesado"""
+    try:
+        data = request.get_json()
+        file_path = data.get('path')
+
+        if not file_path:
+            return jsonify({
+                'success': False,
+                'message': "Path del archivo es requerido"
+            }), 400
+
+        success = file_manager.mark_as_unprocessed(file_path)
+
+        if success:
+            return jsonify({
+                'success': True,
+                'message': "Archivo marcado como no procesado"
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'message': "No se pudo marcar como no procesado"
+            }), 500
+
+    except Exception as e:
+        logger.error(f"Error marcando como no procesado: {e}")
+        return jsonify({
+            'success': False,
+            'message': f"Error: {str(e)}"
+        }), 500
+
+@app.route('/api/refresh')
+def refresh_files():
+    """API endpoint para refrescar lista de archivos"""
+    try:
+        file_manager.load_processed_files()
+        files = file_manager.get_audio_files()
+        return jsonify({
+            'success': True,
+            'message': "Lista de archivos actualizada",
+            'files': files
+        })
+    except Exception as e:
+        logger.error(f"Error refrescando archivos: {e}")
+        return jsonify({
+            'success': False,
+            'message': f"Error: {str(e)}"
+        }), 500
+
+@app.route('/downloads/find-file')
+def find_and_download_file():
+    """Buscar y servir archivos con diferentes variaciones de nombre"""
+    try:
+        from flask import request
+        filename = request.args.get('filename')
+        ext = request.args.get('ext')
+
+        if not filename or not ext:
+            return jsonify({'error': 'Missing parameters'}), 400
+
+        # Generar posibles variaciones del nombre del archivo
+        from pathlib import Path
+        base_name = Path(filename).stem
+
+        possible_names = [
+            f"{base_name}.{ext}",
+            f"{base_name}_unificado.{ext}",
+            f"{base_name.replace(' ', '_')}.{ext}",
+            f"{base_name.replace(' ', '_')}_unificado.{ext}"
+        ]
+
+        # Directorios donde buscar
+        directories = [LOCAL_DOWNLOADS_PATH, './resumenes_docx']
+
+        # Intentar encontrar el archivo en cada directorio con cada variación
+        for directory in directories:
+            if not os.path.exists(directory):
+                continue
+
+            for name in possible_names:
+                file_path = os.path.join(directory, name)
+                if os.path.exists(file_path):
+                    return send_from_directory(directory, name)
+
+        # Si no se encuentra el archivo
+        return jsonify({'error': 'File not found'}), 404
+
+    except Exception as e:
+        logger.error(f"Error buscando archivo: {e}")
+        return jsonify({'error': 'File not found'}), 404
+
+@app.route('/downloads/<path:filename>')
+def download_file(filename):
+    """Serve a generated file, looking in downloads first and then in resumenes_docx.
+
+    send_from_directory signals a missing file with an HTTP 404 exception, not
+    FileNotFoundError, so the original per-directory fallback never ran. Each
+    directory is therefore probed explicitly before the file is sent.
+    Returns a JSON 404 response when the file is in neither location.
+    """
+    try:
+        for directory in (LOCAL_DOWNLOADS_PATH, './resumenes_docx'):
+            # Relative directories are resolved against app.root_path, as Flask does when sending.
+            # Existence probe only: send_from_directory still rejects paths escaping `directory`.
+            if os.path.isfile(os.path.join(app.root_path, directory, filename)):
+                return send_from_directory(directory, filename)
+
+        # Not present in either location
+        return jsonify({'error': 'File not found'}), 404
+
+    except Exception as e:
+        logger.error(f"Error sirviendo archivo {filename}: {e}")
+        return jsonify({'error': 'File not found'}), 404
+
+@app.route('/health')
+def health_check():
+    """Health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'timestamp': datetime.now().isoformat(),
+        'processed_files_count': len(file_manager.processed_files)
+    })
+
+if __name__ == '__main__':
+    logger.info("🚀 Iniciando Dashboard de Gestión de Audio")
+    logger.info(f"📁 Carpeta de descargas: {LOCAL_DOWNLOADS_PATH}")
+    logger.info(f"📊 Servidor web en http://localhost:5000")
+
+    try:
+        app.run(
+            host='0.0.0.0',
+            port=5000,
+            debug=False,
+            threaded=True,
+            use_reloader=False  # Evitar problemas con threading
+        )
+    except KeyboardInterrupt:
+        logger.info("🛑 Dashboard detenido por el usuario")
+    except Exception as e:
+        logger.error(f"❌ Error en dashboard: {e}")
+        raise
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100755
index 0000000..c5ab2ec
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,86 @@
+version: '3.8'
+
+services:
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: nextcloud_ai_app
+    volumes:
+      - ./downloads:/app/downloads
+      - ./resumenes_docx:/app/resumenes_docx
+    env_file:
+      - .env
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - OLLAMA_HOST=http://ollama:11434
+      - CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS=1
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    depends_on:
+      - ollama
+    restart: always
+
+  ollama:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ollama_server
+    volumes:
+      - ./ollama_data:/root/.ollama
+    ports:
+      - "11434:11434"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    restart: always
+
+  dashboard:
+    build:
+      context: .
+      dockerfile: Dockerfile.dashboard
+    container_name: audio_dashboard
+    volumes:
+      - ./downloads:/app/downloads
+      - ./templates:/app/templates
+      - ./processed_files.txt:/app/processed_files.txt
+      - ./.main_service.lock:/app/.main_service.lock
+    env_file:
+      - .env
+    environment:
+      - DASHBOARD_SECRET_KEY=${DASHBOARD_SECRET_KEY:-dashboard-secret-key-change-me}
+      - NVIDIA_VISIBLE_DEVICES=all
+      - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-https://api.z.ai/api/anthropic}
+      - ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN}
+    ports:
+      - "5000:5000"
+    depends_on:
+      - app
+    restart: always
+
+  filebrowser:
+    image: filebrowser/filebrowser
+    container_name: downloads_filebrowser
+    volumes:
+      - ./downloads:/srv
+      - ./filebrowser_config:/config
+    command: [
+      "--address", "0.0.0.0",
+      "--port", "8080",
+      "--root", "/srv",
+      "--database", "/config/filebrowser.db",
+      "--username", "ren",
+      "--password", "$$2b$$10$$KbFwEuIb3g26kYCxVzl0Ju81OxhK1KHQNUZCLAPDg298XQBOvhoHS"
+    ]
+    ports:
+      - "8080:8080"
+    restart: always
diff --git a/filebrowser_config/settings.json b/filebrowser_config/settings.json
new file mode 100644
index 0000000..e787ef8
--- /dev/null
+++ b/filebrowser_config/settings.json
@@ -0,0 +1,8 @@
+{
+  "port": 80,
+  "baseURL": "",
+  "address": "",
+  "log": "stdout",
+  "database": "/database/filebrowser.db",
+  "root": "/srv"
+}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..a97cc9a
--- /dev/null
+++ b/main.py
@@ -0,0 +1,3183 @@
+#!/usr/bin/env python3
+"""
+Nextcloud AI Service - Unified Main Service
+Combina todas las funcionalidades de procesamiento de audio, PDF y documentos en un solo archivo.
+"""
+
+import fcntl
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+import unicodedata
+import xml.etree.ElementTree as ET
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, Optional, Set
+
+import cv2
+import easyocr
+import numpy as np
+import pytesseract
+import requests
+import torch
+import whisper
+import textwrap
+from concurrent.futures import ThreadPoolExecutor
+from docx import Document
+from docx.shared import Inches
+from pdf2image import convert_from_path
+from pypdf import PdfReader, PdfWriter
+from requests.adapters import HTTPAdapter
+from requests.auth import HTTPBasicAuth
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+
+# --- LOGGING CONFIGURATION ---
+# INFO-level logging to the console stream with timestamp and level.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] - %(message)s",
+    handlers=[logging.StreamHandler()]
+)
+
+# --- ENVIRONMENT VARIABLE CONFIGURATION ---
+# Load variables from a .env file if one exists; python-dotenv is optional,
+# so a missing package is silently ignored.
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+# Nextcloud connection settings; NEXTCLOUD_URL doubles as the WebDAV base URL.
+NEXTCLOUD_URL = os.getenv("NEXTCLOUD_URL")
+NEXTCLOUD_USER = os.getenv("NEXTCLOUD_USER")
+NEXTCLOUD_PASS = os.getenv("NEXTCLOUD_PASS")
+WEBDAV_ENDPOINT = NEXTCLOUD_URL
+
+# Remote folder names, relative to the WebDAV base URL.
+REMOTE_AUDIOS_FOLDER = "Audios"
+REMOTE_DOCX_AUDIO_FOLDER = "Documentos"
+REMOTE_PDF_FOLDER = "Pdf"
+REMOTE_TXT_FOLDER = "Textos"
+RESUMENES_FOLDER = "Resumenes"
+DOCX_FOLDER = "Documentos"
+
+# Local paths. State files default to the directory containing this script
+# unless LOCAL_STATE_DIR / PROCESSED_FILES_PATH override them.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+LOCAL_STATE_DIR = os.environ.get("LOCAL_STATE_DIR", BASE_DIR)
+# Older registry locations that are migrated on first run.
+LEGACY_PROCESSED_PATHS = ["/app/processed_files.txt"]
+
+LOCAL_DOWNLOADS_PATH = os.path.join(BASE_DIR, "downloads")
+# Summaries share the downloads directory.
+LOCAL_RESUMENES = LOCAL_DOWNLOADS_PATH
+LOCAL_DOCX = os.path.join(BASE_DIR, "resumenes_docx")
+POLL_INTERVAL = 5
+PROCESSED_FILES_PATH = os.environ.get(
+    "PROCESSED_FILES_PATH",
+    os.path.join(LOCAL_STATE_DIR, "processed_files.txt")
+)
+
+# File extensions (lower-case, with leading dot) recognised per media type.
+AUDIO_EXTENSIONS = {".mp3", ".wav", ".m4a", ".ogg", ".aac"}
+PDF_EXTENSIONS = {".pdf"}
+TXT_EXTENSIONS = {".txt"}
+
+# HTTP / filename tunables. A non-integer value in any of these environment
+# variables raises ValueError at import time.
+HTTP_TIMEOUT = int(os.getenv("HTTP_TIMEOUT", "30"))
+WEBDAV_MAX_RETRIES = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
+DOWNLOAD_CHUNK_SIZE = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "8192"))
+MAX_FILENAME_LENGTH = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
+MAX_FILENAME_BASE_LENGTH = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
+MAX_FILENAME_TOPICS_LENGTH = int(os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20"))
+
+ZAI_BASE_URL = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
+ZAI_DEFAULT_MODEL = os.getenv("ZAI_MODEL", "glm-4.6")
+ZAI_AUTH_TOKEN_FALLBACK = os.getenv(
+    "ZAI_AUTH_TOKEN",
+    os.getenv("ANTHROPIC_AUTH_TOKEN", "6fef8efda3d24eb9ad3d718daf1ae9a1.RcFc7QPe5uZLr2mS"),
+)
+
+# Lazily created requests session shared by the WebDAV helpers.
+_WEBDAV_SESSION: Optional[requests.Session] = None
+# Type alias for the set of already-processed remote paths / file names.
+ProcessedRegistry = Set[str]
+
+# API KEYS
+DEFAULT_GEMINI_API_KEY = "AIzaSyDWOgyAJqscuPU6iSpS6gxupWBm4soNw5o"
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") or DEFAULT_GEMINI_API_KEY
+TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
+TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID")
+OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://ollama:11434")
+OLLAMA_MODEL = "mistral:7b"
+GEMINI_CLI_PATH = shutil.which("gemini")
+CLAUDE_CLI_PATH = shutil.which("claude")
+GEMINI_FLASH_MODEL = os.getenv("GEMINI_FLASH_MODEL")
+GEMINI_PRO_MODEL = os.getenv("GEMINI_PRO_MODEL")
+
+
+def _initialize_gemini_model_defaults() -> None:
+    """Selecciona automáticamente los modelos Gemini 2.5 más recientes disponibles."""
+    global GEMINI_FLASH_MODEL, GEMINI_PRO_MODEL
+
+    default_flash = "gemini-2.5-flash"
+    default_pro = "gemini-2.5-pro-preview-06-05"
+
+    if GEMINI_FLASH_MODEL and GEMINI_PRO_MODEL:
+        return
+
+    if not GEMINI_API_KEY:
+        GEMINI_FLASH_MODEL = GEMINI_FLASH_MODEL or default_flash
+        GEMINI_PRO_MODEL = GEMINI_PRO_MODEL or default_pro
+        return
+
+    try:
+        response = requests.get(
+            "https://generativelanguage.googleapis.com/v1beta/models",
+            params={"key": GEMINI_API_KEY},
+            timeout=12,
+        )
+        response.raise_for_status()
+        payload = response.json()
+        models = payload.get("models", [])
+
+        def choose_latest(pattern: str) -> Optional[str]:
+            candidate_stable = None
+            preview_candidates = []
+
+            for model_info in models:
+                name = model_info.get("name", "")
+                if not name.startswith("models/gemini-2.5"):
+                    continue
+
+                base_name = name.split("/", 1)[-1]
+
+                if pattern == "-flash":
+                    if "-flash" not in base_name or "-flash-lite" in base_name:
+                        continue
+                else:
+                    if pattern not in base_name:
+                        continue
+
+                if "preview" not in base_name and candidate_stable is None:
+                    candidate_stable = base_name
+                else:
+                    version = model_info.get("version") or ""
+                    preview_candidates.append((version, base_name))
+
+            if candidate_stable:
+                return candidate_stable
+
+            if preview_candidates:
+                preview_candidates.sort(key=lambda item: item[0], reverse=True)
+                return preview_candidates[0][1]
+
+            return None
+
+        if not GEMINI_FLASH_MODEL:
+            selected_flash = choose_latest("-flash")
+            if selected_flash:
+                GEMINI_FLASH_MODEL = selected_flash
+
+        if not GEMINI_PRO_MODEL:
+            selected_pro = choose_latest("-pro")
+            if selected_pro:
+                GEMINI_PRO_MODEL = selected_pro
+
+    except Exception as exc:
+        logging.warning(f"No se pudo obtener la lista de modelos Gemini: {exc}")
+
+    GEMINI_FLASH_MODEL = GEMINI_FLASH_MODEL or default_flash
+    GEMINI_PRO_MODEL = GEMINI_PRO_MODEL or default_pro
+
+
+# Resolve the Gemini model names once at import time.
+_initialize_gemini_model_defaults()
+
+# True when any of the Gemini CLI, a Gemini API key or the Claude CLI is available.
+GEMINI_AVAILABLE = bool(GEMINI_CLI_PATH or GEMINI_API_KEY or CLAUDE_CLI_PATH)
+
+# --- THEMATIC FOLDER CONFIGURATION ---
+# Maps an internal topic key to the folder name used for that topic.
+TEMATIC_FOLDERS = {
+    "historia": "Historia",
+    "analisis_contable": "Analisis Contable",
+    "instituciones_gobierno": "Instituciones del Gobierno",
+    "otras_clases": "Otras Clases"
+}
+
+# PDF CONFIGURATION - ADAPTIVE TUNING (GPU/CPU)
+# Every value can be overridden through the environment and is clamped to a
+# minimum with max(); a non-integer value raises ValueError at import time.
+_CPU_COUNT = os.cpu_count() or 1
+MAX_PAGES_PER_CHUNK = max(1, int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2")))  # Reduced from 3 to 2
+PDF_DPI = max(150, int(os.getenv("PDF_DPI", "200")))  # At least 150 for legible quality
+PDF_RENDER_THREAD_COUNT = max(1, int(os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, _CPU_COUNT)))))  # Reduced thread count
+PDF_BATCH_SIZE = max(1, int(os.getenv("PDF_BATCH_SIZE", "2")))  # Reduced from 4 to 2
+PDF_TROCR_MAX_BATCH = max(1, int(os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE))))
+PDF_TESSERACT_THREADS = max(1, int(os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, _CPU_COUNT // 3)))))))  # Reduced
+# The same thread count is reused for preprocessing and CPU OCR
+PDF_PREPROCESS_THREADS = max(1, int(os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS))))
+
+# Text-detection thresholds; unlike the values above, an unparsable
+# environment value falls back to the default instead of raising.
+try:
+    PDF_TEXT_DETECTION_MIN_RATIO = float(os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6"))
+except ValueError:
+    PDF_TEXT_DETECTION_MIN_RATIO = 0.6
+
+try:
+    PDF_TEXT_DETECTION_MIN_AVG_CHARS = int(os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120"))
+except ValueError:
+    PDF_TEXT_DETECTION_MIN_AVG_CHARS = 120
+
+# ERROR THROTTLING
+# Cache of key -> (message, timestamp) used by should_send_error().
+ERROR_THROTTLE_SECONDS = int(os.environ.get("ERROR_THROTTLE_SECONDS", "600"))
+_last_error_cache = {}
+
+# Model cache with an idle-timeout mechanism
+_whisper_model = None
+_ocr_models = None
+_trocr_models = None
+_models_last_used = None
+_MODEL_TIMEOUT_SECONDS = int(os.environ.get("MODEL_TIMEOUT_SECONDS", "300"))  # 300 seconds (5 minutes) so memory is released sooner
+
+# --- TELEGRAM NOTIFICATION FUNCTIONS ---
+def send_telegram_message(message, retries=3, delay=2):
+    """Envía mensaje a Telegram sin parsing de entidades para evitar errores"""
+    if not TELEGRAM_TOKEN or not TELEGRAM_CHAT_ID:
+        logging.warning("Telegram token or chat ID not set. Skipping notification.")
+        return False
+    
+    url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
+    data = {
+        "chat_id": TELEGRAM_CHAT_ID,
+        "text": message
+    }
+    
+    for attempt in range(retries):
+        try:
+            resp = requests.post(url, data=data, timeout=10)
+            if resp.status_code == 200:
+                return True
+            else:
+                logging.error(f"Telegram API error: {resp.status_code} {resp.text}")
+        except Exception as e:
+            logging.error(f"Telegram notification failed (attempt {attempt+1}/{retries}): {e}")
+        time.sleep(delay)
+    
+    logging.error("Telegram notification failed after all retries.")
+    return False
+
+def should_send_error(key, message):
+    """Return True if we should notify for this (key, message) given throttle rules."""
+    now = datetime.utcnow()
+    prev = _last_error_cache.get(key)
+    if prev is None:
+        _last_error_cache[key] = (message, now)
+        return True
+    prev_msg, prev_time = prev
+    if message != prev_msg or (now - prev_time).total_seconds() > ERROR_THROTTLE_SECONDS:
+        _last_error_cache[key] = (message, now)
+        return True
+    return False
+
+def _update_models_usage():
+    """Actualiza el timestamp de uso de los modelos"""
+    global _models_last_used
+    _models_last_used = datetime.utcnow()
+    logging.debug(f"Timestamp actualizado: {_models_last_used}")
+
+def _check_and_free_vram():
+    """Libera VRAM si los modelos no se han usado en el tiempo especificado"""
+    global _whisper_model, _ocr_models, _trocr_models, _models_last_used
+
+    now = datetime.utcnow()
+
+    # Limpieza básica sin interrumpir el procesamiento
+    if torch.cuda.is_available():
+        try:
+            # Solo limpiar caché básica sin liberar modelos
+            torch.cuda.empty_cache()
+        except:
+            pass
+
+    if _models_last_used is None:
+        return
+
+    idle_time = (now - _models_last_used).total_seconds()
+
+    # Verificar si hay modelos cargados antes de liberar
+    models_loaded = _whisper_model is not None or _ocr_models is not None or _trocr_models is not None
+
+    # Solo liberar después de 10 minutos de inactividad real
+    if idle_time > _MODEL_TIMEOUT_SECONDS and models_loaded:
+        logging.info(f"🔄 Models idle for {idle_time:.1f}s (> {_MODEL_TIMEOUT_SECONDS}s), freeing VRAM...")
+
+        models_freed = []
+
+        # Liberar modelo Whisper
+        if _whisper_model is not None:
+            try:
+                if torch.cuda.is_available():
+                    del _whisper_model
+                    _whisper_model = None
+                    models_freed.append("Whisper")
+            except Exception as e:
+                logging.error(f"Error freeing Whisper VRAM: {e}")
+
+        # Liberar modelos OCR
+        if _ocr_models is not None:
+            try:
+                _ocr_models = None
+                models_freed.append("OCR")
+            except Exception as e:
+                logging.error(f"Error freeing OCR VRAM: {e}")
+
+        # Liberar modelos TrOCR
+        if _trocr_models is not None:
+            try:
+                if torch.cuda.is_available():
+                    model = _trocr_models.get('model') if isinstance(_trocr_models, dict) else None
+                    if model is not None:
+                        model.to('cpu')
+                        models_freed.append("TrOCR")
+                    torch.cuda.empty_cache()
+            except Exception as e:
+                logging.error(f"Error freeing TrOCR VRAM: {e}")
+
+        # Limpiar variables globales (los modelos se recargarán cuando se necesiten)
+        _whisper_model = None
+        _ocr_models = None
+        _trocr_models = None
+        _models_last_used = None
+
+        # Forzar limpieza agresiva de VRAM
+        _force_aggressive_vram_cleanup()
+
+        if models_freed:
+            logging.info(f"🎯 Models freed from GPU: {', '.join(models_freed)}, VRAM liberated")
+
+    # Mostrar estado actual de VRAM cada 120 segundos para depuración
+    elif idle_time % 120 < 10:  # Cada ~120 segundos
+        vram_status = get_vram_usage()
+        if isinstance(vram_status, dict) and vram_status.get('any_models_loaded', False):
+            logging.info(f"📊 VRAM Status - Allocated: {vram_status.get('allocated_gb', 0)}GB, Idle: {idle_time:.1f}s")
+
+def _force_aggressive_vram_cleanup():
+    """Fuerza una limpieza agresiva de VRAM para liberar toda la memoria posible"""
+    try:
+        import gc
+        
+        logging.info("🔥 Iniciando limpieza agresiva de VRAM...")
+        
+        if torch.cuda.is_available():
+            # Mostrar estado antes de la limpieza
+            before_allocated = torch.cuda.memory_allocated(0) / 1024**3
+            before_reserved = torch.cuda.memory_reserved(0) / 1024**3
+            logging.info(f"📊 Antes de limpieza - Allocated: {before_allocated:.2f}GB, Reserved: {before_reserved:.2f}GB")
+            
+            # Estrategia 1: Liberar caché básica
+            torch.cuda.empty_cache()
+            
+            # Estrategia 2: Forzar garbage collection múltiple
+            for i in range(5):
+                gc.collect()
+                torch.cuda.empty_cache()
+            
+            # Estrategia 3: Liberar memoria del pool de PyTorch
+            if hasattr(torch.cuda, 'memory'):
+                try:
+                    # Intentar liberar el memory pool
+                    torch.cuda.memory.empty_cache()
+                except:
+                    pass
+            
+            # Estrategia 4: Sincronizar y liberar streams
+            try:
+                torch.cuda.synchronize()
+                torch.cuda.empty_cache()
+            except:
+                pass
+            
+            # Estrategia 5: Forzar liberación de memoria reservada
+            if torch.cuda.memory_reserved(0) > 0:
+                logging.info(f"🧹 Intentando liberar memoria reservada: {torch.cuda.memory_reserved(0) / 1024**3:.2f}GB")
+                
+                # Último recurso: intentar resetear el estado de CUDA
+                try:
+                    # Liberar todos los caches posibles
+                    if hasattr(torch.cuda, 'memory_snapshot'):
+                        torch.cuda.memory_snapshot()
+                    
+                    torch.cuda.empty_cache()
+                    gc.collect()
+                    
+                    # Si aún hay memoria reservada, intentar un enfoque más agresivo
+                    if torch.cuda.memory_reserved(0) > 1024**3:  # Más de 1GB
+                        logging.warning("🚨 Usando liberación extrema de VRAM...")
+                        
+                        # Forzar liberación completa del contexto
+                        torch.cuda.set_device(0)
+                        torch.cuda.empty_cache()
+                        
+                        # Múltiples ciclos de limpieza
+                        for _ in range(3):
+                            gc.collect()
+                            torch.cuda.empty_cache()
+                            time.sleep(0.1)  # Pequeña pausa para permitir liberación
+                        
+                except Exception as e:
+                    logging.warning(f"Error en liberación extrema: {e}")
+            
+            # Mostrar estado después de la limpieza
+            after_allocated = torch.cuda.memory_allocated(0) / 1024**3
+            after_reserved = torch.cuda.memory_reserved(0) / 1024**3
+            logging.info(f"📊 Después de limpieza - Allocated: {after_allocated:.2f}GB, Reserved: {after_reserved:.2f}GB")
+            
+            if after_reserved < before_reserved:
+                logging.info(f"✅ Memoria liberada: {(before_reserved - after_reserved):.2f}GB")
+            else:
+                logging.warning("⚠️ No se pudo liberar memoria reservada significativamente")
+        
+        logging.info("✅ Limpieza agresiva de VRAM completada")
+        
+    except Exception as e:
+        logging.error(f"Error en limpieza agresiva de VRAM: {e}")
+
+def _start_vram_cleanup_timer():
+    """Inicia un hilo de monitoreo continuo para liberar VRAM"""
+    import threading
+
+    def cleanup_worker():
+        while True:
+            time.sleep(60)  # Verificar cada 60 segundos (no tan frecuente)
+            _check_and_free_vram()
+            # Eliminar limpieza extrema adicional que interrumpe el procesamiento
+
+    thread = threading.Thread(target=cleanup_worker, daemon=True)
+    thread.start()
+
+def _force_complete_vram_cleanup():
+    """Fuerza una limpieza completa de VRAM para eliminar residuos"""
+    global _models_last_used
+    
+    try:
+        if torch.cuda.is_available():
+            # Verificar si hay residuos
+            allocated_mb = torch.cuda.memory_allocated(0) / 1024**2
+            reserved_mb = torch.cuda.memory_reserved(0) / 1024**2
+            
+            # Si hay más de 50MiB residuales, forzar limpieza extrema
+            if allocated_mb > 50 and (_models_last_used is None or 
+                (datetime.utcnow() - _models_last_used).total_seconds() > 30):
+                
+                logging.info(f"🔥 Limpieza extrema: {allocated_mb:.1f}MiB residuales detectados")
+                
+                # Estrategia 1: Reset completo del contexto CUDA
+                try:
+                    # Guardar dispositivo actual
+                    current_device = torch.cuda.current_device()
+                    
+                    # Liberar todo lo posible
+                    torch.cuda.empty_cache()
+                    import gc
+                    gc.collect()
+                    
+                    # Múltiples ciclos de limpieza
+                    for i in range(5):
+                        gc.collect()
+                        torch.cuda.empty_cache()
+                        time.sleep(0.05)
+                    
+                    # Intentar resetear el dispositivo
+                    if hasattr(torch.cuda, 'memory_snapshot'):
+                        try:
+                            torch.cuda.memory_snapshot()
+                        except:
+                            pass
+                    
+                    # Sincronizar y limpiar
+                    torch.cuda.synchronize()
+                    torch.cuda.empty_cache()
+                    
+                    # Volver al dispositivo original
+                    torch.cuda.set_device(current_device)
+                    
+                    # Verificar resultado
+                    new_allocated_mb = torch.cuda.memory_allocated(0) / 1024**2
+                    if new_allocated_mb < allocated_mb:
+                        logging.info(f"✅ Limpieza extrema exitosa: {allocated_mb:.1f}MiB -> {new_allocated_mb:.1f}MiB")
+                    
+                except Exception as e:
+                    logging.warning(f"Error en limpieza extrema: {e}")
+                
+    except Exception as e:
+        logging.error(f"Error en limpieza de VRAM: {e}")
+
+def get_vram_usage():
+    """Retorna información sobre el uso de VRAM"""
+    if torch.cuda.is_available():
+        total = torch.cuda.get_device_properties(0).total_memory / 1024**3  # GB
+        allocated = torch.cuda.memory_allocated(0) / 1024**3  # GB
+        cached = torch.cuda.memory_reserved(0) / 1024**3  # GB
+        free = total - allocated
+        
+        return {
+            'total_gb': round(total, 2),
+            'allocated_gb': round(allocated, 2), 
+            'cached_gb': round(cached, 2),
+            'free_gb': round(free, 2),
+            'whisper_loaded': _whisper_model is not None,
+            'ocr_models_loaded': _ocr_models is not None,
+            'trocr_models_loaded': _trocr_models is not None,
+            'any_models_loaded': _whisper_model is not None or _ocr_models is not None or _trocr_models is not None,
+            'last_used': _models_last_used.isoformat() if _models_last_used else None,
+            'timeout_seconds': _MODEL_TIMEOUT_SECONDS
+        }
+    else:
+        return {'error': 'CUDA not available'}
+
+def force_free_vram():
+    """Fuerza la liberación inmediata de VRAM"""
+    logging.info("🔧 Manual VRAM free triggered")
+    
+    # Forzar liberación inmediata sin esperar timeout
+    global _whisper_model, _ocr_models, _trocr_models, _models_last_used
+    
+    models_freed = []
+    
+    # Liberar todos los modelos inmediatamente
+    if _whisper_model is not None:
+        try:
+            if torch.cuda.is_available():
+                del _whisper_model
+                _whisper_model = None
+                models_freed.append("Whisper")
+        except Exception as e:
+            logging.error(f"Error freeing Whisper VRAM: {e}")
+    
+    if _ocr_models is not None:
+        try:
+            _ocr_models = None
+            models_freed.append("OCR")
+        except Exception as e:
+            logging.error(f"Error freeing OCR VRAM: {e}")
+    
+    if _trocr_models is not None:
+        try:
+            if torch.cuda.is_available():
+                model = _trocr_models.get('model') if isinstance(_trocr_models, dict) else None
+                if model is not None:
+                    model.to('cpu')
+                    models_freed.append("TrOCR")
+                torch.cuda.empty_cache()
+        except Exception as e:
+            logging.error(f"Error freeing TrOCR VRAM: {e}")
+    
+    # Limpiar variables globales
+    _whisper_model = None
+    _ocr_models = None
+    _trocr_models = None
+    _models_last_used = None
+    
+    # Forzar limpieza agresiva
+    _force_aggressive_vram_cleanup()
+    
+    if models_freed:
+        logging.info(f"🎯 Manual VRAM free - Models freed: {', '.join(models_freed)}")
+    
+    return "VRAM freed successfully"
+
+
+def ensure_local_directories() -> None:
+    """Garantiza que las carpetas locales necesarias existan."""
+    for path in (LOCAL_DOWNLOADS_PATH, LOCAL_RESUMENES, LOCAL_DOCX):
+        Path(path).mkdir(parents=True, exist_ok=True)
+
+# --- HELPER FUNCTIONS ---
+def normalize_remote_path(path):
+    """Normalize remote paths to a consistent representation."""
+    if not path:
+        return ""
+    normalized = unicodedata.normalize("NFC", str(path)).strip()
+    if not normalized:
+        return ""
+    normalized = normalized.replace("\\", "/")
+    normalized = re.sub(r"/+", "/", normalized)
+    return normalized.lstrip("/")
+
+
+def _ensure_webdav_credentials() -> None:
+    missing = [
+        name for name, value in (
+            ("NEXTCLOUD_URL", NEXTCLOUD_URL),
+            ("NEXTCLOUD_USER", NEXTCLOUD_USER),
+            ("NEXTCLOUD_PASS", NEXTCLOUD_PASS),
+        )
+        if not value
+    ]
+    if missing:
+        raise RuntimeError(
+            "Missing Nextcloud WebDAV configuration: " + ", ".join(missing)
+        )
+
+
+def _get_webdav_session() -> requests.Session:
+    global _WEBDAV_SESSION
+    if _WEBDAV_SESSION is None:
+        _ensure_webdav_credentials()
+        session = requests.Session()
+        session.auth = HTTPBasicAuth(NEXTCLOUD_USER, NEXTCLOUD_PASS)
+        adapter = HTTPAdapter(max_retries=WEBDAV_MAX_RETRIES)
+        session.mount("http://", adapter)
+        session.mount("https://", adapter)
+        _WEBDAV_SESSION = session
+    return _WEBDAV_SESSION
+
+
+def _build_webdav_url(path: str) -> str:
+    _ensure_webdav_credentials()
+    base = (WEBDAV_ENDPOINT or "").rstrip("/")
+    if not base:
+        raise RuntimeError("NEXTCLOUD_URL is not configured")
+    normalized_path = normalize_remote_path(path)
+    return f"{base}/{normalized_path}" if normalized_path else base
+
+
+def _snapshot_existing_remote_files():
+    """Collect current remote files to seed the processed registry on first run."""
+    snapshot = set()
+    targets = [
+        (REMOTE_AUDIOS_FOLDER, AUDIO_EXTENSIONS),
+        (REMOTE_PDF_FOLDER, PDF_EXTENSIONS),
+    ]
+
+    for remote_folder, extensions in targets:
+        try:
+            for remote_path in webdav_list(remote_folder):
+                normalized = normalize_remote_path(remote_path)
+                if not normalized:
+                    continue
+                if not any(normalized.lower().endswith(ext) for ext in extensions):
+                    continue
+                snapshot.add(normalized)
+        except Exception as e:
+            logging.warning(f"No se pudo obtener listado inicial de '{remote_folder}': {e}")
+
+    return snapshot
+
+
+def _initialize_processed_registry():
+    """Ensure the processed files registry exists, migrating legacy data if needed."""
+    target_dir = os.path.dirname(PROCESSED_FILES_PATH) or BASE_DIR
+
+    try:
+        os.makedirs(target_dir, exist_ok=True)
+    except Exception as e:
+        logging.error(f"No se pudo crear el directorio para el registro de procesados: {e}")
+        return
+
+    for legacy_path in LEGACY_PROCESSED_PATHS:
+        if not legacy_path:
+            continue
+        if os.path.abspath(legacy_path) == os.path.abspath(PROCESSED_FILES_PATH):
+            continue
+        if os.path.exists(legacy_path):
+            try:
+                shutil.copy2(legacy_path, PROCESSED_FILES_PATH)
+                logging.info(f"Registro de procesados migrado desde {legacy_path}")
+                return
+            except Exception as e:
+                logging.error(f"Error al migrar registro de {legacy_path}: {e}")
+
+    snapshot = _snapshot_existing_remote_files()
+    try:
+        with open(PROCESSED_FILES_PATH, "w", encoding="utf-8") as f:
+            timestamp = datetime.utcnow().isoformat() + "Z"
+            f.write(f"# Archivos procesados - inicializado {timestamp}\n")
+            for entry in sorted(snapshot):
+                f.write(entry + "\n")
+        if snapshot:
+            logging.info(f"Registro de procesados inicializado con {len(snapshot)} entradas existentes")
+    except Exception as e:
+        logging.error(f"No se pudo crear el registro de procesados: {e}")
+
+
+def load_processed_files() -> ProcessedRegistry:
+    processed: ProcessedRegistry = set()
+
+    if not os.path.exists(PROCESSED_FILES_PATH):
+        _initialize_processed_registry()
+
+    if not os.path.exists(PROCESSED_FILES_PATH):
+        logging.warning("Registro de procesados no disponible; se procesarán todos los archivos encontrados.")
+        return processed
+
+    try:
+        with open(PROCESSED_FILES_PATH, "r", encoding="utf-8") as f:
+            for raw_line in f:
+                line = raw_line.strip()
+                if not line or line.startswith('#'):
+                    continue
+
+                normalized = normalize_remote_path(line)
+                if not normalized:
+                    continue
+
+                ext = os.path.splitext(normalized)[1].lower()
+                if not ext:
+                    continue
+
+                processed.add(normalized)
+                base_name = os.path.basename(normalized)
+                processed.add(base_name)
+
+                # Retrocompatibilidad para entradas sin carpeta
+                if '/' not in normalized:
+                    if ext in AUDIO_EXTENSIONS:
+                        processed.add(f"{REMOTE_AUDIOS_FOLDER}/{base_name}")
+                    elif ext in PDF_EXTENSIONS:
+                        processed.add(f"{REMOTE_PDF_FOLDER}/{base_name}")
+
+        return processed
+    except Exception as e:
+        logging.error(f"Error reading processed files: {e}")
+        return processed
+
+
+def save_processed_file(remote_path: str) -> None:
+    normalized = normalize_remote_path(remote_path)
+    if not normalized:
+        logging.warning(f"Cannot mark empty remote path as processed: {remote_path}")
+        return
+
+    try:
+        processed: ProcessedRegistry = load_processed_files()
+        if normalized in processed or os.path.basename(normalized) in processed:
+            logging.info(f"Archivo ya marcado como procesado: {normalized}")
+            return
+
+        with open(PROCESSED_FILES_PATH, "a", encoding="utf-8") as f:
+            f.write(normalized + "\n")
+        logging.info(f"Marcado como procesado: {normalized}")
+    except Exception as e:
+        logging.error(f"Error saving processed file {normalized}: {e}")
+        # Intentar crear el archivo y reintentar
+        try:
+            os.makedirs(os.path.dirname(PROCESSED_FILES_PATH) or BASE_DIR, exist_ok=True)
+            with open(PROCESSED_FILES_PATH, "w", encoding="utf-8") as f:
+                f.write("# Archivos procesados - recreado automáticamente\n")
+                f.write(normalized + "\n")
+            logging.info(f"Archivo de procesados recreado y guardado: {normalized}")
+        except Exception as e2:
+            logging.error(f"Error recreating processed files: {e2}")
+
+def run_subprocess(cmd, timeout):
+    """Run subprocess capturing stdout/stderr and raise a descriptive error on failure."""
+    cp = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
+    if cp.returncode != 0:
+        stderr = cp.stderr.strip()
+        stdout = cp.stdout.strip()
+        raise Exception(f"Command {cmd} failed (rc={cp.returncode}). stderr: {stderr!s} stdout: {stdout!s}")
+    return cp
+
+def clean_filename(name):
+    """Reemplaza caracteres problemáticos para WebDAV/Nextcloud"""
+    name = re.sub(r'[\\/:"*?<>|]+', '_', name)
+    name = name.replace('...', '_')
+    name = name.replace(' ', '_')
+    return name
+
+# --- WEBDAV FUNCTIONS ---
+def webdav_list(path: str) -> list[str]:
+    """Lista archivos en una carpeta de Nextcloud usando PROPFIND."""
+    session = _get_webdav_session()
+    normalized_target = normalize_remote_path(path)
+    response = None
+    try:
+        response = session.request(
+            "PROPFIND",
+            _build_webdav_url(normalized_target),
+            headers={"Depth": "1"},
+            timeout=HTTP_TIMEOUT,
+        )
+        response.raise_for_status()
+
+        root = ET.fromstring(response.content)
+        files: list[str] = []
+        prefixes = ["/remote.php/webdav/"]
+        if NEXTCLOUD_USER:
+            prefixes.append(f"/remote.php/dav/files/{NEXTCLOUD_USER}/")
+
+        for response_node in root.findall("{DAV:}response"):
+            href_element = response_node.find("{DAV:}href")
+            if href_element is None or not href_element.text:
+                continue
+
+            relative_path = requests.utils.unquote(href_element.text)
+            for prefix in prefixes:
+                if relative_path.startswith(prefix):
+                    relative_path = relative_path[len(prefix):]
+
+            normalized_response = normalize_remote_path(relative_path)
+            if not normalized_response or normalized_response.endswith('/'):
+                continue
+            if normalized_response.strip('/') == normalized_target.strip('/'):
+                continue
+            files.append(normalized_response)
+
+        return files
+    except Exception as exc:
+        logging.error(f"WebDAV LIST falló para '{path}': {exc}")
+        return []
+    finally:
+        if response is not None:
+            response.close()
+
+
+def webdav_download(remote_path: str, local_path: str) -> None:
+    """Download a file from Nextcloud and stream it to ``local_path``.
+
+    The parent directory of ``local_path`` is created when missing. Errors
+    from the request, from ``raise_for_status`` or from writing the file
+    propagate to the caller; the response is always closed.
+    """
+    session = _get_webdav_session()
+    destination = Path(local_path)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+
+    response = session.get(
+        _build_webdav_url(remote_path),
+        stream=True,
+        timeout=HTTP_TIMEOUT,
+    )
+    try:
+        response.raise_for_status()
+        with destination.open('wb') as handle:
+            # Empty chunks are skipped; the rest are written in order.
+            handle.writelines(
+                chunk
+                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE)
+                if chunk
+            )
+    finally:
+        response.close()
+
+
+def webdav_upload(local_path: str, remote_path: str) -> None:
+    """Upload a local file to Nextcloud with a WebDAV PUT.
+
+    Args:
+        local_path: File to read; it is streamed as the request body.
+        remote_path: Destination path, turned into a URL by ``_build_webdav_url``.
+
+    Raises:
+        OSError: If ``local_path`` cannot be opened.
+        Exception: Whatever the session raises for a failed request, and
+            whatever ``raise_for_status`` raises for an error status
+            (presumably ``requests.HTTPError`` -- confirm the session type).
+    """
+    session = _get_webdav_session()
+    with open(local_path, 'rb') as payload:
+        response = session.put(
+            _build_webdav_url(remote_path),
+            data=payload,
+            timeout=HTTP_TIMEOUT,
+        )
+        try:
+            response.raise_for_status()
+        finally:
+            # Close the response on success and on failure, matching the other
+            # WebDAV helpers in this file, so the connection is not left open.
+            response.close()
+
+
+def webdav_mkdir(remote_path: str) -> None:
+    """Create a folder in Nextcloud with a WebDAV MKCOL (best effort).
+
+    Status codes 200, 201, 204 and 405 count as success. Any other outcome,
+    including request errors, is logged and swallowed: this function never
+    raises.
+    """
+    # NOTE(review): 405 is presumably accepted because the folder already exists.
+    accepted_statuses = (200, 201, 204, 405)
+    session = _get_webdav_session()
+    response = None
+    try:
+        response = session.request(
+            "MKCOL",
+            _build_webdav_url(remote_path),
+            timeout=HTTP_TIMEOUT,
+        )
+        if response.status_code not in accepted_statuses:
+            response.raise_for_status()
+    except Exception as exc:
+        logging.error(f"WebDAV MKCOL falló para '{remote_path}': {exc}")
+    finally:
+        if response is not None:
+            response.close()
+
+
+# --- CLAUDE (GLM-4.6) HELPERS ---
+def get_claude_env(model: Optional[str] = None) -> Dict[str, str]:
+    """Build the environment used to launch the Claude CLI.
+
+    Starts from a copy of ``os.environ`` and fills in ``ANTHROPIC_BASE_URL``
+    and, when a fallback token is configured, ``ANTHROPIC_AUTH_TOKEN`` -- both
+    only if not already set. ``CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS`` is always
+    forced to ``'1'``.
+
+    Args:
+        model: Model to request. When given it overrides any model variables
+            inherited from the environment; when omitted, ``ZAI_DEFAULT_MODEL``
+            is used only as a default for variables that are not already set.
+
+    Returns:
+        A new environment mapping; ``os.environ`` itself is not modified.
+    """
+    env = os.environ.copy()
+    env.setdefault('ANTHROPIC_BASE_URL', ZAI_BASE_URL)
+    if ZAI_AUTH_TOKEN_FALLBACK:
+        env.setdefault('ANTHROPIC_AUTH_TOKEN', ZAI_AUTH_TOKEN_FALLBACK)
+    env['CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS'] = '1'
+
+    model_variables = ('CLAUDE_MODEL', 'CLAUDE_DEFAULT_MODEL', 'ANTHROPIC_DEFAULT_MODEL')
+    if model:
+        # An explicit argument must win over inherited values; with setdefault
+        # it was silently ignored whenever the variable already existed.
+        for variable in model_variables:
+            env[variable] = model
+    elif ZAI_DEFAULT_MODEL:
+        for variable in model_variables:
+            env.setdefault(variable, ZAI_DEFAULT_MODEL)
+
+    return env
+
+
+def run_claude_cli(prompt: str, timeout: int = 300, model: Optional[str] = None) -> str:
+    """Run the ``claude`` CLI with ``prompt`` on stdin and return its stdout.
+
+    Args:
+        prompt: Text passed to the process on standard input.
+        timeout: Seconds handed to ``subprocess.run`` as its timeout.
+        model: Forwarded to ``get_claude_env`` to build the environment.
+
+    Returns:
+        The process's standard output, stripped.
+
+    Raises:
+        RuntimeError: If the process exits with a non-zero return code; the
+            message carries stderr, or stdout when stderr is empty.
+    """
+    completed = subprocess.run(
+        ['claude', '--dangerously-skip-permissions'],
+        input=prompt,
+        env=get_claude_env(model),
+        text=True,
+        capture_output=True,
+        timeout=timeout,
+    )
+
+    stdout = (completed.stdout or '').strip()
+    if completed.returncode == 0:
+        return stdout
+
+    stderr = (completed.stderr or '').strip()
+    message = stderr or stdout or 'sin salida'
+    raise RuntimeError(f"Claude CLI failed (rc={completed.returncode}): {message}")
+
+def _get_gemini_env(model_name: Optional[str] = None) -> Dict[str, str]:
+    """Return a copy of the process environment prepared for the Gemini CLI.
+
+    ``GEMINI_API_KEY`` and ``GEMINI_MODEL`` are added only when a value is
+    available and the variable is not already present in the environment.
+    """
+    env = os.environ.copy()
+    candidates = (
+        ("GEMINI_API_KEY", GEMINI_API_KEY),
+        ("GEMINI_MODEL", model_name),
+    )
+    for variable, value in candidates:
+        if value:
+            env.setdefault(variable, value)
+    return env
+
+def _call_gemini_api(prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
+    """Send ``prompt`` to the Gemini ``generateContent`` endpoint and return the text.
+
+    Args:
+        prompt: Text sent as the single content part.
+        use_flash: Selects ``GEMINI_FLASH_MODEL`` (or its built-in default)
+            when true, otherwise ``GEMINI_PRO_MODEL`` (or its default).
+        timeout: Request timeout in seconds.
+
+    Returns:
+        The text parts of the first candidate that contains any text, joined
+        with newlines and stripped.
+
+    Raises:
+        RuntimeError: If no API key is configured, the request fails, the
+            body is not JSON, the prompt was blocked, or no candidate has text.
+    """
+    if not GEMINI_API_KEY:
+        raise RuntimeError("Gemini API key not configured")
+
+    if use_flash:
+        model = GEMINI_FLASH_MODEL or "gemini-2.5-flash"
+    else:
+        model = GEMINI_PRO_MODEL or "gemini-2.5-pro-preview-06-05"
+    endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
+    payload = {
+        "contents": [
+            {
+                "parts": [
+                    {"text": prompt}
+                ]
+            }
+        ]
+    }
+
+    try:
+        # The key travels in a header, not in the query string: request errors
+        # embed the full URL, and that text is logged and returned to callers,
+        # which would expose the secret.
+        response = requests.post(
+            endpoint,
+            headers={"x-goog-api-key": GEMINI_API_KEY},
+            json=payload,
+            timeout=timeout,
+        )
+        response.raise_for_status()
+    except requests.RequestException as exc:
+        raise RuntimeError(f"Gemini API request failed: {exc}") from exc
+
+    try:
+        data = response.json()
+    except ValueError as exc:
+        raise RuntimeError("Gemini API returned a non-JSON response") from exc
+
+    prompt_feedback = data.get("promptFeedback") or {}
+    if prompt_feedback.get("blockReason"):
+        raise RuntimeError(f"Gemini prompt blocked: {prompt_feedback.get('blockReason')}")
+
+    for candidate in data.get("candidates") or []:
+        finish_reason = candidate.get("finishReason")
+        if finish_reason and finish_reason not in ("STOP", "FINISH_REASON_UNSPECIFIED"):
+            logging.warning(f"Gemini candidate finalizado con estado {finish_reason}, intentando leer contenido igualmente.")
+        # "content" or "parts" may be absent or null; treat both as no parts.
+        parts = (candidate.get("content") or {}).get("parts") or []
+        texts = [part.get("text", "") for part in parts if part.get("text")]
+        if texts:
+            return "\n".join(texts).strip()
+
+    raise RuntimeError("Gemini API returned empty response")
+
+def _call_gemini_cli(prompt: str, use_yolo: bool = True, timeout: int = 300) -> str:
+    """Run the Gemini CLI with ``prompt`` on stdin and return its output.
+
+    Args:
+        prompt: Text passed to the process on standard input.
+        use_yolo: Adds ``--yolo`` to the command line; it also selects the
+            flash model when true and the pro model when false.
+        timeout: Seconds handed to ``subprocess.run`` as its timeout.
+
+    Returns:
+        The stripped, non-empty standard output of the CLI.
+
+    Raises:
+        FileNotFoundError: If ``GEMINI_CLI_PATH`` is not set.
+        RuntimeError: If the process exits non-zero or prints nothing.
+    """
+    if not GEMINI_CLI_PATH:
+        raise FileNotFoundError("Gemini CLI binary not found")
+
+    if use_yolo:
+        cmd = [GEMINI_CLI_PATH, "--yolo"]
+        model_name = GEMINI_FLASH_MODEL or "gemini-2.5-flash"
+    else:
+        cmd = [GEMINI_CLI_PATH]
+        model_name = GEMINI_PRO_MODEL or "gemini-2.5-pro-preview-06-05"
+
+    completed = subprocess.run(
+        cmd,
+        input=prompt,
+        env=_get_gemini_env(model_name),
+        text=True,
+        capture_output=True,
+        timeout=timeout,
+    )
+
+    stdout = (completed.stdout or '').strip()
+    if completed.returncode != 0:
+        stderr = (completed.stderr or '').strip()
+        message = stderr or stdout or 'sin salida'
+        raise RuntimeError(f"Gemini CLI failed (rc={completed.returncode}): {message}")
+
+    if not stdout:
+        raise RuntimeError("Gemini CLI returned empty output")
+    return stdout
+
+# --- AUDIO PROCESSING FUNCTIONS ---
+# Galician-looking spellings mapped to the Spanish word they should become.
+_TRANSCRIPT_CORRECTIONS = {
+    "xeo": "yo",
+    "non": "no",
+    "hai": "hay",
+    "entóns": "entonces",
+    "máis": "más",
+    "tamén": "también",
+    "sempre": "siempre",
+    "verdade": "verdad",
+    "cousa": "cosa",
+    "xente": "gente",
+    "tempo": "tiempo",
+    "lingua": "lengua",
+    "pode": "puede",
+    "xamón": "shogun",
+    "xomón": "shogun",
+    "unha": "una",
+    "dunha": "de una",
+    "nunha": "en una",
+    "xeral": "general",
+    "xeraria": "jerarquía",
+    "ximéas": "temas",
+    "ximeas": "temas",
+    "ronquera": "reunión",
+    "xocalizar": "juntar",
+    "oanxacular": "juntar",
+    "xocal": "junto",
+    "lúmulo": "grupo",
+    "lúmido": "grupo",
+    "lúmada": "grupos",
+    "nulunxación": "reunificación",
+    "xotalipa": "capitalista",
+    "crente": "gente",
+    "enxucar": "juntar",
+}
+
+# Whole-word matcher (longest alternative first) with an optional plural "s".
+# Plain substring replacement corrupted ordinary Spanish: "poder" -> "pueder",
+# "canon" -> "cano", "temporal" -> "tiempoal".
+_TRANSCRIPT_CORRECTION_RE = re.compile(
+    r"(?<!\w)("
+    + "|".join(
+        re.escape(word)
+        for word in sorted(_TRANSCRIPT_CORRECTIONS, key=len, reverse=True)
+    )
+    + r")(s?)(?!\w)"
+)
+
+
+def _fix_transcript_text(text):
+    """Apply the whole-word corrections to one segment and collapse whitespace."""
+    corrected = _TRANSCRIPT_CORRECTION_RE.sub(
+        lambda match: _TRANSCRIPT_CORRECTIONS[match.group(1)] + match.group(2),
+        text.strip(),
+    )
+    return re.sub(r'\s+', ' ', corrected).strip()
+
+
+def transcribe_audio(audio_path, output_path):
+    """Transcribe an audio file with Whisper (forced Spanish) into a timestamped text file.
+
+    The Whisper "medium" model is loaded on CUDA on first use and cached in
+    the module-level ``_whisper_model``. If transcription fails with a CUDA
+    error, the cached model is replaced by the "base" model and the
+    transcription is retried once with a different decoding configuration.
+
+    Args:
+        audio_path: Path of the audio file handed to ``model.transcribe``.
+        output_path: Text file to write; one ``[HH:MM:SS] text`` line per segment.
+
+    Raises:
+        RuntimeError: If the model cannot be loaded, if transcription fails
+            with a non-CUDA runtime error, or if the CUDA retry also fails.
+    """
+    global _whisper_model
+
+    # Check and free VRAM if models are idle
+    _check_and_free_vram()
+
+    # Load Whisper model if not already loaded
+    if _whisper_model is None:
+        try:
+            logging.info("Loading Whisper model (medium) for Spanish transcription...")
+            # Free cached CUDA memory first
+            torch.cuda.empty_cache()
+            # Environment settings intended to improve CUDA handling
+            os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
+            _whisper_model = whisper.load_model("medium", device="cuda")
+            logging.info("✅ Whisper model loaded successfully on GPU")
+        except RuntimeError as e:
+            if "CUDA" in str(e) or "GPU" in str(e):
+                error_msg = f"❌ Error cargando Whisper en GPU: {e}"
+                logging.error(error_msg)
+                send_telegram_message(error_msg)
+                # Free memory and retry once; a second failure propagates
+                torch.cuda.empty_cache()
+                time.sleep(2)
+                _whisper_model = whisper.load_model("medium", device="cuda")
+                logging.info("✅ Whisper model loaded successfully on GPU (retry)")
+            else:
+                raise
+
+    # Update usage timestamp
+    _update_models_usage()
+
+    logging.info("Starting audio transcription with Spanish optimization...")
+    try:
+        # Faster configuration for Spanish
+        result = _whisper_model.transcribe(
+            audio_path,
+            language="es",  # Force Spanish
+            task="transcribe",
+            temperature=0.0,  # Less randomness
+            beam_size=1,  # Faster
+            condition_on_previous_text=False,  # Avoid repetition loops
+            fp16=True,  # Faster
+            verbose=False
+        )
+    except RuntimeError as e:
+        if "CUDA" not in str(e):
+            raise
+        error_msg = f"❌ CUDA error durante transcripción: {e}"
+        logging.error(error_msg)
+        send_telegram_message(error_msg)
+        # Retry on the GPU with the smaller model
+        try:
+            logging.info("🔄 Reintentando transcripción con GPU (config ligera)...")
+            # Rebind to None instead of `del`: deleting the global left the
+            # name undefined when the load below failed, so the next call
+            # crashed with NameError on `_whisper_model is None`.
+            _whisper_model = None
+            torch.cuda.empty_cache()
+            time.sleep(2)
+            _whisper_model = whisper.load_model("base", device="cuda")
+            result = _whisper_model.transcribe(
+                audio_path,
+                language="es",
+                task="transcribe",
+                temperature=0.0,
+                best_of=3,
+                beam_size=3,
+                patience=1.0,
+                initial_prompt="Este es un audio en español. Hablará claramente y de forma fluida.",
+                condition_on_previous_text=True,
+                verbose=True
+            )
+            logging.info("✅ Transcripción completada con GPU (modelo base)")
+        except Exception as gpu_error:
+            logging.error(f"❌ Error crítico en transcripción con GPU: {gpu_error}")
+            raise RuntimeError(f"❌ Error crítico en transcripción: {gpu_error}") from gpu_error
+
+    # Refresh the usage timestamp during processing
+    _update_models_usage()
+
+    # Post-processing: timestamp each segment and clean up its text
+    with open(output_path, "w", encoding="utf-8") as f:
+        for seg in result["segments"]:
+            start = int(seg["start"])
+            hours = start // 3600
+            minutes = (start % 3600) // 60
+            seconds = start % 60
+            timestamp = f"[{hours:02}:{minutes:02}:{seconds:02}]"
+
+            text = _fix_transcript_text(seg['text'])
+
+            f.write(f"{timestamp} {text}\n")
+
+    # Refresh the usage timestamp when finished
+    _update_models_usage()
+    logging.info(f"Transcription saved to {output_path}")
+
+def run_gemini(prompt, use_flash=True):
+    """Generate content with the Claude CLI, falling back to the Gemini CLI and then the Gemini API.
+
+    Each backend is attempted only when it is configured. The first backend
+    that succeeds provides the return value.
+
+    Args:
+        prompt: Text sent to the backend.
+        use_flash: Forwarded to the Gemini API call only.
+
+    Returns:
+        The generated text, or -- when every attempted backend failed or none
+        is configured -- a string starting with ``"Error"`` that describes
+        the failures. This function does not raise for backend errors.
+    """
+    # (label, exception) pairs in the order the backends were attempted.
+    failures = []
+
+    if CLAUDE_CLI_PATH or ZAI_AUTH_TOKEN_FALLBACK:
+        try:
+            return run_claude_cli(prompt, timeout=300)
+        except FileNotFoundError as exc:
+            failures.append(("Claude CLI", exc))
+            logging.warning("Claude CLI no disponible, utilizando Gemini como fallback.")
+        except Exception as exc:
+            failures.append(("Claude CLI", exc))
+            logging.error(f"Claude CLI error: {exc}")
+
+    if GEMINI_CLI_PATH:
+        try:
+            result = _call_gemini_cli(prompt, use_yolo=True)
+            if failures:
+                logging.info("Gemini CLI respondió correctamente tras fallo de Claude CLI.")
+            return result
+        except FileNotFoundError as exc:
+            failures.append(("Gemini CLI", exc))
+            logging.warning("Gemini CLI no disponible en el sistema.")
+        except Exception as exc:
+            failures.append(("Gemini CLI", exc))
+            logging.error(f"Gemini CLI error: {exc}")
+
+    if GEMINI_API_KEY:
+        try:
+            result = _call_gemini_api(prompt, use_flash=use_flash)
+            if failures:
+                logging.info("Gemini API respondió correctamente tras fallos previos.")
+            return result
+        except Exception as gemini_exc:
+            logging.error(f"Gemini API error: {gemini_exc}")
+            failures.append(("Gemini API", gemini_exc))
+            # This branch separates entries with " ; "; the one below uses "; ".
+            return " ; ".join(f"Error {label}: {error}" for label, error in failures)
+
+    if failures:
+        return "; ".join(f"Error {label}: {error}" for label, error in failures)
+
+    return "Error: No hay servicios de resumen disponibles (Claude/Gemini)."
+
+def run_gemini_api_fallback(prompt, use_flash=True):
+    """Compatibility alias: delegates unchanged to ``run_gemini``."""
+    response = run_gemini(prompt, use_flash=use_flash)
+    return response
+
+def run_gemini_summary(prompt):
+    """Compatibility wrapper: calls ``run_gemini`` with ``use_flash`` set to true."""
+    summary = run_gemini(prompt, use_flash=True)
+    return summary
+
+def run_ollama(prompt):
+    """Generate content through the Ollama chat endpoint.
+
+    Returns the message content from the JSON reply. Any failure (request
+    error, error status, unexpected reply shape) is returned as a string of
+    the form ``"Error Ollama: ..."`` instead of being raised.
+    """
+    request_body = {
+        "model": OLLAMA_MODEL,
+        "messages": [{"role": "user", "content": prompt}],
+        "stream": False
+    }
+    try:
+        reply = requests.post(f"{OLLAMA_HOST}/api/chat", json=request_body, timeout=120)
+        reply.raise_for_status()
+        return reply.json()['message']['content']
+    except Exception as e:
+        return f"Error Ollama: {e}"
+
+# --- CLASIFICACIÓN INTELIGENTE DE CONTENIDO ---
+def classify_content_intelligent(text_content):
+    """Classify summary content into one of the thematic categories using the LLM.
+
+    Args:
+        text_content: Text embedded in the classification prompt.
+
+    Returns:
+        One of ``"historia"``, ``"analisis_contable"``,
+        ``"instituciones_gobierno"`` or ``"otras_clases"``. The last one is
+        also the fallback when the reply is an error, cannot be matched, or
+        an exception occurs; this function does not raise.
+    """
+    import unicodedata
+
+    classification_prompt = f"""
+Analiza el siguiente contenido y clasifícalo en UNA de estas 4 categorías:
+
+1. HISTORIA - Contenido sobre eventos históricos, cronologías, guerras, revoluciones, personajes históricos, civilizaciones antiguas, historia política, social o económica.
+
+2. ANALISIS CONTABLE - Contenido sobre contabilidad, finanzas, balances, estados financieros, costos, presupuestos, auditorías, impuestos, análisis de inversiones, contabilidad de costos.
+
+3. INSTITUCIONES DEL GOBIERNO - Contenido sobre gobierno, política, ideologías políticas, instituciones estatales, administración pública, leyes, reglamentos, políticas públicas, estructura gubernamental.
+
+4. OTRAS CLASES - Contenido que no encaja en las categorías anteriores: ciencias, tecnología, literatura, arte, filosofía, educación, medicina, derecho, etc.
+
+Instrucciones:
+- Responde ÚNICAMENTE con el nombre de la categoría (HISTORIA, ANALISIS CONTABLE, INSTITUCIONES DEL GOBIERNO, OTRAS CLASES)
+- No incluyas explicaciones ni texto adicional
+- Basa tu decisión en el contenido general del texto
+
+Contenido a clasificar:
+{text_content}
+"""
+
+    try:
+        # Use the LLM chain for the classification
+        raw_answer = run_gemini_summary(classification_prompt).strip().upper()
+
+        # run_gemini reports failures as strings starting with "Error"; such
+        # text must not be keyword-matched as if it were a category.
+        if raw_answer.startswith("ERROR"):
+            logging.error(f"❌ Error en clasificación inteligente: {raw_answer}")
+            return "otras_clases"
+
+        # Strip diacritics so accented replies such as "ANÁLISIS CONTABLE" or
+        # "POLÍTICA" match the unaccented keys below.
+        classification = ''.join(
+            ch
+            for ch in unicodedata.normalize('NFKD', raw_answer)
+            if not unicodedata.combining(ch)
+        )
+
+        # Map the (accent-free) answers to the folder dictionary keys
+        category_mapping = {
+            "HISTORIA": "historia",
+            "ANALISIS CONTABLE": "analisis_contable",
+            "INSTITUCIONES DEL GOBIERNO": "instituciones_gobierno",
+            "INSTITUCIONES DE GOBIERNO": "instituciones_gobierno",
+            "GOBIERNO": "instituciones_gobierno",
+            "POLITICA": "instituciones_gobierno",
+            "OTRAS CLASES": "otras_clases",
+            "OTRAS": "otras_clases"
+        }
+
+        # Exact match first
+        if classification in category_mapping:
+            return category_mapping[classification]
+
+        # Otherwise the first key (in declaration order) contained in the reply
+        for key, value in category_mapping.items():
+            if key in classification:
+                return value
+
+        # Unclassifiable reply: fall back to the default category
+        logging.warning(f"⚠️ No se pudo clasificar el contenido: '{classification}', usando categoría por defecto")
+        return "otras_clases"
+
+    except Exception as e:
+        logging.error(f"❌ Error en clasificación inteligente: {e}")
+        return "otras_clases"  # Default category on error
+
+def ensure_thematic_folders_exist():
+    """Request creation of every thematic folder in Nextcloud.
+
+    Calls ``webdav_mkdir`` for each value of ``TEMATIC_FOLDERS`` and logs the
+    outcome; an exception for one folder is logged and does not stop the rest.
+    """
+    for folder_name in TEMATIC_FOLDERS.values():
+        try:
+            webdav_mkdir(folder_name)
+            logging.info(f"📁 Verificada/creada carpeta: {folder_name}")
+        except Exception as e:
+            logging.error(f"❌ Error creando carpeta {folder_name}: {e}")
+
+def get_upload_path_for_category(category_key, filename):
+    """Return the upload path for ``filename`` inside the folder of a category.
+
+    Args:
+        category_key: Key into ``TEMATIC_FOLDERS``; unknown keys fall back to
+            the ``"otras_clases"`` folder.
+        filename: Name of the file to place in that folder.
+
+    Returns:
+        The folder name and ``filename`` joined with ``/``.
+    """
+    # The folder names are remote (they are passed to webdav_mkdir), so the
+    # path is always joined with "/" instead of the local OS separator that
+    # os.path.join would use.
+    import posixpath
+
+    if category_key in TEMATIC_FOLDERS:
+        folder_name = TEMATIC_FOLDERS[category_key]
+    else:
+        # Unknown category: use "Otras Clases"
+        folder_name = TEMATIC_FOLDERS["otras_clases"]
+    return posixpath.join(folder_name, filename)
+
+# --- EXTRACCIÓN DE TEMAS Y RENOMBRADO AUTOMÁTICO ---
+def extract_key_topics_from_text(text):
+    """Extract the main topics of a text using the LLM.
+
+    Args:
+        text: Source text; only its first 2000 characters are sent.
+
+    Returns:
+        A list of up to three topic strings. Texts shorter than 100
+        characters return ``["Temas principales"]`` without calling the LLM;
+        an error reply or an exception returns
+        ``["Temas principales", "Contenido académico"]``.
+    """
+    if not text or len(text) < 100:
+        return ["Temas principales"]
+
+    # Limit the text so the prompt does not exceed the token budget. This used
+    # to be a "#" comment inside the f-string, so it was sent to the model as
+    # part of the prompt.
+    excerpt = text[:2000]
+
+    topics_prompt = f"""
+Analiza el siguiente texto y extrae los 2-3 temas principales más importantes.
+Responde ÚNICAMENTE con los temas separados por comas, sin explicaciones.
+Usa máximo 3 palabras por tema.
+
+Ejemplos de respuesta correcta:
+"Revolución Francesa, Ilustración, Monarquía"
+"Contabilidad financiera, Estados contables, Análisis de ratios"
+"Gobierno democrático, Separación de poderes, Constitución"
+
+Texto a analizar:
+{excerpt}
+"""
+
+    try:
+        topics_response = run_gemini_summary(topics_prompt)
+
+        # run_gemini reports failures as strings starting with "Error"; do not
+        # turn an error message into topics (they end up in file names).
+        if topics_response.strip().startswith("Error"):
+            logging.error(f"Error extrayendo temas: {topics_response}")
+            return ["Temas principales", "Contenido académico"]
+
+        # Clean and process the reply
+        topics = []
+        for topic in topics_response.split(','):
+            topic = topic.strip().title()
+            if topic and len(topic) > 2:
+                # Drop unwanted characters
+                topic = re.sub(r'[^\w\sáéíóúüñÁÉÍÓÚÜÑ-]', '', topic)
+                if topic:
+                    topics.append(topic)
+
+        # Ensure at least 2 topics
+        if len(topics) == 1 and len(topics[0]) > 20:
+            # A single long topic is split into two two-word topics
+            words = topics[0].split()
+            if len(words) >= 4:
+                topics = [words[0] + " " + words[1], words[2] + " " + words[3]]
+        elif len(topics) < 2:
+            topics.append("Temas principales")
+
+        # Keep at most 3 topics
+        topics = topics[:3]
+
+        return topics
+
+    except Exception as e:
+        logging.error(f"Error extrayendo temas: {e}")
+        return ["Temas principales", "Contenido académico"]
+
+def clean_filename_for_topics(name: str, max_length: Optional[int] = None) -> str:
+    """Normalize a file name and shorten it to a length limit, keeping the extension.
+
+    Characters ``< > : " / \\ | ? *`` are removed and whitespace runs are
+    collapsed to one space. ``"archivo"`` is returned when nothing usable
+    remains.
+
+    Args:
+        name: File name to normalize.
+        max_length: Maximum length; a falsy value selects
+            ``MAX_FILENAME_LENGTH``. A limit of zero or less disables
+            truncation.
+
+    Returns:
+        The normalized name. When truncation is needed the stem is cut and
+        trailing spaces, dots, hyphens and underscores are trimmed from it.
+    """
+    placeholder = "archivo"
+    trailing_noise = ' .-_'
+
+    if not name:
+        return placeholder
+
+    without_reserved = re.sub(r'[<>:"/\\|?*]+', '', name)
+    sanitized = re.sub(r'\s+', ' ', without_reserved).strip()
+    if not sanitized:
+        return placeholder
+
+    limit = max_length if max_length else MAX_FILENAME_LENGTH
+    if limit <= 0 or len(sanitized) <= limit:
+        return sanitized
+
+    stem, ext = os.path.splitext(sanitized)
+    if not ext:
+        return sanitized[:limit].rstrip(trailing_noise) or placeholder
+
+    ext_length = len(ext)
+    short_stem = stem[:max(1, limit - ext_length)].rstrip(trailing_noise) or placeholder
+    if len(short_stem) + ext_length <= limit:
+        return f"{short_stem}{ext}"
+
+    # The extension alone does not fit: keep only its tail.
+    if ext_length >= limit:
+        return ext[-limit:]
+
+    last_resort = short_stem[: limit - ext_length].rstrip(trailing_noise) or placeholder
+    return f"{last_resort}{ext}"
+
+
+def ensure_unique_local_filename(directory: Path, filename: str) -> str:
+    """Return a cleaned file name that does not collide with a file in ``directory``.
+
+    The name is first normalized with ``clean_filename_for_topics``. If it
+    already exists, ``-1``, ``-2``, ... is inserted before the extension
+    until a free name is found.
+
+    Args:
+        directory: Directory in which the name must be unique.
+        filename: Desired file name.
+
+    Returns:
+        The first candidate name that does not exist in ``directory``.
+    """
+    candidate = clean_filename_for_topics(filename, MAX_FILENAME_LENGTH)
+    if not (directory / candidate).exists():
+        return candidate
+
+    stem, ext = os.path.splitext(candidate)
+    counter = 1
+    while True:
+        suffix = f"-{counter}"
+        base = stem
+        if MAX_FILENAME_LENGTH > 0:
+            # Shorten the stem BEFORE appending the counter. Truncating the
+            # finished name cut the counter off again, so a candidate already
+            # at the limit reproduced itself on every pass and never ended.
+            room = max(1, MAX_FILENAME_LENGTH - len(ext) - len(suffix))
+            if len(stem) > room:
+                base = stem[:room].rstrip(' .-_') or "archivo"
+        new_name = f"{base}{suffix}{ext}"
+        if not (directory / new_name).exists():
+            return new_name
+        counter += 1
+
+
+def _append_markdown_to_doc(doc: Document, markdown_text: str) -> None:
+    """Append a simple rendering of ``markdown_text`` to ``doc``.
+
+    Lines starting with ``#`` become headings (level clamped to 1-6), lines
+    starting with ``-``, ``*`` or ``•`` become 'List Bullet' paragraphs, and
+    consecutive other lines are joined with spaces into one paragraph. Blank
+    lines end the current paragraph.
+    """
+    pending = []
+
+    def flush_paragraph():
+        """Emit the accumulated lines as one paragraph, if there are any."""
+        if pending:
+            doc.add_paragraph(' '.join(pending))
+            pending.clear()
+
+    for raw_line in markdown_text.splitlines():
+        content = raw_line.strip()
+
+        if not content:
+            flush_paragraph()
+            continue
+
+        if content.startswith('#'):
+            flush_paragraph()
+            after_hashes = content.lstrip('#')
+            level = len(content) - len(after_hashes)
+            heading_text = after_hashes.strip()
+            if heading_text:
+                doc.add_heading(heading_text, level=max(1, min(6, level)))
+            continue
+
+        if content.startswith(('-', '*', '•')):
+            flush_paragraph()
+            bullet_text = content.lstrip('-*• ').strip()
+            if bullet_text:
+                doc.add_paragraph(bullet_text, style='List Bullet')
+            continue
+
+        pending.append(content)
+
+    flush_paragraph()
+
+
+def markdown_to_docx(markdown_text: str, output_path: Path, quiz_source: Optional[str] = None) -> None:
+    """Write ``markdown_text`` to a DOCX file, followed by a generated quiz when possible.
+
+    Args:
+        markdown_text: Markdown content rendered into the document body.
+        output_path: Destination file; its parent directory is created.
+        quiz_source: Text used to generate the quiz; defaults to
+            ``markdown_text`` when empty or omitted.
+
+    Quiz generation errors are logged and do not prevent the document from
+    being saved.
+    """
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    document = Document()
+    document.add_heading('Resumen generado con GLM-4.6', level=1)
+    document.add_paragraph('Este documento fue sintetizado automáticamente usando GLM-4.6 a través de la CLI de Claude (z.ai).')
+    document.add_page_break()
+
+    _append_markdown_to_doc(document, markdown_text)
+
+    quiz_input = quiz_source if quiz_source else markdown_text
+    if quiz_input:
+        logging.info("🎯 Generando quiz con GLM-4.6...")
+        try:
+            questions, answers = generate_quiz(quiz_input)
+            if questions and answers:
+                add_quiz_to_docx(document, questions, answers)
+                logging.info("✅ Quiz agregado al documento")
+        except Exception as quiz_error:
+            logging.error(f"❌ Error generando quiz: {quiz_error}")
+
+    document.save(str(output_path))
+
+
+def markdown_to_pdf(markdown_text: str, pdf_path: Path, title: Optional[str] = None) -> None:
+    """Render ``markdown_text`` as a simple letter-size PDF.
+
+    Headings, bullets and plain text are drawn line by line from the top
+    margin downwards; a new page starts whenever the cursor drops below the
+    bottom margin.
+
+    Args:
+        markdown_text: Markdown content to render.
+        pdf_path: Destination file; its parent directory is created.
+        title: Optional title drawn in bold at the top (first 100 characters).
+    """
+    pdf_path.parent.mkdir(parents=True, exist_ok=True)
+    canvas_obj = canvas.Canvas(str(pdf_path), pagesize=letter)
+    width, height = letter
+    margin = 72
+    line_height = 14
+    y_position = height - margin
+
+    def new_page():
+        """Finish the current page and reset the font and the cursor."""
+        nonlocal y_position
+        canvas_obj.showPage()
+        canvas_obj.setFont('Helvetica', 11)
+        y_position = height - margin
+
+    def advance(step):
+        """Move the cursor down by ``step``; start a new page below the margin."""
+        nonlocal y_position
+        y_position -= step
+        if y_position < margin:
+            new_page()
+
+    canvas_obj.setFont('Helvetica', 11)
+
+    if title:
+        canvas_obj.setFont('Helvetica-Bold', 16)
+        canvas_obj.drawString(margin, y_position, title[:100])
+        y_position -= 28
+        canvas_obj.setFont('Helvetica', 11)
+
+    heading_sizes = {1: 16, 2: 14}
+
+    for raw_line in markdown_text.splitlines():
+        content = raw_line.strip()
+
+        if not content:
+            advance(line_height)
+            continue
+
+        if content.startswith('#'):
+            after_hashes = content.lstrip('#')
+            level = len(content) - len(after_hashes)
+            heading_text = after_hashes.strip()
+            if heading_text:
+                font_size = heading_sizes.get(level, 12)
+                canvas_obj.setFont('Helvetica-Bold', font_size)
+                canvas_obj.drawString(margin, y_position, heading_text[:120])
+                advance(font_size + 6)
+                canvas_obj.setFont('Helvetica', 11)
+            continue
+
+        if content.startswith(('-', '*', '•')):
+            bullet_text = content.lstrip('-*•').strip()
+            for idx, wrapped in enumerate(textwrap.wrap(bullet_text, width=80) or ['']):
+                # Only the first wrapped line carries the bullet glyph.
+                prefix = '  ' if idx else '• '
+                canvas_obj.drawString(margin, y_position, f"{prefix}{wrapped}")
+                advance(line_height)
+            continue
+
+        for wrapped in textwrap.wrap(content, width=90) or ['']:
+            canvas_obj.drawString(margin, y_position, wrapped)
+            advance(line_height)
+
+    canvas_obj.save()
+
+def generate_intelligent_filename(base_name, summary_content):
+    """Build a DOCX file name from the base name plus topics extracted from the summary.
+
+    Args:
+        base_name: Original name; any ``_unificado`` marker is removed first.
+        summary_content: Text from which the topics are extracted.
+
+    Returns:
+        ``"<base> - <topics>_unificado.docx"`` after cleaning and length
+        limiting, or ``"<base_name>_unificado.docx"`` if anything fails.
+    """
+    try:
+        # Extract the main topics
+        topics_str = ' - '.join(extract_key_topics_from_text(summary_content))
+
+        # Clean the original base name to a reasonable length
+        clean_base = clean_filename_for_topics(
+            base_name.replace('_unificado', ''),
+            MAX_FILENAME_BASE_LENGTH,
+        )
+        # "archivo" is the cleaner's placeholder; use a nicer default instead.
+        parts = ["Resumen" if clean_base.lower() == "archivo" else clean_base]
+
+        if topics_str:
+            clean_topics = clean_filename_for_topics(topics_str, MAX_FILENAME_TOPICS_LENGTH)
+            if clean_topics and clean_topics.lower() != "archivo":
+                parts.append(clean_topics)
+
+        intelligent_name = clean_filename_for_topics(
+            ' - '.join(parts) + '_unificado.docx',
+            MAX_FILENAME_LENGTH,
+        )
+
+        logging.info(f"🎯 Temas extraídos: {topics_str}")
+        return intelligent_name
+
+    except Exception as e:
+        logging.error(f"Error generando nombre inteligente: {e}")
+        # Fall back to the default name
+        return f"{base_name}_unificado.docx"
+
+# --- QUIZ GENERATION FUNCTIONS ---
+def generate_quiz(summary_text):
+    """Generate a 10-question multiple-choice quiz from a summary.
+
+    Args:
+        summary_text: Summary embedded in the prompt sent to the LLM.
+
+    Returns:
+        A ``(questions, answers)`` tuple of lists: each question is the
+        ``PREGUNTA`` line followed by its option lines, each answer is the
+        text after ``RESPUESTA:``. Returns ``(None, None)`` when the LLM
+        reply is empty or is an error message.
+    """
+    prompt = f"""
+Basándote en el siguiente resumen, genera exactamente 10 preguntas de opción múltiple en español.
+Cada pregunta debe tener 4 opciones (A, B, C, D) y solo una respuesta correcta.
+Las preguntas deben cubrir los puntos más importantes del resumen.
+
+Formato requerido:
+PREGUNTA 1: [texto de la pregunta]
+A) [opción A]
+B) [opción B] 
+C) [opción C]
+D) [opción D]
+RESPUESTA: [letra correcta]
+
+PREGUNTA 2: [texto de la pregunta]
+A) [opción A]
+B) [opción B]
+C) [opción C] 
+D) [opción D]
+RESPUESTA: [letra correcta]
+
+[continúa hasta la pregunta 10]
+
+Resumen:
+{summary_text}
+"""
+
+    logging.info("🎯 Generating quiz with GLM-4.6...")
+    response = run_gemini(prompt)
+
+    # run_gemini signals failure with a string that STARTS with "Error".
+    # Searching for "Error" anywhere threw away valid quizzes whose questions
+    # merely mention the word.
+    if not response or response.lstrip().startswith("Error"):
+        logging.error(f"❌ Error generating quiz: {response}")
+        return None, None
+
+    # Parse the reply into question blocks and answers
+    questions = []
+    answers = []
+
+    current_question = None
+    current_options = []
+
+    for line in response.strip().split('\n'):
+        line = line.strip()
+        if line.startswith('PREGUNTA'):
+            # A new question closes the previous one
+            if current_question:
+                questions.append(f"{current_question}\n" + "\n".join(current_options))
+                current_options = []
+            current_question = line
+        elif line.startswith(('A)', 'B)', 'C)', 'D)')):
+            current_options.append(line)
+        elif line.startswith('RESPUESTA:'):
+            answers.append(line.replace('RESPUESTA:', '').strip())
+
+    # Add the last question
+    if current_question:
+        questions.append(f"{current_question}\n" + "\n".join(current_options))
+
+    return questions, answers
+
+def add_quiz_to_docx(doc, questions, answers):
+    """Append the quiz to the DOCX document.
+
+    Adds a page with the questions (each followed by an empty paragraph) and
+    then a separate page listing the answers as ``Pregunta N: <answer>``.
+    """
+    # Questions page
+    doc.add_page_break()
+    doc.add_heading('Quiz de Evaluación', level=1)
+    doc.add_paragraph('Responde las siguientes preguntas basándote en el resumen anterior.')
+    doc.add_paragraph('')
+
+    for question_block in questions:
+        doc.add_paragraph(question_block)
+        doc.add_paragraph('')
+
+    # Answers page
+    doc.add_page_break()
+    doc.add_heading('Respuestas del Quiz', level=1)
+
+    for number, answer in enumerate(answers, start=1):
+        doc.add_paragraph(f"Pregunta {number}: {answer}")
+
+# --- DOCUMENT GENERATION FUNCTIONS ---
+def save_summary_docx(content, model_name, filename, text_for_quiz=None):
+    """Save a summary as a DOCX file with basic formatting (legacy function).
+
+    Args:
+        content: Summary text; ``#`` lines become headings (up to level 6),
+            ``-``/``•`` lines become bullets, other lines are merged into
+            paragraphs separated by blank lines.
+        model_name: Not used by this function.
+        filename: Path passed to ``doc.save``.
+        text_for_quiz: When truthy, a quiz generated from it is appended;
+            quiz errors are logged and do not stop the save.
+    """
+    doc = Document()
+    doc.add_heading('Resumen generado', level=1)
+
+    pending = []
+
+    def flush_paragraph():
+        """Emit the accumulated lines as one paragraph, if there are any."""
+        if pending:
+            doc.add_paragraph(' '.join(pending))
+            pending.clear()
+
+    for raw_line in content.splitlines():
+        line = raw_line.strip()
+        if not line:
+            flush_paragraph()
+            continue
+
+        if line.startswith('#'):
+            flush_paragraph()
+            level = len(line) - len(line.lstrip('#'))
+            if level <= 6:
+                doc.add_heading(line.lstrip('#').strip(), level=level)
+            else:
+                # More than six hashes: keep the line as ordinary text.
+                pending.append(line)
+        elif line.startswith(('-', '•')):
+            flush_paragraph()
+            doc.add_paragraph(line.lstrip('-•').strip(), style='List Bullet')
+        else:
+            pending.append(line)
+
+    flush_paragraph()
+
+    # Append the quiz only when quiz text was provided
+    if text_for_quiz:
+        logging.info("🎯 Generating quiz...")
+        try:
+            questions, answers = generate_quiz(text_for_quiz)
+            if questions and answers:
+                add_quiz_to_docx(doc, questions, answers)
+                logging.info("✅ Quiz added to document")
+        except Exception as e:
+            logging.error(f"❌ Error generating quiz: {e}")
+
+    doc.save(filename)
+
+def _split_text_for_summary(text, max_chunk_size):
+    """Split ``text`` into chunks, preferring paragraph (blank-line) boundaries.
+
+    Paragraphs longer than ``max_chunk_size`` are cut at the last whitespace
+    before the limit (or hard-cut when there is none) so that no chunk built
+    from them exceeds the limit.
+    """
+    if len(text) <= max_chunk_size:
+        return [text]
+
+    chunks = []
+    current_chunk = ""
+
+    def flush_current():
+        nonlocal current_chunk
+        if current_chunk.strip():
+            chunks.append(current_chunk.strip())
+        current_chunk = ""
+
+    for paragraph in text.split('\n\n'):
+        while len(paragraph) > max_chunk_size:
+            flush_current()
+            cut = max(paragraph.rfind(sep, 0, max_chunk_size + 1) for sep in (' ', '\n', '\t'))
+            if cut <= 0:
+                cut = max_chunk_size
+            piece = paragraph[:cut].strip()
+            if piece:
+                chunks.append(piece)
+            paragraph = paragraph[cut:].lstrip()
+
+        if len(current_chunk) + len(paragraph) <= max_chunk_size:
+            current_chunk += paragraph + "\n\n"
+        else:
+            flush_current()
+            current_chunk = paragraph + "\n\n"
+
+    flush_current()
+    return chunks
+
+
+def _extract_bullet_lines(raw_output):
+    """Return the normalized ``- `` bullets found in a CLI response (short ones are dropped)."""
+    bullets = []
+    for line in (raw_output or '').split('\n'):
+        line = line.strip()
+        if line.startswith(('-', '•')):
+            bullet = '- ' + line.lstrip('-• ').strip()
+            if len(bullet) > 10:  # Ignore very short bullets
+                bullets.append(bullet)
+    return bullets
+
+
+def _select_unique_bullets(bullets, limit=15):
+    """De-duplicate bullets case-insensitively, then keep at most ``limit`` of them."""
+    unique_bullets = []
+    seen = set()
+    for bullet in bullets:
+        bullet_clean = bullet.lower().strip()
+        if bullet_clean in seen or len(bullet_clean) <= 15:
+            continue
+        seen.add(bullet_clean)
+        unique_bullets.append(bullet)
+        if len(unique_bullets) >= limit:
+            break
+    return unique_bullets
+
+
+def _fallback_chunk_bullets(section_number):
+    """Placeholder bullets used when the CLI call for a chunk fails."""
+    return f"- Punto principal de la sección {section_number}\n- Concepto secundario importante\n- Información relevante extraída\n- Datos significativos del texto\n- Conclusiones clave"
+
+
+def _fallback_summary_markdown():
+    """Placeholder summary used when the CLI call for the summary fails."""
+    return """# Resumen del Documento
+
+## Puntos Principales
+- El documento ha sido procesado exitosamente
+- Se extrajo el contenido textual del PDF original
+- El material está disponible en formato editable
+
+## Información Relevante
+El texto procesado contiene información académica sobre el período histórico analizado.
+
+## Conclusiones
+El documento está disponible en formato DOCX para su posterior edición y análisis."""
+
+
+def run_claude_summary_pipeline(text):
+    """Generate bullet points, an integrated summary and a formatted summary via the Claude CLI.
+
+    Steps:
+        1. Replace empty or very short input (< 50 non-blank characters) with a
+           default sentence.
+        2. Split the text into chunks of at most 6000 characters and ask the CLI
+           for bullet points per chunk (placeholder bullets on failure).
+        3. De-duplicate the bullets and keep at most 15 of them.
+        4. Ask the CLI for a Markdown summary based on the bullets and the first
+           6000 characters of the text (placeholder summary on failure).
+        5. Ask the CLI to tidy the Markdown; on failure the unformatted summary
+           is used.
+
+    Args:
+        text: Source text to summarize.
+
+    Returns:
+        tuple: ``(True, bullets, summary, formatted_summary)``. The first item
+        is always ``True`` because every CLI failure has a fallback.
+    """
+
+    # Make sure there is enough content to summarize
+    if not text or len(text.strip()) < 50:
+        logging.warning("⚠️ Texto demasiado corto para generar resumen, usando contenido por defecto")
+        text = "Contenido educativo procesado. Se generó un documento editable a partir de un archivo PDF."
+
+    # Split long text into chunks (larger chunk size chosen for Claude)
+    max_chunk_size = 6000
+    if len(text) > max_chunk_size:
+        logging.info(f"📝 Dividiendo texto de {len(text)} caracteres en chunks de {max_chunk_size}")
+        text_chunks = _split_text_for_summary(text, max_chunk_size)
+        logging.info(f"📝 Texto dividido en {len(text_chunks)} partes")
+    else:
+        text_chunks = [text]
+
+    logging.info("🔹 Claude CLI generando bullet points por partes...")
+
+    # Collect bullet points chunk by chunk
+    all_bullets = []
+    for chunk_number, chunk in enumerate(text_chunks, start=1):
+        logging.info(f"🔹 Procesando chunk {chunk_number}/{len(text_chunks)} con Claude CLI...")
+
+        bullet_prompt = f"""Analiza el siguiente texto y extrae entre 5 y 8 bullet points clave en español.
+
+REGLAS ESTRICTAS:
+1. Devuelve ÚNICAMENTE bullet points, cada línea iniciando con "- "
+2. Cada bullet debe ser conciso (12-20 palabras) y resaltar datos, fechas, conceptos o conclusiones importantes
+3. NO agregues introducciones, conclusiones ni texto explicativo
+4. Concéntrate en los puntos más importantes del texto
+5. Incluye fechas, datos específicos y nombres relevantes si los hay
+
+Texto (parte {chunk_number} de {len(text_chunks)}):
+{chunk}"""
+
+        try:
+            chunk_bullets = run_claude_cli(bullet_prompt, timeout=300)
+            logging.info(f"✅ Claude CLI responded successfully for chunk {chunk_number}")
+        except subprocess.TimeoutExpired:
+            logging.warning(f"⚠️ Claude CLI timeout for chunk {chunk_number}, usando fallback")
+            chunk_bullets = _fallback_chunk_bullets(chunk_number)
+        except Exception as e:
+            logging.warning(f"⚠️ Claude CLI error for chunk {chunk_number}: {e}")
+            chunk_bullets = _fallback_chunk_bullets(chunk_number)
+
+        accepted_bullets = _extract_bullet_lines(chunk_bullets)
+        all_bullets.extend(accepted_bullets)
+        # Report the bullets actually kept, not the raw number of output lines.
+        logging.info(f"✅ Chunk {chunk_number} procesado: {len(accepted_bullets)} bullets")
+
+    # De-duplicate first and cap afterwards, so duplicates and short lines
+    # do not use up the 15-bullet budget.
+    unique_bullets = _select_unique_bullets(all_bullets, limit=15)
+
+    claude_bullets = "\n".join(unique_bullets)
+    logging.info(f"✅ Total de {len(unique_bullets)} bullets únicos generados con Claude CLI")
+
+    logging.info("🔸 Claude CLI generando resumen integrado...")
+
+    # For the summary prompt only the beginning of a long text is sent
+    if len(text) > max_chunk_size:
+        summary_text = text[:max_chunk_size] + "\n\n[El documento continúa con contenido adicional...]"
+        logging.info("📝 Usando versión condensada del texto para el resumen")
+    else:
+        summary_text = text
+
+    summary_prompt = f"""Eres un profesor universitario experto en historia del siglo XX. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
+
+REQUISITOS ESTRICTOS:
+- Extensión entre 500-700 palabras
+- Usa encabezados Markdown con jerarquía clara (##, ###)
+- Desarrolla los puntos clave con profundidad y contexto histórico
+- Mantén un tono académico y analítico
+- Incluye conclusiones significativas
+- NO agregues texto fuera del resumen
+- Devuelve únicamente el resumen en formato Markdown
+
+Bullet points extraídos:
+{claude_bullets}
+
+Texto original (resumido si es muy extenso):
+{summary_text}
+
+Responde únicamente con el resumen en Markdown."""
+
+    try:
+        summary_output = run_claude_cli(summary_prompt, timeout=300)
+        logging.info("✅ Resumen integrado generado por Claude CLI")
+    except subprocess.TimeoutExpired:
+        logging.warning("⚠️ Claude CLI timeout for summary, usando fallback")
+        summary_output = _fallback_summary_markdown()
+        logging.info("✅ Resumen fallback generado")
+    except Exception as e:
+        logging.warning(f"⚠️ Claude CLI error for summary: {e}")
+        summary_output = _fallback_summary_markdown()
+
+    logging.info("🔶 Claude CLI aplicando formato final...")
+    format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible:
+
+{summary_output}
+
+Instrucciones:
+- Corrige cualquier error de formato
+- Asegúrate de que los encabezados estén bien espaciados
+- Verifica que las viñetas usen "- " correctamente
+- Mantén exactamente el contenido existente
+- Devuelve únicamente el resumen formateado sin texto adicional"""
+
+    try:
+        formatted_output = run_claude_cli(format_prompt, timeout=180)
+        logging.info("✅ Formato final aplicado por Claude CLI")
+    except Exception as e:
+        logging.warning(f"⚠️ Claude CLI formatting error: {e}")
+        formatted_output = summary_output
+
+    return True, claude_bullets, summary_output, formatted_output
+
+# Original entry point kept so existing callers continue to work
+def run_gemini_summary_pipeline(text):
+    """Compatibility alias: forwards ``text`` to ``run_claude_summary_pipeline``.
+
+    Returns whatever ``run_claude_summary_pipeline`` returns, unchanged.
+    """
+    pipeline_result = run_claude_summary_pipeline(text)
+    return pipeline_result
+
+
+def generate_unified_summary(local_txt_path, base_name):
+    """Produce summary files (Markdown, DOCX and PDF) from a UTF-8 text file.
+
+    Args:
+        local_txt_path: Path of the text file to summarize.
+        base_name: Base name handed to ``generate_intelligent_filename``.
+
+    Returns:
+        tuple: ``(False, None, {})`` when the summary pipeline reports failure,
+        otherwise ``(True, summary_content, output_files)`` where
+        ``output_files`` holds the paths/names of the generated Markdown, DOCX
+        and PDF files (the PDF entries are ``None`` if PDF generation failed).
+    """
+    with open(local_txt_path, "r", encoding="utf-8") as source_file:
+        text = source_file.read()
+
+    logging.info("🤖 Iniciando síntesis colaborativa con GLM-4.6 (z.ai)...")
+    send_telegram_message("Iniciando resumen colaborativo con GLM-4.6 (z.ai)")
+
+    pipeline_result = run_gemini_summary_pipeline(text)
+    success, bullet_points, raw_summary, formatted_summary = pipeline_result
+    if not success:
+        return False, None, {}
+
+    # Candidate contents in priority order; the first non-blank one wins.
+    content_candidates = (
+        lambda: formatted_summary or "",
+        lambda: "\n\n".join(filter(None, [bullet_points, raw_summary])),
+        lambda: text,
+    )
+    summary_content = ""
+    for build_candidate in content_candidates:
+        summary_content = build_candidate().strip()
+        if summary_content:
+            break
+    if not summary_content:
+        summary_content = "Resumen no disponible"
+
+    proposed_name = generate_intelligent_filename(base_name, summary_content)
+    intelligent_filename = ensure_unique_local_filename(Path(LOCAL_DOWNLOADS_PATH), proposed_name)
+    docx_path = Path(LOCAL_DOWNLOADS_PATH) / intelligent_filename
+
+    # Markdown copy is stored next to the DOCX with the same stem.
+    markdown_filename = Path(intelligent_filename).with_suffix('.md').name
+    markdown_path = docx_path.with_suffix('.md')
+    with open(markdown_path, 'w', encoding='utf-8') as markdown_file:
+        markdown_file.write(summary_content)
+
+    logging.info(f"📝 Guardando resumen Markdown en {markdown_path}")
+
+    markdown_to_docx(summary_content, docx_path, quiz_source=summary_content)
+    logging.info(f"✅ Documento DOCX generado: {docx_path}")
+
+    # PDF generation is best-effort: a failure is logged and reported as None.
+    pdf_path = docx_path.with_suffix('.pdf')
+    try:
+        markdown_to_pdf(summary_content, pdf_path, title=docx_path.stem)
+        logging.info(f"✅ PDF generado: {pdf_path}")
+    except Exception as pdf_error:
+        pdf_created = False
+        logging.error(f"❌ Error generando PDF: {pdf_error}")
+    else:
+        pdf_created = True
+
+    send_telegram_message(f"✅ Resumen colaborativo GLM-4.6 completado: {intelligent_filename}")
+
+    output_files = {
+        'docx_path': str(docx_path),
+        'docx_name': intelligent_filename,
+        'markdown_path': str(markdown_path),
+        'markdown_name': markdown_filename,
+        'pdf_path': str(pdf_path) if pdf_created else None,
+        'pdf_name': pdf_path.name if pdf_created else None,
+    }
+
+    return True, summary_content, output_files
+
+def generate_summaries_from_text(local_txt_path, base_name):
+    """Generate the summary outputs for a text file.
+
+    Thin wrapper: forwards both arguments to ``generate_unified_summary`` and
+    returns its result unchanged.
+    """
+    unified_result = generate_unified_summary(local_txt_path, base_name)
+    return unified_result
+
+# --- PDF PROCESSING FUNCTIONS ---
+def preprocess_image(img):
+    """Prepare a page image for OCR.
+
+    The image is converted to grayscale, binarized with an adaptive Gaussian
+    threshold, passed through CLAHE and finally denoised.
+
+    Args:
+        img: Image convertible with ``np.array`` (RGB channel order is assumed
+            by the grayscale conversion).
+
+    Returns:
+        The processed image array, or ``np.array(img)`` unchanged if any step
+        raises (the error is logged).
+    """
+    try:
+        pixels = np.array(img)
+        grayscale = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
+        thresholded = cv2.adaptiveThreshold(
+            grayscale,
+            255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY,
+            31,
+            10,
+        )
+        equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
+        return cv2.fastNlMeansDenoising(equalizer.apply(thresholded), None, 30, 7, 21)
+    except Exception as e:
+        logging.error(f"Error en preprocesamiento de imagen: {e}")
+        return np.array(img)
+
+
+def normalize_pdf_extracted_text(text):
+    """Clean text extracted directly from a PDF while keeping useful line breaks.
+
+    Control/format characters (Unicode categories starting with ``C``) are
+    dropped except newline, carriage return and tab. Line endings are unified
+    to ``\\n``, runs of spaces/tabs become a single space, and three or more
+    consecutive newlines are reduced to two.
+
+    Args:
+        text: Raw extracted text (may be empty or ``None``).
+
+    Returns:
+        str: The normalized text with surrounding whitespace stripped, or ``''``
+        for falsy input.
+    """
+    if not text:
+        return ''
+
+    kept_chars = []
+    for char in text:
+        if char in '\n\r\t' or not unicodedata.category(char).startswith('C'):
+            kept_chars.append(char)
+
+    result = ''.join(kept_chars)
+    for line_ending in ('\r\n', '\r'):
+        result = result.replace(line_ending, '\n')
+    result = re.sub(r'[ \t]+', ' ', result)
+    result = re.sub(r'\n{3,}', '\n\n', result)
+    return result.strip()
+
+
+def extract_pdf_text_if_text_based(reader, filename):
+    """Return per-page text when the PDF appears to carry embedded text.
+
+    Each page's text is extracted and normalized. The PDF is treated as
+    text-based when the share of pages with text reaches
+    ``PDF_TEXT_DETECTION_MIN_RATIO`` and the average length of those pages
+    reaches ``PDF_TEXT_DETECTION_MIN_AVG_CHARS``.
+
+    Args:
+        reader: Object exposing ``pages``; each page offers ``extract_text()``.
+        filename: Name used only in log messages.
+
+    Returns:
+        tuple: ``(page_texts, ratio, avg_chars)`` for a text-based PDF, or
+        ``(None, ratio, avg_chars)`` when OCR is needed. A document without
+        pages yields ``(None, 0.0, 0.0)``.
+    """
+    page_count = len(reader.pages)
+    if page_count == 0:
+        return None, 0.0, 0.0
+
+    page_texts = []
+    for page_number, page in enumerate(reader.pages, start=1):
+        try:
+            extracted = page.extract_text() or ''
+        except Exception as exc:
+            # A page that cannot be read counts as a page without text.
+            logging.debug(
+                "Error extrayendo texto de la página %s de %s: %s",
+                page_number,
+                filename,
+                exc,
+            )
+            extracted = ''
+        page_texts.append(normalize_pdf_extracted_text(extracted))
+
+    text_lengths = [len(page_text) for page_text in page_texts if page_text]
+    text_pages = len(text_lengths)
+    total_chars = sum(text_lengths)
+
+    ratio = text_pages / page_count
+    avg_chars = (total_chars / text_pages) if text_pages else 0.0
+
+    is_text_based = (
+        bool(text_pages)
+        and ratio >= PDF_TEXT_DETECTION_MIN_RATIO
+        and avg_chars >= PDF_TEXT_DETECTION_MIN_AVG_CHARS
+    )
+    if not is_text_based:
+        logging.debug(
+            "PDF '%s' requiere OCR (ratio=%.0f%%, avg_chars=%.0f).",
+            filename,
+            ratio * 100,
+            avg_chars,
+        )
+        return None, ratio, avg_chars
+
+    logging.info(
+        "📑 PDF '%s' detectado como basado en texto (ratio=%.0f%%, avg_chars=%.0f).",
+        filename,
+        ratio * 100,
+        avg_chars,
+    )
+    return page_texts, ratio, avg_chars
+
+
+def process_pdf_file(input_pdf_path, output_docx_path):
+    """Convert a single PDF file into an editable DOCX document.
+
+    Workflow:
+        1. Try direct text extraction (``extract_pdf_text_if_text_based``). If
+           the PDF is text-based, that text is used as-is.
+        2. Otherwise run OCR: the PDF is split into chunks of
+           ``MAX_PAGES_PER_CHUNK`` pages when needed, every page is rendered to
+           an image and read with EasyOCR, Tesseract and TrOCR; the three
+           outputs are concatenated per page, cleaned, and each chunk is passed
+           through ``gemini_correct_text``.
+        3. OCR text is additionally passed to
+           ``format_text_with_gemini_for_docx``; direct text is not.
+        4. The result is written to ``output_docx_path``.
+
+    Progress is reported through ``send_telegram_message``. The temporary chunk
+    directory is removed in all cases.
+
+    Args:
+        input_pdf_path: Path of the PDF to process.
+        output_docx_path: Path where the DOCX is saved.
+
+    Raises:
+        FileNotFoundError: If ``input_pdf_path`` is not an existing file.
+        ValueError: If no text could be obtained from the PDF.
+    """
+    pdf_filename = os.path.basename(input_pdf_path)
+    send_telegram_message(f"⚙️ Iniciando procesamiento de PDF: {pdf_filename}")
+    # Relative path, so chunk files are created under the current working directory.
+    temp_dir = f"temp_pdf_chunks_{pdf_filename}"
+
+    if not os.path.isfile(input_pdf_path):
+        logging.error(f"Input file not found: {input_pdf_path}")
+        raise FileNotFoundError(f"Input file not found: {input_pdf_path}")
+
+    try:
+        logging.info(f"Processing: {pdf_filename}")
+        reader = PdfReader(input_pdf_path)
+        num_pages = len(reader.pages)
+
+        direct_text_pages, text_ratio, avg_chars = extract_pdf_text_if_text_based(reader, pdf_filename)
+        all_corrected_texts = []
+
+        if direct_text_pages is not None:
+            logging.info(
+                "Usando extracción directa de texto para '%s' (ratio=%.0f%%, avg_chars=%.0f).",
+                pdf_filename,
+                text_ratio * 100,
+                avg_chars,
+            )
+            send_telegram_message(f"📑 Texto incrustado detectado, evitando OCR para: {pdf_filename}")
+            raw_text_content = f"\n\n{_PAGE_BREAK_TOKEN}\n\n".join(direct_text_pages)
+            if raw_text_content.strip():
+                # For text-based PDFs, do NOT apply GLM correction - use the extracted text directly
+                all_corrected_texts.append(raw_text_content)
+        else:
+            logging.info(
+                "Realizando OCR completo para '%s' (ratio=%.0f%%, avg_chars=%.0f).",
+                pdf_filename,
+                text_ratio * 100,
+                avg_chars,
+            )
+
+            # For OCR, split into chunks only when necessary
+            pdf_chunks = []
+            if num_pages > MAX_PAGES_PER_CHUNK:
+                logging.info(f"PDF requires OCR and has {num_pages} pages. Splitting into chunks of {MAX_PAGES_PER_CHUNK}.")
+                os.makedirs(temp_dir, exist_ok=True)
+                for i in range(0, num_pages, MAX_PAGES_PER_CHUNK):
+                    writer = PdfWriter()
+                    chunk_end = min(i + MAX_PAGES_PER_CHUNK, num_pages)
+                    for j in range(i, chunk_end):
+                        writer.add_page(reader.pages[j])
+                    chunk_path = os.path.join(temp_dir, f"chunk_{i // MAX_PAGES_PER_CHUNK}.pdf")
+                    with open(chunk_path, "wb") as f:
+                        writer.write(f)
+                    pdf_chunks.append(chunk_path)
+                send_telegram_message(f"📄 PDF split into {len(pdf_chunks)} parts for OCR processing.")
+            else:
+                pdf_chunks.append(input_pdf_path)
+
+            ocr_reader, trocr_models = get_ocr_models()
+
+            for idx, chunk_path in enumerate(pdf_chunks):
+                logging.info(f"--- Processing chunk {idx + 1}/{len(pdf_chunks)} ---")
+                send_telegram_message(f"🧠 OCR with GPU processing part {idx + 1}/{len(pdf_chunks)} of {pdf_filename}...")
+
+                _update_models_usage()
+
+                # Render every page of the chunk to an image.
+                images = convert_from_path(chunk_path, dpi=PDF_DPI, thread_count=PDF_RENDER_THREAD_COUNT)
+                full_text_raw = []
+
+                if not images:
+                    logging.warning(f"No se generaron imágenes para el chunk {idx + 1}")
+                    continue
+
+                batch_size = max(1, min(PDF_BATCH_SIZE, len(images)))
+                logging.info(
+                    f"⚙️ Config GPU PDF -> render_threads={PDF_RENDER_THREAD_COUNT}, "
+                    f"batch_size={batch_size}, trocr_max_batch={PDF_TROCR_MAX_BATCH}"
+                )
+
+                def _tesseract_ocr(img_np):
+                    return pytesseract.image_to_string(img_np, lang='spa')
+
+                # Separate pools: one for image preprocessing, one for Tesseract.
+                with ThreadPoolExecutor(max_workers=PDF_PREPROCESS_THREADS) as preprocess_pool, \
+                        ThreadPoolExecutor(max_workers=PDF_TESSERACT_THREADS) as tess_pool:
+                    for i in range(0, len(images), batch_size):
+                        batch_images = images[i:i + batch_size]
+
+                        _update_models_usage()
+
+                        preprocessed_batch = list(preprocess_pool.map(preprocess_image, batch_images))
+
+                        # Prefer the batched EasyOCR API; fall back to per-image calls
+                        # when it is missing or fails.
+                        try:
+                            easy_results = ocr_reader.readtext_batched(
+                                preprocessed_batch,
+                                detail=1,
+                                batch_size=len(preprocessed_batch)
+                            )
+                        except AttributeError:
+                            easy_results = [
+                                ocr_reader.readtext(img_data, detail=1, batch_size=len(preprocessed_batch))
+                                for img_data in preprocessed_batch
+                            ]
+                        except Exception as e:
+                            logging.error(f"Error en EasyOCR batched: {e}, usando fallback secuencial")
+                            easy_results = [
+                                ocr_reader.readtext(img_data, detail=1, batch_size=len(preprocessed_batch))
+                                for img_data in preprocessed_batch
+                            ]
+
+                        tess_texts = list(tess_pool.map(_tesseract_ocr, preprocessed_batch))
+
+                        # Reload the TrOCR bundle if it is no longer usable.
+                        if (not isinstance(trocr_models, dict) or
+                                trocr_models.get('processor') is None or
+                                trocr_models.get('model') is None):
+                            logging.info("♻️ TrOCR models were freed, reloading before OCR batch")
+                            _, trocr_models = get_ocr_models()
+
+                        # TrOCR receives the original images, not the preprocessed ones.
+                        trocr_texts = trocr_ocr_batch(
+                            batch_images,
+                            trocr_models['processor'],
+                            trocr_models['model'],
+                            max_batch_size=PDF_TROCR_MAX_BATCH
+                        )
+
+                        # Concatenate the output of the three engines for each page.
+                        for img_idx, img_preprocessed in enumerate(preprocessed_batch):
+                            easy_text = ''
+                            if easy_results and img_idx < len(easy_results):
+                                easy_text = ' '.join([line[1] for line in easy_results[img_idx]])
+
+                            text_tess = tess_texts[img_idx] if img_idx < len(tess_texts) else ''
+                            text_trocr = trocr_texts[img_idx] if img_idx < len(trocr_texts) else ''
+
+                            combined_parts = [part for part in (easy_text, text_tess, text_trocr) if part]
+                            combined_text = '\n'.join(combined_parts)
+                            full_text_raw.append(clean_text(combined_text))
+
+                raw_text_content = f"\n\n{_PAGE_BREAK_TOKEN}\n\n".join(full_text_raw)
+                if not raw_text_content.strip():
+                    logging.warning(f"Chunk {idx + 1} no produjo texto significativo tras OCR")
+                    continue
+                corrected_chunk_text = gemini_correct_text(raw_text_content)
+                all_corrected_texts.append(corrected_chunk_text)
+
+        final_text = "\n\n".join(text for text in all_corrected_texts if text)
+        if not final_text.strip():
+            raise ValueError("No se pudo extraer texto del PDF.")
+
+        # For text-based PDFs, do not apply GLM formatting
+        if direct_text_pages is not None:
+            formatted_text = final_text  # Use the direct text without extra formatting
+        else:
+            # Only for OCR output, apply GLM formatting
+            formatted_text = format_text_with_gemini_for_docx(final_text, pdf_filename)
+
+        doc = Document()
+        doc.add_heading(f"Documento Editable: {pdf_filename}", level=1)
+        add_markdown_content_to_document(doc, formatted_text)
+        doc.save(output_docx_path)
+        # Pick the completion message according to the processing path used
+        if direct_text_pages is not None:
+            send_telegram_message(f"✅ PDF with embedded text processed and saved as DOCX: {os.path.basename(output_docx_path)}")
+        else:
+            send_telegram_message(f"✅ PDF with OCR processed and saved as DOCX: {os.path.basename(output_docx_path)}")
+
+    finally:
+        # Always remove the temporary chunk directory, even after an error.
+        if os.path.exists(temp_dir):
+            logging.info(f"Cleaning up temporary directory: {temp_dir}")
+            shutil.rmtree(temp_dir)
+
+def trocr_ocr_batch(pil_images, processor, model, max_batch_size=4):
+    """Run TrOCR over a list of images, adapting the batch size to GPU/CPU.
+
+    Args:
+        pil_images: Images to recognize (passed to ``processor(images=...)``).
+        processor: TrOCR processor; reloaded via ``get_ocr_models`` if it or
+            ``model`` is ``None``.
+        model: TrOCR model; reloaded via ``get_ocr_models`` when ``None`` or
+            when its parameters cannot be read.
+        max_batch_size: Upper bound for the batch size when the model is on a
+            CUDA device. On CPU the batch size is capped at 2.
+
+    Returns:
+        list: One decoded string per input image, in order. Images belonging to
+        a batch that failed are represented by empty strings. An empty input
+        yields an empty list.
+
+    Raises:
+        RuntimeError: If the model parameters are still unavailable after the
+            reload attempts.
+    """
+    if not pil_images:
+        return []
+
+    _update_models_usage()
+
+    # NOTE(review): ``proc`` and ``mdl`` are accepted but never read here.
+    def _refresh_trocr(proc, mdl, reason):
+        logging.info(f"♻️ TrOCR reload triggered ({reason})")
+        _, trocr_bundle = get_ocr_models()
+        if not isinstance(trocr_bundle, dict):
+            return None, None
+        return trocr_bundle.get('processor'), trocr_bundle.get('model')
+
+    refresh_reason = None
+    if processor is None or model is None:
+        refresh_reason = "models missing"
+    if refresh_reason:
+        processor, model = _refresh_trocr(processor, model, refresh_reason)
+
+    # Read one parameter to learn the model's device and dtype; up to two
+    # attempts, reloading the models between them when needed.
+    sample_param = None
+    attempts = 0
+    while attempts < 2:
+        if model is None:
+            processor, model = _refresh_trocr(processor, model, "model None on attempt")
+        if model is None:
+            attempts += 1
+            continue
+        try:
+            sample_param = next(model.parameters())
+            break
+        except (AttributeError, StopIteration):
+            logging.warning("TrOCR model parameters unavailable, forcing reload")
+            processor, model = _refresh_trocr(processor, model, "no parameters")
+            attempts += 1
+
+    if sample_param is None:
+        raise RuntimeError("TrOCR model parameters unavailable after reload attempts")
+
+    device = sample_param.device
+    is_gpu = device.type == 'cuda'
+    dtype = sample_param.dtype
+    results = []
+
+    # Use a smaller batch size on CPU
+    if is_gpu:
+        batch_size = max(1, min(max_batch_size, len(pil_images)))
+    else:
+        batch_size = max(1, min(2, len(pil_images)))  # Smaller batch size for CPU
+
+    start_idx = 0
+
+    while start_idx < len(pil_images):
+        end_idx = min(start_idx + batch_size, len(pil_images))
+        current_batch = pil_images[start_idx:end_idx]
+
+        try:
+            with torch.inference_mode():
+                pixel_values = processor(images=current_batch, return_tensors="pt").pixel_values
+                pixel_values = pixel_values.to(device)
+                # Match the model's dtype (e.g. when the model was converted to half precision).
+                if pixel_values.dtype != dtype:
+                    pixel_values = pixel_values.to(dtype=dtype)
+
+                generated_ids = model.generate(pixel_values, max_length=512)
+                decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
+                results.extend(decoded)
+                start_idx = end_idx
+
+        except RuntimeError as e:
+            if "out of memory" in str(e).lower() and is_gpu and batch_size > 1:
+                # Retry the same position with half the batch size; start_idx is not advanced.
+                logging.warning(f"⚠️ TrOCR OOM con batch_size={batch_size}, reduciendo a {batch_size // 2}")
+                torch.cuda.empty_cache()
+                batch_size = max(1, batch_size // 2)
+                continue
+            else:
+                # Keep the output aligned with the input by filling in empty strings.
+                logging.error(f"❌ Error en TrOCR batch: {e}")
+                results.extend([""] * len(current_batch))
+                start_idx = end_idx
+
+        except Exception as e:
+            logging.error(f"Error inesperado en TrOCR batch: {e}")
+            results.extend([""] * len(current_batch))
+            start_idx = end_idx
+
+        # Short pause on CPU to avoid overloading it
+        if not is_gpu and start_idx < len(pil_images):
+            time.sleep(0.1)
+
+    return results
+
+def clean_text(text):
+    """Clean and normalize extracted text into a single line.
+
+    The text is NFKC-normalized, every whitespace character (including
+    newlines and tabs) becomes a space, the remaining control/format
+    characters (Unicode categories starting with ``C``) are removed, and runs
+    of spaces are collapsed.
+
+    Whitespace is converted *before* control characters are dropped: newline
+    and tab are themselves control characters, so removing them first would
+    glue together words that sat on different lines.
+
+    Args:
+        text: Text to clean (may be empty or ``None``).
+
+    Returns:
+        str: The cleaned text without leading/trailing spaces; ``''`` for
+        falsy input.
+    """
+    if not text:
+        return ''
+
+    text = unicodedata.normalize('NFKC', text)
+    kept_chars = []
+    for char in text:
+        if char.isspace():
+            kept_chars.append(' ')
+        elif unicodedata.category(char)[0] != 'C':
+            kept_chars.append(char)
+
+    return re.sub(r' +', ' ', ''.join(kept_chars)).strip()
+
+# Literal marker placed between pages of extracted text; add_markdown_content_to_document
+# turns each occurrence into a DOCX page break.
+_PAGE_BREAK_TOKEN = "[[PAGE_BREAK]]"
+# Matches the older separator style: three or more dashes around "Nueva Página"
+# (case-insensitive), e.g. "--- Nueva Página ---".
+_LEGACY_PAGE_BREAK_PATTERN = re.compile(r'-{3,}\s*Nueva Página\s*-{3,}', re.IGNORECASE)
+
+
+def format_text_with_gemini_for_docx(text, pdf_filename):
+    """Ask GLM-4.6 to insert titles/subtitles into the text without altering it.
+
+    This is a best-effort step: whenever the model is unavailable, the call
+    fails, or the answer is empty or starts with "error", the original text is
+    returned unchanged (same policy as ``gemini_correct_text``).
+
+    Args:
+        text: Full document text; may contain ``_PAGE_BREAK_TOKEN`` markers.
+        pdf_filename: Source file name, included in the prompt for context.
+
+    Returns:
+        str: The formatted text stripped of surrounding whitespace, or ``text``
+        itself when formatting was not possible.
+    """
+    if not text:
+        return text
+
+    if not GEMINI_AVAILABLE:
+        logging.debug("GLM-4.6 no disponible para formateo DOCX, se usa texto sin cambios.")
+        return text
+
+    prompt = (
+        "Eres un asistente editorial que trabaja sobre el contenido íntegro de un PDF ya corregido. "
+        "Tu tarea es devolver EXACTAMENTE el mismo texto, sin resumir, omitir ni reescribir frases. "
+        "Solo puedes insertar títulos y subtítulos descriptivos que ayuden a estructurar el documento.\n\n"
+        "Instrucciones estrictas:\n"
+        "- Usa formato Markdown simple: `# Título principal` y `## Subtítulo`. No utilices niveles adicionales.\n"
+        f"- Mantén el marcador literal {_PAGE_BREAK_TOKEN} cuando aparezca; equivale a un salto de página.\n"
+        "- Conserva el orden y la redacción original de todos los párrafos.\n"
+        "- No agregues listas, viñetas, comentarios ni explicaciones extra.\n"
+        "- Responde únicamente con el contenido formateado. Nada de prefacios ni notas.\n\n"
+        f"Nombre del archivo: {pdf_filename}\n\n"
+        "Contenido:\n"
+        "<<<INICIO>>>\n"
+        f"{text}\n"
+        "<<<FIN>>>"
+    )
+
+    # A failing model call must not abort the whole PDF conversion.
+    try:
+        formatted = run_gemini(prompt, use_flash=True)
+    except Exception as e:
+        logging.error(f"Error en la llamada a la API de GLM-4.6: {e}")
+        return text
+
+    if not formatted or not formatted.strip():
+        logging.error("GLM-4.6 devolvió una respuesta vacía para el formato DOCX")
+        return text
+
+    # Strip before checking the prefix so leading whitespace cannot hide an error reply.
+    cleaned = formatted.strip()
+    if cleaned.lower().startswith("error"):
+        logging.error(f"GLM-4.6 no pudo formatear el documento: {formatted}")
+        return text
+
+    return cleaned
+
+
+def add_markdown_content_to_document(doc, content):
+    """Render simple Markdown text into DOCX headings, paragraphs and page breaks.
+
+    ``# `` lines become level-2 headings and ``## `` lines level-3 headings.
+    Page-break markers (the current token or the legacy dashed form) become
+    page breaks. Consecutive other lines are joined with spaces into one
+    paragraph, which ends at a blank line, heading or page break.
+
+    Args:
+        doc: Document-like object exposing ``add_paragraph``, ``add_heading``
+            and ``add_page_break``.
+        content: Markdown text; nothing is added when it is empty or ``None``.
+    """
+    if not content:
+        return
+
+    # Put every page-break marker (current and legacy form) on its own line.
+    page_break_line = f"\n{_PAGE_BREAK_TOKEN}\n"
+    prepared = _LEGACY_PAGE_BREAK_PATTERN.sub(
+        page_break_line,
+        content.replace(_PAGE_BREAK_TOKEN, page_break_line),
+    )
+
+    heading_levels = (('## ', 3), ('# ', 2))
+    pending_lines = []
+
+    def emit_paragraph():
+        # Write the collected lines as one paragraph, then reset the collector.
+        if not pending_lines:
+            return
+        pieces = [piece.strip() for piece in pending_lines]
+        joined = ' '.join(piece for piece in pieces if piece)
+        if joined:
+            doc.add_paragraph(joined)
+        pending_lines.clear()
+
+    for raw_line in prepared.splitlines():
+        text = raw_line.strip()
+
+        if not text:
+            emit_paragraph()
+            continue
+
+        if text == _PAGE_BREAK_TOKEN:
+            emit_paragraph()
+            doc.add_page_break()
+            continue
+
+        for prefix, level in heading_levels:
+            if text.startswith(prefix):
+                emit_paragraph()
+                doc.add_heading(text[len(prefix):].strip(), level=level)
+                break
+        else:
+            pending_lines.append(raw_line)
+
+    emit_paragraph()
+
+
+def gemini_correct_text(text):
+    """Ask GLM-4.6 to correct and rebuild OCR-extracted text.
+
+    Args:
+        text: Raw OCR text.
+
+    Returns:
+        The model's answer, or the original ``text`` when no backend is
+        configured, the call raises, or the answer is empty or begins with
+        "error".
+    """
+    backend_configured = GEMINI_CLI_PATH or GEMINI_API_KEY or CLAUDE_CLI_PATH
+    if not backend_configured:
+        logging.debug("GLM-4.6 no disponible para corrección, se mantiene el texto original.")
+        return text
+
+    prompt = f'''Corrige y reconstruye el siguiente texto extraído por OCR de un documento PDF. El texto puede contener errores, palabras mal escritas, frases incompletas o desordenadas. Tu tarea es devolver únicamente el texto corregido, limpio, coherente y bien estructurado en español. No incluyas explicaciones, preámbulos ni formato adicional. Solo el texto final y legible:
+
+--- INICIO DEL TEXTO ---
+{text}
+--- FIN DEL TEXTO ---'''
+
+    try:
+        response = run_gemini(prompt, use_flash=True)
+        is_blank = not response or not response.strip()
+        # Leading whitespace is ignored when looking for an "error" prefix.
+        if is_blank or response.lstrip().lower().startswith("error"):
+            return text
+        return response
+    except Exception as e:
+        logging.error(f"Error en la llamada a la API de GLM-4.6: {e}")
+        return text
+
+def get_ocr_models():
+    """Load once and return the cached (EasyOCR reader, TrOCR {'processor', 'model'} dict) pair, preferring GPU with CPU fallback; raises RuntimeError if loading fails."""
+    global _ocr_models, _trocr_models
+
+    # Refresh the last-use timestamp
+    _update_models_usage()
+
+    # Loading is attempted up to max_retries times
+    max_retries = 3
+    use_gpu = torch.cuda.is_available()
+
+    # With a GPU present, check how much of its memory is still unallocated
+    if use_gpu:
+        try:
+            total_memory = torch.cuda.get_device_properties(0).total_memory
+            allocated_memory = torch.cuda.memory_allocated(0)
+            free_memory = total_memory - allocated_memory
+            # Below 1.5 GB free, force CPU
+            if free_memory < 1.5 * 1024**3:
+                logging.warning(f"⚠️ Memoria GPU baja: {free_memory / 1024**3:.2f}GB libre, usando CPU")
+                use_gpu = False
+                send_telegram_message("🔄 Memoria GPU insuficiente, usando CPU para procesamiento PDF")
+        except Exception as mem_error:
+            # Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit and hid the cause.
+            logging.warning(f"No se pudo consultar la memoria GPU, usando CPU: {mem_error}")
+            use_gpu = False
+
+    for attempt in range(max_retries):
+        try:
+            if use_gpu:
+                logging.info(f"🚀 Loading OCR models on GPU (attempt {attempt + 1}/{max_retries})...")
+            else:
+                logging.info(f"💻 Loading OCR models on CPU (attempt {attempt + 1}/{max_retries})...")
+
+            # Free cached VRAM before loading when the GPU is used
+            if use_gpu:
+                torch.cuda.empty_cache()
+                import gc
+                gc.collect()
+
+                if attempt > 0:
+                    force_free_vram()
+                    time.sleep(2)
+
+            # Load EasyOCR on GPU or CPU, whichever was selected
+            if _ocr_models is None:
+                _ocr_models = easyocr.Reader(['es'], gpu=use_gpu, verbose=False)
+                logging.info(f"✅ EasyOCR loaded on {'GPU' if use_gpu else 'CPU'}")
+
+            # Load TrOCR (processor + model) unless it is already cached
+            if _trocr_models is None:
+                processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+                model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
+
+                if use_gpu:
+                    try:
+                        device = "cuda"
+                        model = model.to(device)
+                        model.eval()
+                        # Switch to FP16 to reduce memory use (only for compute capability >= 7)
+                        try:
+                            major, _ = torch.cuda.get_device_capability(0)
+                            if major >= 7:
+                                model = model.half()
+                                logging.info("⚡ TrOCR en FP16 habilitado")
+                        except Exception as capability_error:
+                            logging.warning(f"No se pudo habilitar FP16 en TrOCR: {capability_error}")
+
+                        logging.info("✅ TrOCR model loaded on GPU")
+                    except RuntimeError as e:
+                        if "out of memory" in str(e).lower() and attempt < max_retries - 1:
+                            logging.warning("⚠️ TrOCR OOM en GPU, reintentando con CPU...")
+                            use_gpu = False
+                            continue
+                        else:
+                            raise
+                else:
+                    # Use the CPU directly
+                    device = "cpu"
+                    model = model.to(device)
+                    model.eval()
+                    logging.info("✅ TrOCR model loaded on CPU")
+
+                _trocr_models = {
+                    'processor': processor,
+                    'model': model
+                }
+
+            # Update usage timestamp after loading models
+            _update_models_usage()
+            return _ocr_models, _trocr_models
+
+        except RuntimeError as e:
+            if "CUDA-capable device" in str(e) or "out of memory" in str(e).lower():
+                if use_gpu and attempt < max_retries - 1:
+                    logging.error(f"❌ CUDA error en intento {attempt + 1}: {e}")
+                    logging.info("🔄 Reintentando con CPU...")
+                    use_gpu = False  # force CPU on the next attempt
+                    continue
+                else:
+                    error_msg = f"❌ ERROR después de {max_retries} intentos: {e}"
+                    logging.error(error_msg)
+                    if attempt == max_retries - 1:
+                        send_telegram_message(error_msg)
+                    raise RuntimeError(error_msg) from e  # keep the original traceback attached
+            else:
+                logging.error(f"❌ Error inesperado: {e}")
+                raise
+
+    # Reached only if every attempt ended in a retry
+    error_msg = "❌ ERROR: No se pudieron cargar los modelos OCR"
+    logging.error(error_msg)
+    raise RuntimeError(error_msg)
+
+# --- DOCUMENT CONVERSION FUNCTIONS ---
+def docx_to_text(docx_path):
+    """Return the non-blank paragraphs of a DOCX file joined by newlines."""
+    paragraphs = Document(docx_path).paragraphs
+    return '\n'.join(p.text for p in paragraphs if p.text.strip())
+
+def docx_to_markdown(docx_path):
+    """Convert a DOCX file to Markdown text: 'Heading N' styles become N '#' marks (clamped to 1-6), 'List Bullet' becomes '- ' items."""
+    md_lines = []
+    for para in Document(docx_path).paragraphs:
+        text = para.text.strip()
+        style_name = para.style.name or ''
+        suffix = style_name[len('Heading '):] if style_name.startswith('Heading ') else ''
+        if suffix.isdecimal():  # names such as 'Heading 1 Char' used to crash int(); they are now plain text
+            md_lines.append('#' * min(max(int(suffix), 1), 6) + ' ' + text)
+        elif style_name == 'List Bullet':
+            md_lines.append(f"- {text}")
+        else:
+            md_lines.append(text)
+    return '\n'.join(md_lines)
+
+def summarize_text_with_gemini(text):
+    """Compatibility wrapper: return the formatted summary from the summary pipeline, or raise RuntimeError."""
+    ok, _points, _raw, formatted = run_gemini_summary_pipeline(text)
+    if ok and formatted:
+        return formatted
+    raise RuntimeError("GLM-4.6 no pudo generar el resumen solicitado")
+
+
+# --- MAIN PROCESSING FUNCTIONS ---
+def process_audio_file(file_path):
+    """Download, transcribe and summarise one remote audio file, then upload the generated documents.
+
+    Failures are logged and reported via Telegram rather than raised; the file is recorded as
+    processed only after the classified DOCX upload and the final notification succeed.
+    """
+    filename = os.path.basename(file_path)
+    send_telegram_message(
+        f"🎵 Nuevo audio detectado: {filename}\n"
+        f"🤖 Flujo activado:\n"
+        f"• GLM-4.6: puntos clave + resumen integrado\n"
+        f"• GLM-4.6: formato final"
+    )
+
+    base_name = os.path.splitext(filename)[0]
+    local_audio_path = os.path.join(LOCAL_DOWNLOADS_PATH, filename)
+    local_txt_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{base_name}.txt")
+
+    try:
+        send_telegram_message(f"⬇️ Descargando audio: {filename}")
+        webdav_download(file_path, local_audio_path)
+        send_telegram_message(f"📝 Iniciando transcripción de audio: {filename}")
+        transcribe_audio(local_audio_path, local_txt_path)
+        result = generate_unified_summary(local_txt_path, base_name)
+        if not (result and result[0]):
+            raise Exception("Failed to generate summaries")
+        _success, summary_content, output_files = result
+        output_files = output_files or {}
+        # Path('') is the current directory, which always "exists"; map missing entries to None instead.
+        docx_path, markdown_path, pdf_path = (
+            Path(output_files[key]) if output_files.get(key) else None
+            for key in ('docx_path', 'markdown_path', 'pdf_path')
+        )
+        docx_filename = output_files.get('docx_name')
+        if not (docx_path and docx_path.is_file() and docx_filename):
+            # This case used to be skipped silently, without any log entry or notification.
+            raise Exception("Summary DOCX was not generated")
+
+        ensure_thematic_folders_exist()
+        logging.info("🧠 Clasificando contenido inteligentemente...")
+        category = classify_content_intelligent(summary_content)
+        category_name = TEMATIC_FOLDERS.get(category, TEMATIC_FOLDERS["otras_clases"])
+
+        remote_docx_path = get_upload_path_for_category(category, docx_filename)
+        webdav_upload(str(docx_path), remote_docx_path)
+        logging.info(f"☁️ DOCX subido a {category_name}: {docx_filename}")
+
+        if pdf_path and pdf_path.is_file():
+            remote_pdf_path = get_upload_path_for_category(category, pdf_path.name)
+            webdav_upload(str(pdf_path), remote_pdf_path)
+            logging.info(f"☁️ PDF subido a {category_name}: {pdf_path.name}")
+
+        # The Markdown copy is best-effort: a failed upload must not discard the DOCX/PDF result.
+        try:
+            if markdown_path and markdown_path.is_file():
+                remote_md_path = os.path.join('Notes', markdown_path.name)
+                webdav_upload(str(markdown_path), remote_md_path)
+                logging.info(f"Markdown subido a Notes: {markdown_path.name}")
+        except Exception as e:
+            logging.error(f"Error subiendo Markdown para {docx_filename}: {e}")
+
+        topics = extract_key_topics_from_text(summary_content)
+        topics_str = ' - '.join(topics[:2])
+        send_telegram_message(
+            f"☁️ ✅ Resumen GLM-4.6 clasificado y subido a '{category_name}'\n"
+            f"📄 {docx_filename}\n"
+            f"🧾 Recursos generados: DOCX, PDF y Markdown\n"
+            f"🧠 Temas: {topics_str}"
+        )
+        save_processed_file(file_path)
+    except Exception as e:
+        logging.error(f"Error processing audio {filename}: {e}")
+        send_telegram_message(f"❌ Error processing audio {filename}: {e}")
+
+def process_txt_file(file_path):
+    """Download one remote text file, summarise it and upload the DOCX, PDF and Markdown results.
+
+    Failures are logged and reported via Telegram rather than raised; the file is recorded as
+    processed once the summary pipeline succeeded and the available outputs were uploaded.
+    """
+    filename = os.path.basename(file_path)
+    send_telegram_message(
+        f"📄 Nuevo texto detectado: {filename}\n"
+        f"🤖 Flujo activado:\n"
+        f"• GLM-4.6: puntos clave + resumen integrado\n"
+        f"• GLM-4.6: formato final"
+    )
+
+    base_name = os.path.splitext(filename)[0]
+    local_txt_path = os.path.join(LOCAL_DOWNLOADS_PATH, filename)
+
+    try:
+        send_telegram_message(f"⬇️ Descargando texto: {filename}")
+        webdav_download(file_path, local_txt_path)
+
+        result = generate_unified_summary(local_txt_path, base_name)
+        if not (result and result[0]):
+            raise Exception("Failed to generate summaries")
+        _success, _summary_content, output_files = result
+        output_files = output_files or {}
+
+        # Path('') is the current directory, which always "exists"; map missing entries to None instead.
+        docx_path, markdown_path, pdf_path = (
+            Path(output_files[key]) if output_files.get(key) else None
+            for key in ('docx_path', 'markdown_path', 'pdf_path')
+        )
+        # Fall back to a name derived from the source file so a missing 'docx_name' never yields ".../None".
+        docx_filename = output_files.get('docx_name') or f"{base_name}.docx"
+
+        if docx_path and docx_path.is_file():
+            webdav_upload(str(docx_path), f"{REMOTE_DOCX_AUDIO_FOLDER}/{docx_filename}")
+            send_telegram_message(f"✅ Resumen DOCX subido: {docx_filename}")
+        else:
+            logging.warning(f"Summary DOCX not found for {filename}; nothing uploaded to {REMOTE_DOCX_AUDIO_FOLDER}")
+        if pdf_path and pdf_path.is_file():
+            remote_pdf_filename = docx_filename.replace('.docx', '.pdf')
+            webdav_upload(str(pdf_path), f"{RESUMENES_FOLDER}/{remote_pdf_filename}")
+        if markdown_path and markdown_path.is_file():
+            remote_md_filename = docx_filename.replace('.docx', '.md')
+            webdav_upload(str(markdown_path), f"{RESUMENES_FOLDER}/{remote_md_filename}")
+
+        send_telegram_message(
+            f"✅ Resumen completado: {filename}\n"
+            f"📄 DOCX: {REMOTE_DOCX_AUDIO_FOLDER}/{docx_filename}"
+        )
+        save_processed_file(file_path)
+    except Exception as e:
+        logging.error(f"Error processing text {filename}: {e}")
+        send_telegram_message(f"❌ Error processing text {filename}: {e}")
+
+def check_pdf_already_processed(file_path, filename, base_name):
+    """Decide whether a PDF can be skipped because it was already converted.
+
+    Returns True if '<base_name>_editable.docx' exists locally or next to the PDF on Nextcloud,
+    or if the PDF is listed in the processed-files registry; otherwise False.
+    """
+    editable_name = f"{base_name}_editable.docx"
+
+    # Check 1: the editable DOCX is already in the local downloads folder.
+    if os.path.exists(os.path.join(LOCAL_DOWNLOADS_PATH, editable_name)):
+        logging.info(f"📋 DOCX editable ya existe localmente: {base_name}_editable.docx")
+        return True
+
+    # Check 2: the editable DOCX exists remotely (best effort; request errors are only logged).
+    try:
+        remote_docx_path = os.path.join(os.path.dirname(file_path), editable_name)
+        propfind_kwargs = {
+            "auth": HTTPBasicAuth(NEXTCLOUD_USER, NEXTCLOUD_PASS),
+            "headers": {"Depth": "0"},
+            "timeout": 5,
+        }
+        response = requests.request("PROPFIND", f"{WEBDAV_ENDPOINT}/{remote_docx_path}", **propfind_kwargs)
+        if response.status_code == 207:  # 207 Multi-Status is taken to mean the resource exists
+            logging.info(f"☁️ DOCX editable ya existe en Nextcloud: {remote_docx_path}")
+            return True
+    except Exception as e:
+        logging.debug(f"No se pudo verificar existencia en Nextcloud: {e}")
+
+    # Check 3 (fallback): the registry may hold the normalised path, its basename or the raw filename.
+    processed_files = load_processed_files()
+    normalized_path = normalize_remote_path(file_path)
+    candidates = (normalized_path, os.path.basename(normalized_path), filename)
+    if any(candidate in processed_files for candidate in candidates):
+        logging.info(f"📋 PDF ya está en registro de procesados: {filename}")
+        return True
+
+    return False
+
+def process_pdf_main(file_path):
+    """Convert one remote PDF to an editable DOCX, upload it next to the PDF, then try to add a GLM-4.6 summary (DOCX + Markdown); errors are logged, not raised."""
+    filename = os.path.basename(file_path)
+
+    base_name = os.path.splitext(filename)[0]
+    local_pdf_path = os.path.join(LOCAL_DOWNLOADS_PATH, filename)
+    local_docx_output_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{base_name}_editable.docx")
+    remote_docx_filename = f"{base_name}_editable.docx"
+
+    # Skip PDFs that check_pdf_already_processed() reports as already handled
+    if check_pdf_already_processed(file_path, filename, base_name):
+        logging.info(f"⏭️ PDF ya procesado, omitiendo: {filename}")
+        return
+
+    send_telegram_message(f"📄 Nuevo PDF detectado para procesar: {filename}")
+
+    try:
+        logging.info(f"Downloading PDF: {filename}")
+        webdav_download(file_path, local_pdf_path)
+        
+        logging.info(f"Starting OCR and correction processing for: {filename}")
+        process_pdf_file(local_pdf_path, local_docx_output_path)
+        
+        # Upload the generated editable DOCX file
+        if os.path.exists(local_docx_output_path):
+            remote_docx_path = os.path.join(os.path.dirname(file_path), remote_docx_filename)
+            logging.info(f"Uploading editable document to Nextcloud: {remote_docx_filename}")
+            webdav_upload(local_docx_output_path, remote_docx_path)
+            send_telegram_message(f"📄☁️ Documento editable subido a Nextcloud para: {filename}")
+
+            # Mark as processed right after the editable DOCX upload, before the optional summary step
+            save_processed_file(file_path)
+            logging.info(f"✅ Archivo PDF marcado como procesado: {filename}")
+
+            # Generate a full GLM-4.6 summary for every PDF; a failure here is only logged (inner try/except)
+            try:
+                send_telegram_message(f"🤖 Generando resumen completo con GLM-4.6 para: {filename}")
+
+                docx_text = docx_to_text(local_docx_output_path)
+
+                # Use the unified summary pipeline
+                success, bullet_points, raw_summary, formatted_summary = run_gemini_summary_pipeline(docx_text)
+
+                if success and formatted_summary:
+                    # Build a DOCX document holding the summary
+                    summary_docx_path = os.path.join(LOCAL_DOWNLOADS_PATH, f"{base_name}_resumen_completo.docx")
+
+                    doc = Document()
+                    doc.add_heading('Resumen Completo Generado con GLM-4.6', level=1)
+                    doc.add_paragraph(f'Documento original: {filename}')
+                    doc.add_paragraph('')
+
+                    # Add the formatted content: consecutive plain lines are merged into one paragraph
+                    lines = formatted_summary.split('\n')
+                    current_paragraph = []
+
+                    for line in lines:
+                        line = line.strip()
+                        if not line:
+                            if current_paragraph:
+                                doc.add_paragraph(' '.join(current_paragraph))
+                                current_paragraph = []
+                            continue
+
+                        if line.startswith('#'):
+                            if current_paragraph:
+                                doc.add_paragraph(' '.join(current_paragraph))
+                                current_paragraph = []
+                            # Heading: the level is the number of leading '#' characters
+                            level = len(line) - len(line.lstrip('#'))
+                            if level <= 6:
+                                doc.add_heading(line.lstrip('#').strip(), level=level)
+                            else:
+                                current_paragraph.append(line)
+                        elif line.startswith('-') or line.startswith('•'):
+                            if current_paragraph:
+                                doc.add_paragraph(' '.join(current_paragraph))
+                                current_paragraph = []
+                            doc.add_paragraph(line.lstrip('-•').strip(), style='List Bullet')
+                        else:
+                            current_paragraph.append(line)
+
+                    if current_paragraph:
+                        doc.add_paragraph(' '.join(current_paragraph))
+
+                    # Generate a quiz when there is enough text (more than 100 characters)
+                    try:
+                        quiz_text = (bullet_points or "") + "\n\n" + (raw_summary or "")
+                        if len(quiz_text.strip()) > 100:
+                            questions, answers = generate_quiz(quiz_text)
+                            if questions and answers:
+                                add_quiz_to_docx(doc, questions, answers)
+                                logging.info("✅ Quiz agregado al resumen del PDF")
+                    except Exception as quiz_error:
+                        logging.warning(f"No se pudo generar quiz para el PDF: {quiz_error}")
+
+                    doc.save(summary_docx_path)
+
+                    # Upload the summary DOCX to Nextcloud
+                    remote_summary_path = os.path.join('Resumenes', f"{base_name}_resumen_completo.docx")
+                    webdav_mkdir('Resumenes')
+                    webdav_upload(summary_docx_path, remote_summary_path)
+
+                    # Also create and upload a Markdown version
+                    md_content = f"# Resumen: {filename}\n\n{formatted_summary}"
+                    md_filename = f"{base_name}_resumen_completo.md"
+                    local_md_path = os.path.join(LOCAL_DOWNLOADS_PATH, md_filename)
+                    with open(local_md_path, 'w', encoding='utf-8') as f:
+                        f.write(md_content)
+                    remote_md_path = os.path.join('Notes', md_filename)
+                    webdav_upload(local_md_path, remote_md_path)
+
+                    send_telegram_message(f"✅ Resumen completo generado y subido para: {filename}\n📄 DOCX en Resumenes/\n📝 Markdown en Notes/")
+                    logging.info(f"✅ Resumen completo generado y subido para {filename}")
+
+                else:
+                    # Fallback: upload a placeholder summary when the pipeline did not succeed
+                    simple_summary = f"# Resumen de {filename}\n\nTexto procesado exitosamente. No se pudo generar resumen detallado."
+                    md_filename = f"{base_name}_resumen_simple.md"
+                    local_md_path = os.path.join(LOCAL_DOWNLOADS_PATH, md_filename)
+                    with open(local_md_path, 'w', encoding='utf-8') as f:
+                        f.write(simple_summary)
+                    remote_md_path = os.path.join('Notes', md_filename)
+                    webdav_upload(local_md_path, remote_md_path)
+                    logging.warning(f"⚠️ Resumen simple generado para {filename}")
+
+            except Exception as e:
+                logging.error(f"Error generando resumen para {filename}: {e}")
+                # Deliberately no Telegram notification here, to avoid spam
+        else:
+            logging.warning(f"Expected output file not found: {local_docx_output_path}")
+            # No DOCX was produced: still mark the PDF as processed so it is not retried in a loop
+            save_processed_file(file_path)
+            logging.warning(f"⚠️ Archivo marcado como procesado sin DOCX: {filename}")
+
+    except Exception as e:
+        logging.error(f"Error in conversion process for PDF {filename}: {e}")
+        key = f"pdf_process::{file_path}"
+        msg = f"❌ Error processing PDF {filename}: {e}"
+        if should_send_error(key, str(e)):
+            send_telegram_message(msg)
+
+def acquire_lock():
+    """Take the exclusive single-instance lock and return the open lock file; logs and exits with status 1 on failure."""
+    lock_fd = None
+    try:
+        lock_file = os.path.join(LOCAL_STATE_DIR, ".main_service.lock")
+        os.makedirs(os.path.dirname(lock_file), exist_ok=True)
+        # Open without truncating: mode 'w' wiped the PID of the running instance before the lock was tested.
+        lock_fd = open(lock_file, 'a+')
+        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # The lock is ours now, so the stored PID can safely be replaced.
+        lock_fd.truncate(0)
+        lock_fd.write(str(os.getpid()))
+        lock_fd.flush()
+        logging.info(f"🔒 Bloqueo adquirido. PID: {os.getpid()}")
+        return lock_fd
+    except (IOError, OSError) as e:
+        if lock_fd is not None:
+            lock_fd.close()  # do not leak the descriptor on the failure path
+        if isinstance(e, BlockingIOError):  # EAGAIN/EWOULDBLOCK: another process holds the lock
+            logging.error("❌ Ya hay otra instancia del servicio corriendo")
+        else:
+            logging.error(f"❌ Error adquiriendo bloqueo: {e}")
+        sys.exit(1)
+
+
+def release_lock(lock_fd) -> None:
+    """Unlock and close the lock file object; does nothing when lock_fd is falsy."""
+    if lock_fd:
+        try:
+            fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
+        except Exception as exc:
+            logging.warning(f"No se pudo liberar el bloqueo limpiamente: {exc}")
+        finally:
+            # Always close, even if unlocking failed; close errors are deliberately ignored.
+            try:
+                lock_fd.close()
+            except Exception:
+                pass
+
+# --- MAIN LOOP ---
+def main():
+    """Service entry point: take the instance lock, then poll the PDF, audio and text folders forever, sleeping POLL_INTERVAL seconds between cycles."""
+    # Take the single-instance lock (acquire_lock() exits the process if it cannot)
+    lock_fd = acquire_lock()
+    try:
+        logging.info("=== INICIO Nextcloud AI Service - Flujo GLM-4.6 (Claude CLI) ===")
+        logging.info("🤖 Configuración: GLM-4.6 (Claude CLI via z.ai) para puntos clave y resumen integral")
+        logging.info("📝 Modo de operación: Resúmenes colaborativos unificados")
+        logging.info("🔒 Servicio protegido contra múltiples instancias")
+
+        # Send the start-up Telegram message (a failure here must not stop the service)
+        try:
+            send_telegram_message(
+                "✨ Nextcloud AI Service Started ✨\n"
+                "🚀 Flujo GLM-4.6 activado:\n"
+                "• GLM-4.6: Puntos clave y resumen integrado\n"
+                "• GLM-4.6: Formato y entrega final"
+            )
+        except Exception as e:
+            logging.warning(f"No se pudo enviar mensaje de Telegram: {e}")
+
+        # Create necessary directories
+        ensure_local_directories()
+
+        # Initialise the usage timestamp and start the VRAM monitoring timer
+        logging.info("🚀 Iniciando sistema de monitoreo de VRAM...")
+        _update_models_usage()  # set the timestamp at start-up
+        _start_vram_cleanup_timer()
+
+        while True:
+            try:
+                logging.info("--- Polling for new files ---")
+                processed_files = load_processed_files()
+
+                # --- PROCESS PDFs FOR CONVERSION TO EDITABLE ---
+                try:
+                    webdav_mkdir(REMOTE_PDF_FOLDER)
+                    pdf_files = webdav_list(REMOTE_PDF_FOLDER)
+                    for file_path in pdf_files:
+                        normalized_path = normalize_remote_path(file_path)
+                        base_name = os.path.basename(normalized_path)
+                        filename = base_name
+
+                        # Skip if not PDF or if it's already an editable PDF
+                        if (
+                            not normalized_path.lower().endswith('.pdf')
+                            or '_editable.docx' in normalized_path.lower()
+                        ):
+                            continue
+
+                        # Pre-check before processing (process_pdf_main repeats it as a second safeguard)
+                        base_name_no_ext = os.path.splitext(filename)[0]
+                        if check_pdf_already_processed(normalized_path, filename, base_name_no_ext):
+                            logging.info(f"⏭️ PDF ya verificado como procesado, omitiendo: {filename}")
+                            continue
+
+                        process_pdf_main(normalized_path)
+
+                except Exception as e:
+                    logging.error(f"Error processing PDF folder for conversion: {e}")
+
+                # --- PROCESS AUDIOS ---
+                try:
+                    webdav_mkdir(REMOTE_DOCX_AUDIO_FOLDER)
+                    audio_files = webdav_list(REMOTE_AUDIOS_FOLDER)
+                    for file_path in audio_files:
+                        normalized_path = normalize_remote_path(file_path)
+                        base_name = os.path.basename(normalized_path)
+
+                        # Skip non-audio files and files already in the registry (by full path or basename)
+                        if (
+                            not any(normalized_path.lower().endswith(ext) for ext in AUDIO_EXTENSIONS)
+                            or normalized_path in processed_files
+                            or base_name in processed_files
+                        ):
+                            continue
+
+                        process_audio_file(normalized_path)
+
+                except Exception as e:
+                    logging.error(f"Error processing Audio folder: {e}")
+
+                # --- PROCESS TEXT FILES ---
+                try:
+                    webdav_mkdir(REMOTE_TXT_FOLDER)
+                    txt_files = webdav_list(REMOTE_TXT_FOLDER)
+                    for file_path in txt_files:
+                        normalized_path = normalize_remote_path(file_path)
+                        base_name = os.path.basename(normalized_path)
+
+                        # Skip non-text files and files already in the registry (by full path or basename)
+                        if (
+                            not any(normalized_path.lower().endswith(ext) for ext in TXT_EXTENSIONS)
+                            or normalized_path in processed_files
+                            or base_name in processed_files
+                        ):
+                            continue
+
+                        process_txt_file(normalized_path)
+
+                except Exception as e:
+                    logging.error(f"Error processing Text folder: {e}")
+
+            except Exception as cycle_error:
+                logging.exception(f"Error inesperado en el ciclo principal: {cycle_error}")
+                if should_send_error("main_loop", str(cycle_error)):
+                    send_telegram_message(f"❌ Error en ciclo principal: {cycle_error}")
+
+            logging.info(f"--- Cycle completed. Waiting {POLL_INTERVAL} seconds... ---")
+            time.sleep(POLL_INTERVAL)
+
+    except KeyboardInterrupt:
+        logging.info("🛑 Interrupción recibida, cerrando servicio")
+    finally:
+        release_lock(lock_fd)  # always release the lock, whatever ended the loop
+
+def start_dashboard():
+    """Run the Flask dashboard in a background daemon thread; return the thread, or None if start-up failed."""
+    try:
+        # Imported here rather than at module level to avoid circular imports
+        import dashboard
+        import threading
+
+        flask_options = {
+            'host': '0.0.0.0',
+            'port': 5000,
+            'debug': False,
+            'threaded': True,
+            'use_reloader': False,  # important: no reloading in production
+        }
+
+        def run_dashboard():
+            """Thread target: announce the URL and run the Flask app."""
+            logging.info("🚀 Iniciando Dashboard Flask en http://localhost:5000")
+            dashboard.app.run(**flask_options)
+
+        # Create the daemon thread and start it right away
+        worker = threading.Thread(target=run_dashboard, daemon=True)
+        worker.start()
+
+        logging.info("✅ Dashboard iniciado en hilo separado")
+        logging.info("🌐 Accede al dashboard en: http://localhost:5000")
+        return worker
+    except Exception as e:
+        logging.error(f"❌ Error iniciando dashboard: {e}")
+        logging.warning("⚠️ El servicio principal continuará sin dashboard")
+        return None
+
+
+if __name__ == "__main__":
+    # Command-line dispatch: a first argument selects a one-off operation; no argument runs the service
+    if len(sys.argv) > 1:
+        command = sys.argv[1]
+
+        if command == "whisper" and len(sys.argv) == 4:
+            # Whisper transcription mode
+            transcribe_audio(sys.argv[2], sys.argv[3])
+        elif command == "pdf" and len(sys.argv) == 4:
+            # PDF processing mode
+            process_pdf_file(sys.argv[2], sys.argv[3])
+        elif command == "seed-processed":
+            # Append every remote file not yet in the registry, so existing files are never processed
+            snapshot = _snapshot_existing_remote_files()
+            current = load_processed_files()
+
+            entries_to_add = []
+            for entry in snapshot:
+                normalized = normalize_remote_path(entry)
+                base_name = os.path.basename(normalized)
+                if normalized in current or base_name in current:
+                    continue
+                entries_to_add.append(normalized)
+
+            if entries_to_add:
+                with open(PROCESSED_FILES_PATH, "a", encoding="utf-8") as f:
+                    for normalized in sorted(entries_to_add):
+                        f.write(normalized + "\n")
+                print(f"✅ {len(entries_to_add)} entradas añadidas al registro de procesados")
+                logging.info(f"Registro de procesados actualizado con {len(entries_to_add)} entradas nuevas")
+            else:
+                print("ℹ️ No se encontraron archivos adicionales para marcar como procesados")
+            sys.exit(0)
+        elif command == "txt2docx" and len(sys.argv) == 4:
+            # Text to unified DOCX conversion mode
+            txt_file = sys.argv[2]
+            output_docx = sys.argv[3]
+
+            if not os.path.exists(txt_file):
+                print(f"❌ Text file not found: {txt_file}")
+                sys.exit(1)
+
+            # Extract base name for file generation
+            base_name = os.path.splitext(os.path.basename(txt_file))[0]
+
+            print(f"🤖 Iniciando resumen colaborativo para: {txt_file}")
+
+            # Generate unified summary
+            result = generate_unified_summary(txt_file, base_name)
+
+            if result and result[0]:
+                success, summary_content, output_files = result
+
+                docx_path = Path(output_files.get('docx_path', ''))  # NOTE(review): Path('') is '.', so a missing key still passes .exists()
+                markdown_path = Path(output_files.get('markdown_path', ''))  # NOTE(review): same pitfall as docx_path
+                pdf_path_str = output_files.get('pdf_path')
+                pdf_path = Path(pdf_path_str) if pdf_path_str else None
+
+                if not docx_path.exists():
+                    print("❌ No se generó el DOCX de salida")
+                    sys.exit(1)
+
+                # Copy the generated DOCX to the requested output path when the two differ
+                if str(docx_path) != output_docx:
+                    shutil.copy2(docx_path, output_docx)
+
+                category = classify_content_intelligent(summary_content)
+                category_name = TEMATIC_FOLDERS.get(category, TEMATIC_FOLDERS["otras_clases"])
+                topics = extract_key_topics_from_text(summary_content)
+                topics_str = ' - '.join(topics[:2])
+
+                print(f"✅ Resumen unificado generado: {output_docx}")
+                print(f"🧠 Clasificación automática: {category_name}")
+                print(f"🎯 Temas identificados: {topics_str}")
+                print(f"📝 Nombre inteligente: {output_files.get('docx_name')}")
+                if markdown_path and markdown_path.exists():
+                    print(f"📄 Markdown: {markdown_path}")
+                if pdf_path and pdf_path.exists():
+                    print(f"📄 PDF: {pdf_path}")
+            else:
+                print("❌ Failed to generate unified summary")
+                sys.exit(1)
+        elif command == "quiz" and len(sys.argv) == 4:
+            # Quiz generation mode
+            input_text = sys.argv[2]
+            output_file = sys.argv[3]
+
+            # If the first argument is a file, read it; otherwise use it as the text itself
+            if os.path.isfile(input_text):
+                with open(input_text, 'r', encoding='utf-8') as f:
+                    summary_text = f.read()
+            else:
+                summary_text = input_text
+
+            # Generate quiz
+            questions, answers = generate_quiz(summary_text)
+
+            if not questions or not answers:
+                print("❌ Could not generate quiz")
+                sys.exit(1)
+
+            # Create document
+            doc = Document()
+            doc.add_heading('Quiz Generado', level=1)
+
+            # Add quiz to document
+            add_quiz_to_docx(doc, questions, answers)
+
+            # Save file
+            doc.save(output_file)
+            print(f"✅ Quiz generated: {output_file}")
+        elif command == "dashboard-only":
+            # Run only the dashboard (in the foreground, without the polling service)
+            import dashboard
+            logging.info("🚀 Iniciando Dashboard Flask únicamente")
+            dashboard.app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
+        else:
+            # Unknown command or wrong argument count: print usage and exit with status 1
+            print("Usage:")
+            print("  python main.py                           # Run main polling service + dashboard")
+            print("  python main.py whisper <audio> <txt>     # Transcribe audio")
+            print("  python main.py pdf <pdf> <docx>          # Process PDF to editable DOCX")
+            print("  python main.py seed-processed            # Marca archivos actuales como procesados")
+            print("  python main.py txt2docx <txt> <docx>     # Convert text to summary DOCX")
+            print("  python main.py quiz <text> <docx>        # Generate quiz from text")
+            print("  python main.py dashboard-only            # Solo ejecutar dashboard")
+            sys.exit(1)
+    else:
+        # Run main polling service with integrated dashboard
+        logging.info("=" * 60)
+        logging.info("🚀 INICIANDO SERVICIO COMPLETO")
+        logging.info("=" * 60)
+
+        # Start the dashboard in a separate thread
+        dashboard_thread = start_dashboard()
+
+        # Short pause to give the dashboard time to start
+        time.sleep(2)
+
+        # Run the main service loop
+        main()
diff --git a/requirements-dashboard.txt b/requirements-dashboard.txt
new file mode 100644
index 0000000..d9014be
--- /dev/null
+++ b/requirements-dashboard.txt
@@ -0,0 +1,18 @@
+Flask==2.3.3
+Flask-CORS==4.0.0
+requests==2.31.0
+webdavclient3
+opencv-python-headless
+python-dotenv
+easyocr
+pytesseract
+Pillow
+python-docx
+openai
+ollama
+pdf2image
+transformers
+pypdf
+reportlab
+torch
+numpy
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100755
index 0000000..3e8461c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,17 @@
+webdavclient3
+requests
+torch
+openai-whisper
+pytesseract
+Pillow
+python-docx
+openai
+ollama
+pdf2image
+easyocr
+opencv-python-headless
+numpy
+transformers
+pypdf
+python-dotenv
+reportlab
diff --git a/start_with_info.py b/start_with_info.py
new file mode 100755
index 0000000..9ce635e
--- /dev/null
+++ b/start_with_info.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+"""
+Script mejorado para iniciar el servicio completo con mejor información
+"""
+
+import sys
+import os
+import time
+
+# Make the project modules importable from wherever the repository is checked
+# out instead of relying on one machine's absolute path.
+# NOTE(review): assumes dashboard.py and main.py sit next to this script - confirm.
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+def print_banner():
+    """Print the start-up banner: the service title framed by two 70-character rules."""
+    rule = "=" * 70
+    title = "🚀 NEXTCLOUD AI SERVICE - DASHBOARD INTEGRADO"
+    # A blank line precedes the first rule and another follows the second one.
+    for line in ("\n" + rule, title, rule, ""):
+        print(line)
+
+def print_step(step, message):
+    """Print one progress line in the form ``  [<step>] <message>``.
+
+    Args:
+        step: Label shown between the square brackets; callers in this file
+            pass a step number or a status mark such as "  ✓".
+        message: Text printed after the bracketed label.
+    """
+    print("  [{}] {}".format(step, message))
+
+def check_dependencies():
+    """Check that Flask and Flask-CORS can be imported.
+
+    Each result is reported through print_step. The check stops at the first
+    package that cannot be imported and prints a pip3 install hint for it.
+
+    Returns:
+        bool: True when both packages import, False as soon as one import
+        raises ImportError.
+    """
+    print_step("1", "Verificando dependencias...")
+
+    try:
+        import flask
+    except ImportError:
+        print_step("  ✗", "Flask no está instalado")
+        print("\n  💡 Instalar: pip3 install flask flask-cors")
+        return False
+
+    # Read the version from the installed package metadata. flask.__version__
+    # is deprecated upstream, and an AttributeError raised by it would not be
+    # caught by an ImportError handler, crashing the check instead of reporting.
+    try:
+        from importlib.metadata import version as installed_version
+        flask_version = installed_version("flask")
+    except ImportError:
+        # Reached when importlib.metadata is unavailable or when no package
+        # metadata is found (PackageNotFoundError is an ImportError subclass).
+        flask_version = getattr(flask, "__version__", "desconocida")
+    print_step("  ✓", f"Flask {flask_version}")
+
+    try:
+        import flask_cors
+        print_step("  ✓", "Flask-CORS")
+    except ImportError:
+        print_step("  ✗", "Flask-CORS no está instalado")
+        print("\n  💡 Instalar: pip3 install flask-cors")
+        return False
+
+    return True
+
+def check_dashboard():
+    """Check that the ``dashboard`` module can be imported.
+
+    Returns:
+        bool: True when the import succeeds; False when it raises any
+        Exception, in which case the error is reported through print_step.
+    """
+    print_step("2", "Verificando dashboard...")
+
+    imported = False
+    try:
+        import dashboard  # imported only to prove that it loads
+        print_step("  ✓", "Dashboard importado correctamente")
+        imported = True
+    except Exception as exc:
+        print_step("  ✗", f"Error importando dashboard: {exc}")
+    return imported
+
+def check_main():
+    """Check that the ``main`` service module can be imported.
+
+    Returns:
+        bool: True when the import succeeds; False when it raises any
+        Exception, in which case the error is reported through print_step.
+    """
+    print_step("3", "Verificando servicio principal...")
+
+    loaded = False
+    try:
+        import main as service_module  # imported only to prove that it loads
+        print_step("  ✓", "Servicio principal importado")
+        loaded = True
+    except Exception as exc:
+        print_step("  ✗", f"Error importando servicio principal: {exc}")
+    return loaded
+
+def start_dashboard():
+    """Start the dashboard web server on a daemon thread.
+
+    Imports ``dashboard`` and runs ``dashboard.app.run`` on host 0.0.0.0,
+    port 5000, in a background daemon thread, then waits two seconds before
+    reporting the outcome.
+
+    Returns:
+        threading.Thread | None: The running server thread, or None when the
+        dashboard could not be started (import or thread-creation error, or
+        the server thread ended during the start-up wait).
+    """
+    print_step("4", "Iniciando dashboard en hilo separado...")
+
+    try:
+        import dashboard
+        import threading
+
+        def run_dashboard():
+            """Thread target that runs the dashboard server."""
+            print("\n  🌐 Iniciando servidor Flask...")
+            dashboard.app.run(
+                host='0.0.0.0',
+                port=5000,
+                debug=False,
+                threaded=True,
+                use_reloader=False
+            )
+
+        # Daemon thread: it does not keep the process alive on its own.
+        dashboard_thread = threading.Thread(target=run_dashboard, daemon=True)
+        dashboard_thread.start()
+
+        # Give the server time to start.
+        time.sleep(2)
+
+        # Exceptions raised inside the thread never reach the except clause
+        # below. A thread that has already ended therefore means the server
+        # did not stay up (presumably app.run blocks while serving - e.g. it
+        # fails fast when port 5000 is already in use).
+        if not dashboard_thread.is_alive():
+            print_step("  ✗", "Error iniciando dashboard: el servidor terminó durante el arranque")
+            return None
+
+        print_step("  ✓", "Dashboard iniciado")
+        print("\n  📱 Dashboard disponible en:")
+        print("     http://localhost:5000")
+        print()
+
+        return dashboard_thread
+
+    except Exception as e:
+        print_step("  ✗", f"Error iniciando dashboard: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+def start_main_service():
+    """Run the main service in the foreground until it returns or is interrupted.
+
+    Imports ``main`` and calls ``main.main()``. Ctrl+C (KeyboardInterrupt) is
+    treated as a normal stop.
+
+    Returns:
+        bool: False when importing or running the service raised an Exception
+        (the traceback is printed); True otherwise, including after Ctrl+C.
+    """
+    print_step("5", "Iniciando servicio principal...")
+
+    succeeded = True
+    try:
+        import main as service
+
+        intro_lines = (
+            "\n  ⏳ Iniciando bucle principal...",
+            "  📊 El servicio procesará archivos automáticamente",
+            "  ⏹️  Para detener: Ctrl+C",
+            "",
+            "=" * 70,
+        )
+        for line in intro_lines:
+            print(line)
+
+        # Hand control to the service's own entry point.
+        service.main()
+
+    except KeyboardInterrupt:
+        print("\n\n🛑 Servicio detenido por el usuario")
+    except Exception as exc:
+        print(f"\n\n❌ Error en servicio principal: {exc}")
+        import traceback
+        traceback.print_exc()
+        succeeded = False
+
+    return succeeded
+
+def main():
+    """Run the start-up sequence: banner, pre-flight checks, dashboard, service.
+
+    Exits the process with status 1 when any pre-flight check fails or when
+    the main service reports a failure. A dashboard that could not be started
+    only produces a warning; the main service is started regardless.
+    """
+    print_banner()
+
+    # Pre-flight checks run in order; the first failure aborts the start-up.
+    for check in (check_dependencies, check_dashboard, check_main):
+        if not check():
+            sys.exit(1)
+
+    # The dashboard is optional: warn and carry on without it.
+    dashboard_thread = start_dashboard()
+    if not dashboard_thread:
+        print("\n⚠️  Continuando sin dashboard...")
+
+    # Propagate a service failure through the exit status; previously the
+    # result was discarded and the process always ended with status 0.
+    if not start_main_service():
+        sys.exit(1)
+
+# Run the start-up sequence only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..241eae0
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,1586 @@
+<!DOCTYPE html>
+<html lang="es">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Dashboard de Gestión de Audio</title>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
+    <style>
+        :root {
+            /* Dark Mode Colors */
+            --bg-primary: #0f1115;
+            --bg-secondary: #1a1d24;
+            --bg-tertiary: #222734;
+            --bg-hover: #2a3042;
+            --text-primary: #e6e9ef;
+            --text-secondary: #a4a9b6;
+            --text-tertiary: #7c8293;
+            --border-color: #2a3042;
+            --accent-color: #646cff;
+            --accent-hover: #7c83ff;
+            --success-color: #3ddc84;
+            --warning-color: #f9a826;
+            --error-color: #ff6b6b;
+            --shadow-sm: 0 2px 8px rgba(0, 0, 0, 0.3);
+            --shadow-md: 0 4px 16px rgba(0, 0, 0, 0.4);
+            --shadow-lg: 0 8px 32px rgba(0, 0, 0, 0.5);
+            --shadow-xl: 0 12px 48px rgba(0, 0, 0, 0.6);
+            --glass-bg: rgba(255, 255, 255, 0.03);
+        }
+
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            background: var(--bg-primary);
+            color: var(--text-primary);
+            line-height: 1.6;
+            -webkit-font-smoothing: antialiased;
+            -moz-osx-font-smoothing: grayscale;
+            min-height: 100vh;
+            overflow-x: hidden;
+        }
+
+        /* Animated background particles */
+        body::before {
+            content: '';
+            position: fixed;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            background:
+                radial-gradient(circle at 20% 30%, rgba(100, 108, 255, 0.1) 0%, transparent 50%),
+                radial-gradient(circle at 80% 70%, rgba(61, 220, 132, 0.08) 0%, transparent 50%),
+                radial-gradient(circle at 50% 50%, rgba(249, 168, 38, 0.05) 0%, transparent 50%);
+            pointer-events: none;
+            z-index: 0;
+            animation: backgroundShift 20s ease-in-out infinite;
+        }
+
+        @keyframes backgroundShift {
+            0%, 100% { opacity: 1; }
+            50% { opacity: 0.8; }
+        }
+
+        .container {
+            max-width: 1400px;
+            margin: 0 auto;
+            padding: 60px 20px;
+            position: relative;
+            z-index: 1;
+            animation: fadeIn 0.6s ease-out;
+        }
+
+        @keyframes fadeIn {
+            from {
+                opacity: 0;
+                transform: translateY(20px);
+            }
+            to {
+                opacity: 1;
+                transform: translateY(0);
+            }
+        }
+
+        .header {
+            background: var(--bg-secondary);
+            border-radius: 24px;
+            padding: 50px;
+            margin-bottom: 40px;
+            box-shadow: var(--shadow-lg);
+            border: 1px solid var(--border-color);
+            position: relative;
+            overflow: hidden;
+            animation: slideInDown 0.8s cubic-bezier(0.34, 1.56, 0.64, 1);
+            transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
+        }
+
+        .header::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            right: 0;
+            height: 3px;
+            background: linear-gradient(90deg, var(--accent-color), var(--success-color), var(--warning-color));
+            background-size: 200% 100%;
+            animation: gradientMove 3s linear infinite;
+        }
+
+        @keyframes gradientMove {
+            0% { background-position: 0% 0%; }
+            100% { background-position: 200% 0%; }
+        }
+
+        .header:hover {
+            box-shadow: var(--shadow-xl);
+            border-color: var(--accent-color);
+            transform: translateY(-4px);
+        }
+
+        .header h1 {
+            color: var(--text-primary);
+            font-size: 3.2rem;
+            font-weight: 800;
+            margin-bottom: 15px;
+            display: flex;
+            align-items: center;
+            gap: 20px;
+            letter-spacing: -0.02em;
+            animation: fadeInUp 0.8s ease 0.2s backwards;
+        }
+
+        .header .icon {
+            font-size: 4rem;
+            animation: float 3s ease-in-out infinite, fadeIn 1s ease;
+            filter: drop-shadow(0 0 20px rgba(100, 108, 255, 0.5));
+        }
+
+        @keyframes float {
+            0%, 100% { transform: translateY(0px) rotate(0deg); }
+            33% { transform: translateY(-8px) rotate(1deg); }
+            66% { transform: translateY(-4px) rotate(-1deg); }
+        }
+
+        .header p {
+            color: var(--text-secondary);
+            font-size: 1.3rem;
+            font-weight: 400;
+            margin-top: 10px;
+            animation: fadeInUp 0.8s ease 0.4s backwards;
+        }
+
+        @keyframes slideInDown {
+            from {
+                opacity: 0;
+                transform: translateY(-60px) scale(0.95);
+            }
+            to {
+                opacity: 1;
+                transform: translateY(0) scale(1);
+            }
+        }
+
+        @keyframes fadeInUp {
+            from {
+                opacity: 0;
+                transform: translateY(30px);
+            }
+            to {
+                opacity: 1;
+                transform: translateY(0);
+            }
+        }
+
+        .stats {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+            gap: 25px;
+            margin-bottom: 40px;
+        }
+
+        .stat-card {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 20px;
+            padding: 35px;
+            text-align: center;
+            box-shadow: var(--shadow-md);
+            transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
+            position: relative;
+            overflow: hidden;
+            animation: fadeInUp 0.6s ease backwards;
+        }
+
+        .stat-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            right: 0;
+            height: 4px;
+            background: linear-gradient(90deg, var(--accent-color), var(--accent-hover));
+            transform: translateX(-100%);
+            transition: transform 0.4s ease;
+        }
+
+        .stat-card:nth-child(2)::before {
+            background: linear-gradient(90deg, var(--success-color), #5ff3a0);
+        }
+
+        .stat-card:nth-child(3)::before {
+            background: linear-gradient(90deg, var(--warning-color), #ffb84d);
+        }
+
+        .stat-card:hover::before {
+            transform: translateX(0);
+        }
+
+        .stat-card:nth-child(1) { animation-delay: 0.1s; }
+        .stat-card:nth-child(2) { animation-delay: 0.2s; }
+        .stat-card:nth-child(3) { animation-delay: 0.3s; }
+
+        .stat-card:hover {
+            transform: translateY(-8px) scale(1.02);
+            box-shadow: var(--shadow-xl);
+            border-color: var(--accent-color);
+        }
+
+        .stat-number {
+            font-size: 3.5rem;
+            font-weight: 900;
+            margin-bottom: 10px;
+            background: linear-gradient(135deg, var(--text-primary) 0%, var(--accent-color) 100%);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            background-clip: text;
+            animation: countUp 1s ease 0.5s backwards;
+            text-shadow: 0 0 30px rgba(100, 108, 255, 0.3);
+        }
+
+        @keyframes countUp {
+            from {
+                transform: scale(0.5);
+                opacity: 0;
+                filter: blur(10px);
+            }
+            to {
+                transform: scale(1);
+                opacity: 1;
+                filter: blur(0);
+            }
+        }
+
+        .stat-label {
+            color: var(--text-secondary);
+            font-size: 1rem;
+            font-weight: 600;
+            text-transform: uppercase;
+            letter-spacing: 0.15em;
+        }
+
+        .controls {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 20px;
+            padding: 35px;
+            margin-bottom: 40px;
+            box-shadow: var(--shadow-md);
+            display: flex;
+            gap: 15px;
+            flex-wrap: wrap;
+            animation: fadeIn 0.8s ease 0.5s backwards;
+            transition: all 0.3s ease;
+        }
+
+        .controls:hover {
+            border-color: var(--accent-color);
+            box-shadow: var(--shadow-lg);
+        }
+
+        .controls button {
+            background: var(--bg-tertiary);
+            color: var(--text-primary);
+            border: 1px solid var(--border-color);
+            padding: 14px 30px;
+            border-radius: 14px;
+            cursor: pointer;
+            font-size: 1rem;
+            font-weight: 600;
+            font-family: 'Inter', sans-serif;
+            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            position: relative;
+            overflow: hidden;
+        }
+
+        .controls button::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: -100%;
+            width: 100%;
+            height: 100%;
+            background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent);
+            transition: left 0.5s;
+        }
+
+        /* Hover and active effects are limited to enabled buttons so that a
+           disabled button does not light up when the pointer is over it. */
+        .controls button:not(:disabled):hover::before {
+            left: 100%;
+        }
+
+        .controls button:not(:disabled):hover {
+            background: var(--accent-color);
+            border-color: var(--accent-color);
+            color: white;
+            transform: translateY(-3px);
+            box-shadow: 0 8px 24px rgba(100, 108, 255, 0.4);
+        }
+
+        .controls button:not(:disabled):active {
+            transform: translateY(-1px);
+        }
+
+        .controls button:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+            transform: none;
+        }
+
+        .refresh-btn {
+            background: linear-gradient(135deg, var(--success-color) 0%, #5ff3a0 100%);
+            border: none;
+            color: #000;
+            font-weight: 700;
+        }
+
+        .refresh-btn:hover {
+            background: linear-gradient(135deg, #5ff3a0 0%, var(--success-color) 100%);
+            box-shadow: 0 8px 24px rgba(61, 220, 132, 0.5);
+            color: #000;
+        }
+
+        #reprocessAllBtn {
+            background: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-hover) 100%);
+            border: none;
+            color: white;
+            font-weight: 700;
+        }
+
+        #reprocessAllBtn:hover {
+            box-shadow: 0 8px 24px rgba(100, 108, 255, 0.5);
+            color: white;
+        }
+
+        .files-container {
+            background: var(--bg-secondary);
+            border: 1px solid var(--border-color);
+            border-radius: 24px;
+            padding: 45px;
+            box-shadow: var(--shadow-lg);
+            position: relative;
+            animation: fadeIn 1s ease 0.6s backwards;
+            transition: all 0.3s ease;
+        }
+
+        .files-container:hover {
+            border-color: var(--accent-color);
+            box-shadow: var(--shadow-xl);
+        }
+
+        .files-header {
+            margin-bottom: 30px;
+            padding-bottom: 25px;
+            border-bottom: 1px solid var(--border-color);
+        }
+
+        .header-title h2 {
+            color: var(--text-primary);
+            font-size: 2.2rem;
+            font-weight: 800;
+            margin: 0;
+            letter-spacing: -0.02em;
+            animation: fadeInLeft 0.6s ease 0.7s backwards;
+        }
+
+        .header-controls {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            gap: 20px;
+            margin-top: 25px;
+            flex-wrap: wrap;
+        }
+
+        .source-filters {
+            display: flex;
+            gap: 15px;
+            align-items: center;
+            animation: fadeInLeft 0.6s ease 0.8s backwards;
+        }
+
+        .filter-checkbox {
+            display: flex;
+            align-items: center;
+            cursor: pointer;
+            user-select: none;
+            font-size: 0.95rem;
+            color: var(--text-secondary);
+            font-weight: 600;
+            transition: all 0.3s ease;
+            padding: 10px 20px;
+            border-radius: 12px;
+            background: var(--bg-tertiary);
+            border: 1px solid var(--border-color);
+            position: relative;
+            overflow: hidden;
+        }
+
+        .filter-checkbox::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            background: var(--accent-color);
+            opacity: 0;
+            transition: opacity 0.3s ease;
+            z-index: 0;
+        }
+
+        .filter-checkbox:hover::before {
+            opacity: 0.1;
+        }
+
+        .filter-checkbox:hover {
+            color: var(--text-primary);
+            border-color: var(--accent-color);
+            transform: translateY(-2px);
+            box-shadow: var(--shadow-md);
+        }
+
+        .filter-checkbox span {
+            position: relative;
+            z-index: 1;
+        }
+
+        .filter-checkbox input[type="checkbox"] {
+            display: none;
+        }
+
+        .filter-checkbox .checkmark {
+            width: 22px;
+            height: 22px;
+            border: 2px solid var(--border-color);
+            border-radius: 8px;
+            margin-right: 10px;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: all 0.3s ease;
+            background: var(--bg-secondary);
+            position: relative;
+            z-index: 1;
+        }
+
+        .filter-checkbox:hover .checkmark {
+            border-color: var(--accent-color);
+            transform: scale(1.1);
+            box-shadow: 0 0 15px rgba(100, 108, 255, 0.3);
+        }
+
+        .filter-checkbox input[type="checkbox"]:checked + .checkmark {
+            background: var(--accent-color);
+            border-color: var(--accent-color);
+            animation: checkMarkPop 0.3s ease;
+        }
+
+        @keyframes checkMarkPop {
+            0% { transform: scale(0); }
+            50% { transform: scale(1.3); }
+            100% { transform: scale(1); }
+        }
+
+        .filter-checkbox input[type="checkbox"]:checked + .checkmark::after {
+            content: '✓';
+            color: white;
+            font-size: 14px;
+            font-weight: bold;
+            animation: checkMarkBounce 0.3s ease;
+        }
+
+        @keyframes checkMarkBounce {
+            0% { transform: scale(0) rotate(-180deg); }
+            50% { transform: scale(1.2) rotate(-90deg); }
+            100% { transform: scale(1) rotate(0deg); }
+        }
+
+        .sort-checkbox {
+            display: flex;
+            align-items: center;
+            cursor: pointer;
+            user-select: none;
+            font-size: 0.95rem;
+            color: var(--text-secondary);
+            font-weight: 600;
+            transition: all 0.3s ease;
+            padding: 10px 20px;
+            border-radius: 12px;
+            background: var(--bg-tertiary);
+            border: 1px solid var(--border-color);
+            position: relative;
+            overflow: hidden;
+        }
+
+        .sort-checkbox::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            background: var(--success-color);
+            opacity: 0;
+            transition: opacity 0.3s ease;
+            z-index: 0;
+        }
+
+        .sort-checkbox:hover::before {
+            opacity: 0.1;
+        }
+
+        .sort-checkbox:hover {
+            color: var(--text-primary);
+            border-color: var(--success-color);
+            transform: translateY(-2px);
+            box-shadow: var(--shadow-md);
+        }
+
+        .sort-checkbox span {
+            position: relative;
+            z-index: 1;
+        }
+
+        .sort-checkbox input[type="radio"] {
+            display: none;
+        }
+
+        .sort-checkbox .checkmark {
+            width: 22px;
+            height: 22px;
+            border: 2px solid var(--border-color);
+            border-radius: 50%;
+            margin-right: 10px;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            transition: all 0.3s ease;
+            background: var(--bg-secondary);
+            position: relative;
+            z-index: 1;
+        }
+
+        .sort-checkbox:hover .checkmark {
+            border-color: var(--success-color);
+            transform: scale(1.1);
+            box-shadow: 0 0 15px rgba(61, 220, 132, 0.3);
+        }
+
+        .sort-checkbox input[type="radio"]:checked + .checkmark {
+            background: var(--success-color);
+            border-color: var(--success-color);
+            animation: checkMarkPop 0.3s ease;
+        }
+
+        .sort-checkbox input[type="radio"]:checked + .checkmark::after {
+            content: '';
+            width: 10px;
+            height: 10px;
+            background: white;
+            border-radius: 50%;
+            /* Dedicated keyframes name: this stylesheet already defines
+               checkMarkBounce for the checkbox tick, and a second definition
+               under the same name overrides the first one. */
+            animation: radioDotBounce 0.3s ease;
+        }
+
+        @keyframes radioDotBounce {
+            0% { transform: scale(0); opacity: 0; }
+            50% { transform: scale(1.5); opacity: 1; }
+            100% { transform: scale(1); opacity: 1; }
+        }
+
+        .search-box {
+            background: var(--bg-tertiary);
+            border: 2px solid var(--border-color);
+            border-radius: 14px;
+            padding: 14px 22px;
+            font-size: 1rem;
+            width: 350px;
+            color: var(--text-primary);
+            font-family: 'Inter', sans-serif;
+            transition: all 0.3s ease;
+            animation: fadeInRight 0.6s ease 0.9s backwards;
+        }
+
+        .search-box::placeholder {
+            color: var(--text-tertiary);
+        }
+
+        .search-box:focus {
+            outline: none;
+            border-color: var(--accent-color);
+            background: var(--bg-secondary);
+            box-shadow: 0 0 0 4px rgba(100, 108, 255, 0.15), var(--shadow-md);
+            transform: translateY(-2px);
+        }
+
+        .files-grid {
+            display: grid;
+            gap: 18px;
+        }
+
+        .file-card {
+            background: var(--bg-tertiary);
+            border: 1px solid var(--border-color);
+            border-radius: 16px;
+            padding: 25px;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
+            position: relative;
+            overflow: hidden;
+            animation: fadeInLeft 0.6s ease backwards;
+            cursor: pointer;
+        }
+
+        .file-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 4px;
+            height: 100%;
+            background: var(--accent-color);
+            transform: scaleY(0);
+            transition: transform 0.3s ease;
+            transform-origin: bottom;
+        }
+
+        .file-card:hover::before {
+            transform: scaleY(1);
+        }
+
+        .file-card:nth-child(odd) { animation-delay: 0.1s; }
+        .file-card:nth-child(even) { animation-delay: 0.2s; }
+
+        @keyframes fadeInLeft {
+            from {
+                opacity: 0;
+                transform: translateX(-50px);
+            }
+            to {
+                opacity: 1;
+                transform: translateX(0);
+            }
+        }
+
+        .file-card:hover {
+            border-color: var(--accent-color);
+            box-shadow: var(--shadow-xl);
+            transform: translateY(-5px) scale(1.01);
+            background: var(--bg-hover);
+        }
+
+        .file-info {
+            flex: 1;
+        }
+
+        .file-name {
+            font-size: 1.2rem;
+            font-weight: 600;
+            color: var(--text-primary);
+            margin-bottom: 8px;
+            letter-spacing: -0.01em;
+        }
+
+        .file-meta {
+            display: flex;
+            gap: 18px;
+            flex-wrap: wrap;
+            font-size: 0.9rem;
+            color: var(--text-tertiary);
+        }
+
+        .file-formats {
+            margin-top: 12px;
+            display: flex;
+            flex-wrap: wrap;
+            gap: 6px;
+        }
+
+        .file-actions {
+            display: flex;
+            gap: 12px;
+            align-items: center;
+        }
+
+        .status-badge {
+            padding: 8px 16px;
+            border-radius: 20px;
+            font-size: 0.8rem;
+            font-weight: 700;
+            text-transform: uppercase;
+            letter-spacing: 0.05em;
+            animation: pulse 2s ease infinite;
+        }
+
+        @keyframes pulse {
+            0%, 100% { transform: scale(1); opacity: 1; }
+            50% { transform: scale(1.05); opacity: 0.9; }
+        }
+
+        .status-processed {
+            background: rgba(61, 220, 132, 0.15);
+            color: var(--success-color);
+            border: 1px solid rgba(61, 220, 132, 0.3);
+        }
+
+        .status-pending {
+            background: rgba(249, 168, 38, 0.15);
+            color: var(--warning-color);
+            border: 1px solid rgba(249, 168, 38, 0.3);
+        }
+
+        .action-btn {
+            background: linear-gradient(135deg, var(--accent-color) 0%, var(--accent-hover) 100%);
+            color: white;
+            border: none;
+            padding: 10px 20px;
+            border-radius: 12px;
+            cursor: pointer;
+            font-size: 0.9rem;
+            font-weight: 600;
+            font-family: 'Inter', sans-serif;
+            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
+            position: relative;
+            overflow: hidden;
+        }
+
+        .action-btn::after {
+            content: '';
+            position: absolute;
+            top: 50%;
+            left: 50%;
+            width: 0;
+            height: 0;
+            background: rgba(255, 255, 255, 0.3);
+            border-radius: 50%;
+            transform: translate(-50%, -50%);
+            transition: width 0.6s, height 0.6s;
+        }
+
+        .action-btn:active::after {
+            width: 300px;
+            height: 300px;
+        }
+
+        .action-btn:hover {
+            transform: translateY(-3px);
+            box-shadow: 0 8px 25px rgba(100, 108, 255, 0.5);
+        }
+
+        .action-btn:disabled {
+            background: var(--bg-hover);
+            border: 1px solid var(--border-color);
+            cursor: not-allowed;
+            transform: none;
+            box-shadow: none;
+        }
+
+        .action-btn.reset {
+            background: linear-gradient(135deg, var(--warning-color) 0%, #ffb84d 100%);
+            color: #000;
+        }
+
+        .action-btn.reset:hover {
+            box-shadow: 0 8px 25px rgba(249, 168, 38, 0.5);
+        }
+
+        .source-badge {
+            padding: 5px 12px;
+            border-radius: 14px;
+            font-size: 0.75rem;
+            font-weight: 700;
+            text-transform: uppercase;
+            letter-spacing: 0.05em;
+        }
+
+        .source-webdav {
+            background: rgba(100, 108, 255, 0.15);
+            color: var(--accent-color);
+            border: 1px solid rgba(100, 108, 255, 0.3);
+        }
+
+        .source-local {
+            background: rgba(61, 220, 132, 0.15);
+            color: var(--success-color);
+            border: 1px solid rgba(61, 220, 132, 0.3);
+        }
+
+        .loading {
+            display: none;
+            text-align: center;
+            padding: 60px;
+            color: var(--text-secondary);
+            animation: fadeIn 0.3s ease;
+        }
+
+        .loading.active {
+            display: block;
+        }
+
+        .spinner {
+            display: inline-block;
+            width: 60px;
+            height: 60px;
+            border: 4px solid var(--border-color);
+            border-top: 4px solid var(--accent-color);
+            border-radius: 50%;
+            animation: spin 1s linear infinite, glow 2s ease-in-out infinite;
+        }
+
+        @keyframes spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+
+        @keyframes glow {
+            0%, 100% { box-shadow: 0 0 20px rgba(100, 108, 255, 0.3); }
+            50% { box-shadow: 0 0 40px rgba(100, 108, 255, 0.6); }
+        }
+
+        .loading p {
+            margin-top: 20px;
+            font-size: 1.2rem;
+            font-weight: 600;
+        }
+
+        .error, .success {
+            padding: 20px 24px;
+            border-radius: 14px;
+            margin: 20px 0;
+            font-weight: 600;
+            border: 1px solid;
+            animation: slideInRight 0.5s ease;
+            backdrop-filter: blur(10px);
+        }
+
+        @keyframes slideInRight {
+            from {
+                opacity: 0;
+                transform: translateX(100px);
+            }
+            to {
+                opacity: 1;
+                transform: translateX(0);
+            }
+        }
+
+        .error {
+            background: rgba(255, 107, 107, 0.1);
+            border-color: var(--error-color);
+            color: var(--error-color);
+        }
+
+        .success {
+            background: rgba(61, 220, 132, 0.1);
+            border-color: var(--success-color);
+            color: var(--success-color);
+        }
+
+        /* Pill-shaped download link, one per generated output format. */
+        .format-link {
+            position: relative;
+            display: inline-block;
+            overflow: hidden;
+            margin: 3px 6px 3px 0;
+            padding: 6px 12px;
+            border: 1px solid var(--border-color);
+            border-radius: 10px;
+            background: var(--bg-secondary);
+            color: var(--text-secondary);
+            font-size: 0.8rem;
+            font-weight: 600;
+            text-decoration: none;
+            transition: all 0.3s ease;
+        }
+
+        /* Accent-coloured layer parked just outside the left edge; the parent's
+           overflow: hidden keeps it invisible until hover slides it in. */
+        .format-link::before {
+            content: '';
+            position: absolute;
+            z-index: 0;
+            top: 0;
+            left: -100%;
+            height: 100%;
+            width: 100%;
+            background: var(--accent-color);
+            transition: left 0.3s ease;
+        }
+
+        /* Hover: lift the pill and switch to the accent palette. */
+        .format-link:hover {
+            border-color: var(--accent-color);
+            box-shadow: var(--shadow-md);
+            color: white;
+            text-decoration: none;
+            transform: translateY(-3px);
+        }
+
+        /* Hover: sweep the accent layer across the pill. */
+        .format-link:hover::before {
+            left: 0;
+        }
+
+        /* Keeps the label stacked above the sweeping accent layer. */
+        .format-link span {
+            z-index: 1;
+            position: relative;
+        }
+
+        /* Fade in while sliding 50px in from the right. */
+        @keyframes fadeInRight {
+            0% {
+                transform: translateX(50px);
+                opacity: 0;
+            }
+            100% {
+                transform: translateX(0);
+                opacity: 1;
+            }
+        }
+
+        /* Narrow viewports (768px and below): tighter spacing, single-column
+           stats, and rows that stack vertically instead of sitting side by side. */
+        @media (max-width: 768px) {
+            /* Spacing and type scale */
+            .container {
+                padding: 30px 15px;
+            }
+
+            .header {
+                padding: 30px;
+            }
+
+            .header h1 {
+                font-size: 2.2rem;
+            }
+
+            .stat-card {
+                padding: 25px;
+            }
+
+            .stat-number {
+                font-size: 2.5rem;
+            }
+
+            /* One stat card per row */
+            .stats {
+                grid-template-columns: 1fr;
+            }
+
+            /* Toolbar and file cards become full-width vertical stacks */
+            .header-controls,
+            .file-card {
+                flex-direction: column;
+                align-items: stretch;
+                gap: 15px;
+            }
+
+            .controls {
+                flex-direction: column;
+            }
+
+            /* Centre the rows that remain horizontal */
+            .source-filters {
+                justify-content: center;
+                flex-wrap: wrap;
+            }
+
+            .file-actions {
+                justify-content: center;
+            }
+
+            /* Full-width inputs and buttons */
+            .search-box,
+            .controls button {
+                width: 100%;
+            }
+        }
+
+        /* Honour the user's reduced-motion preference: every animation runs once
+           and, like every transition, finishes almost instantly. */
+        @media (prefers-reduced-motion: reduce) {
+            *::before,
+            *::after,
+            * {
+                transition-duration: 0.01ms !important;
+                animation-duration: 0.01ms !important;
+                animation-iteration-count: 1 !important;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <!-- Page header: dashboard title and tagline -->
+        <div class="header">
+            <h1>
+                <span class="icon">🎵</span>
+                Dashboard de Gestión de Audio
+            </h1>
+            <p>Monitorea y reprocesa archivos de audio con un solo click</p>
+        </div>
+
+        <!-- Statistics: each counter shows "-" until updateStats() writes its value -->
+        <div class="stats">
+            <div class="stat-card">
+                <div class="stat-number total" id="totalFiles">-</div>
+                <div class="stat-label">Total Archivos</div>
+            </div>
+            <div class="stat-card">
+                <div class="stat-number processed" id="processedFiles">-</div>
+                <div class="stat-label">Procesados</div>
+            </div>
+            <div class="stat-card">
+                <div class="stat-number pending" id="pendingFiles">-</div>
+                <div class="stat-label">Pendientes</div>
+            </div>
+        </div>
+
+        <!-- Controls: reload the list, or process every pending file in one go -->
+        <div class="controls">
+            <button onclick="refreshFiles()" class="refresh-btn">
+                🔄 Refrescar Lista
+            </button>
+            <button onclick="reprocessAllPending()" id="reprocessAllBtn">
+                ⚡ Reprocesar Todos los Pendientes
+            </button>
+        </div>
+
+        <!-- Messages: showMessage() swaps banners in and out of this container.
+             Declared as a polite live region so assistive technology announces
+             each success or error banner as it appears. -->
+        <div id="messages" role="status" aria-live="polite"></div>
+
+        <!-- Loading indicator: setLoading() toggles the "active" class on it -->
+        <div class="loading" id="loading">
+            <div class="spinner"></div>
+            <p>Cargando archivos...</p>
+        </div>
+
+        <!-- File list: source filters, sort order, search box and the card grid -->
+        <div class="files-container">
+            <div class="files-header">
+                <div class="header-title">
+                    <h2>📁 Archivos de Audio</h2>
+                </div>
+                <div class="header-controls">
+                    <div class="source-filters">
+                        <label class="filter-checkbox">
+                            <input type="checkbox" id="filterLocal" checked onchange="filterFiles()">
+                            <span class="checkmark"></span>
+                            <span>Local</span>
+                        </label>
+                        <label class="filter-checkbox">
+                            <input type="checkbox" id="filterWebDAV" onchange="filterFiles()">
+                            <span class="checkmark"></span>
+                            <span>WebDAV</span>
+                        </label>
+                        <div style="width: 2px; height: 30px; background: var(--border-color); margin: 0 10px;"></div>
+                        <label class="sort-checkbox">
+                            <input type="radio" id="sortDate" name="sortBy" value="date" checked onchange="filterFiles()">
+                            <span class="checkmark"></span>
+                            <span>📅 Fecha</span>
+                        </label>
+                        <label class="sort-checkbox">
+                            <input type="radio" id="sortName" name="sortBy" value="name" onchange="filterFiles()">
+                            <span class="checkmark"></span>
+                            <span>🔤 Nombre</span>
+                        </label>
+                    </div>
+                    <!-- "input" fires for every value change (typing, paste, cut, drag-drop),
+                         whereas "keyup" only covers keyboard edits. The placeholder is not an
+                         accessible name, hence the explicit aria-label. -->
+                    <input type="text" class="search-box" id="searchBox" placeholder="🔍 Buscar archivos..." aria-label="Buscar archivos" oninput="filterFiles()">
+                </div>
+            </div>
+            <div class="files-grid" id="filesGrid">
+                <!-- File cards are rendered here dynamically by renderFiles() -->
+            </div>
+        </div>
+    </div>
+
+    <script>
+        let files = [];
+        let loading = false;
+
+        // Inicialización
+        document.addEventListener('DOMContentLoaded', function() {
+            loadFiles();
+        });
+
+        // Cargar archivos desde la API
+        async function loadFiles() {
+            setLoading(true);
+            try {
+                const response = await fetch('/api/files');
+                const data = await response.json();
+
+                if (data.success) {
+                    files = data.files;
+                    updateStats(data);
+                    renderFiles();
+                    hideMessages();
+                } else {
+                    showMessage('Error cargando archivos: ' + data.message, 'error');
+                }
+            } catch (error) {
+                showMessage('Error de conexión con el servidor', 'error');
+                console.error('Error:', error);
+            } finally {
+                setLoading(false);
+            }
+        }
+
+        // Obtener estadísticas actuales de los archivos locales
+        function getCurrentStats() {
+            const total = files.length;
+            const processed = files.filter(f => f.processed).length;
+            const pending = total - processed;
+            return { total, processed, pending };
+        }
+
+        // Actualizar estadísticas
+        function updateStats(data) {
+            document.getElementById('totalFiles').textContent = data.total;
+            document.getElementById('processedFiles').textContent = data.processed;
+            document.getElementById('pendingFiles').textContent = data.pending;
+        }
+
+        // Renderizar archivos
+        function renderFiles() {
+            const grid = document.getElementById('filesGrid');
+            const filteredFiles = filterFilesArray();
+
+            if (filteredFiles.length === 0) {
+                grid.innerHTML = '<div style="text-align: center; padding: 40px; color: var(--text-tertiary); font-size: 1.1rem; animation: fadeIn 0.5s ease;">No se encontraron archivos</div>';
+                return;
+            }
+
+            grid.innerHTML = filteredFiles.map(file => {
+                const formats = file.available_formats || {};
+                const availableFormatsList = Object.keys(formats).filter(ext => formats[ext]);
+
+                const formatLinks = availableFormatsList.length > 0
+                    ? availableFormatsList.map(ext => {
+                        const baseName = file.filename.substring(0, file.filename.lastIndexOf('.'));
+                        const possibleNames = [
+                            `${baseName}.${ext}`,
+                            `${baseName}_unificado.${ext}`,
+                            `${baseName.replace(/ /g, '_')}.${ext}`,
+                            `${baseName.replace(/ /g, '_')}_unificado.${ext}`
+                        ];
+                        const icon = ext === 'txt' ? '📝' : ext === 'docx' ? '📄' : ext === 'md' ? '📋' : '📑';
+                        return `<a href="/downloads/find-file?filename=${encodeURIComponent(baseName)}&ext=${ext}" target="_blank" class="format-link"><span>${icon} ${ext}</span></a>`;
+                    }).join(' ')
+                    : '<span style="color: var(--text-tertiary); font-size: 0.9rem;">no procesado aún</span>';
+
+                return `
+                    <div class="file-card" data-filename="${file.filename.toLowerCase()}">
+                        <div class="file-info">
+                            <div class="file-name">${file.filename}</div>
+                            <div class="file-meta">
+                                <span class="source-badge source-${file.source}">${file.source}</span>
+                                <span>📦 ${file.size}</span>
+                                <span>📅 ${file.last_modified}</span>
+                            </div>
+                            <div class="file-formats">
+                                ${formatLinks}
+                            </div>
+                        </div>
+                        <div class="file-actions">
+                            <span class="status-badge ${file.processed ? 'status-processed' : 'status-pending'}">
+                                ${file.processed ? 'Procesado' : 'Pendiente'}
+                            </span>
+                            ${!file.processed ? `
+                                <button class="action-btn" onclick="reprocessFile('${file.path}', '${file.source}')" id="btn-${file.filename.replace(/[^a-zA-Z0-9]/g, '')}">
+                                    🚀 Procesar
+                                </button>
+                            ` : `
+                                <button class="action-btn reset" onclick="markUnprocessed('${file.path}')" id="btn-${file.filename.replace(/[^a-zA-Z0-9]/g, '')}">
+                                    🔄 Resetear
+                                </button>
+                            `}
+                        </div>
+                    </div>
+                `;
+            }).join('');
+        }
+
+        /**
+         * Handler wired to the filter, sort and search controls. Filtering itself
+         * happens inside renderFiles(), so this only triggers a re-render.
+         */
+        function filterFiles() { renderFiles(); }
+
+        function filterFilesArray() {
+            const searchTerm = document.getElementById('searchBox').value.toLowerCase();
+            const filterLocal = document.getElementById('filterLocal').checked;
+            const filterWebDAV = document.getElementById('filterWebDAV').checked;
+            const sortBy = document.querySelector('input[name="sortBy"]:checked')?.value || 'date';
+
+            let filteredFiles = files.filter(file => {
+                const matchesSearch = file.filename.toLowerCase().includes(searchTerm);
+                const matchesSource =
+                    (filterLocal && file.source === 'local') ||
+                    (filterWebDAV && file.source === 'webdav');
+
+                return matchesSearch && matchesSource;
+            });
+
+            // Ordenar archivos
+            if (sortBy === 'date') {
+                // Para archivos locales, convertir last_modified a fecha
+                // Para archivos webdav, usar last_modified como string
+                filteredFiles.sort((a, b) => {
+                    // Si ambos tienen last_modified
+                    if (a.last_modified !== 'Unknown' && b.last_modified !== 'Unknown') {
+                        // Si uno es local y otro webdav, comparar como fechas
+                        if (a.source === 'local' || b.source === 'local') {
+                            try {
+                                // Intentar convertir a timestamp
+                                const dateA = new Date(a.last_modified).getTime();
+                                const dateB = new Date(b.last_modified).getTime();
+                                return dateB - dateA; // Más reciente primero
+                            } catch (e) {
+                                // Si falla, ordenar alfabéticamente por fecha como string
+                                return b.last_modified.localeCompare(a.last_modified);
+                            }
+                        }
+                    }
+                    // Ordenar alfabéticamente como fallback
+                    return a.filename.localeCompare(b.filename);
+                });
+            } else if (sortBy === 'name') {
+                filteredFiles.sort((a, b) => a.filename.localeCompare(b.filename));
+            }
+
+            return filteredFiles;
+        }
+
+        // Actualizar estado de un archivo específico
+        function updateFileStatus(filePath, isProcessed) {
+            const file = files.find(f => f.path === filePath);
+            if (file) {
+                file.processed = isProcessed;
+                renderFiles();
+            }
+        }
+
+        // Reprocesar un archivo
+        async function reprocessFile(filePath, source) {
+            const file = files.find(f => f.path === filePath);
+            if (!file) return;
+
+            const formats = file.available_formats || {};
+            const existingFormats = Object.keys(formats).filter(ext => formats[ext]);
+
+            if (existingFormats.length > 0) {
+                const confirmed = await showConfirmDialog(
+                    `⚠️ Archivos existentes detectados`,
+                    `El archivo "${file.filename}" ya tiene los siguientes formatos: ${existingFormats.join(', ')}.\n\n` +
+                    `¿Estás seguro de que quieres reprocesarlo? Esto sobrescribirá los archivos existentes.`
+                );
+
+                if (!confirmed) {
+                    return;
+                }
+            }
+
+            await performReprocess(filePath, source);
+        }
+
+        // Función que ejecuta el reprocesamiento real
+        async function performReprocess(filePath, source) {
+            const btnId = 'btn-' + filePath.replace(/[^a-zA-Z0-9]/g, '');
+            const btn = document.getElementById(btnId);
+
+            if (!btn) return;
+
+            const originalText = btn.innerHTML;
+            btn.disabled = true;
+            btn.innerHTML = '⏳ Procesando...';
+
+            try {
+                const response = await fetch('/api/reprocess', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        path: filePath,
+                        source: source
+                    })
+                });
+
+                const data = await response.json();
+
+                if (data.success) {
+                    showMessage('✅ ' + data.message, 'success');
+                    setTimeout(async () => {
+                        await loadFiles();
+                        showMessage('🔄 Actualizando estado de archivos...', 'success');
+                    }, 2000);
+
+                } else {
+                    showMessage('❌ ' + data.message, 'error');
+                    btn.disabled = false;
+                    btn.innerHTML = originalText;
+                }
+            } catch (error) {
+                showMessage('❌ Error de conexión', 'error');
+                btn.disabled = false;
+                btn.innerHTML = originalText;
+                console.error('Error:', error);
+            }
+        }
+
+        // Marcar como no procesado
+        async function markUnprocessed(filePath) {
+            if (!confirm('¿Estás seguro de que quieres marcar este archivo como no procesado?')) {
+                return;
+            }
+
+            try {
+                const response = await fetch('/api/mark-unprocessed', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        path: filePath
+                    })
+                });
+
+                const data = await response.json();
+
+                if (data.success) {
+                    showMessage('✅ ' + data.message, 'success');
+                    updateFileStatus(filePath, false);
+                    updateStats(getCurrentStats());
+                } else {
+                    showMessage('❌ ' + data.message, 'error');
+                }
+            } catch (error) {
+                showMessage('❌ Error de conexión', 'error');
+                console.error('Error:', error);
+            }
+        }
+
+        // Refrescar archivos
+        async function refreshFiles() {
+            showMessage('🔄 Actualizando lista de archivos...', 'success');
+            await loadFiles();
+        }
+
+        // Reprocesar todos los pendientes
+        async function reprocessAllPending() {
+            const pendingFiles = files.filter(f => !f.processed);
+
+            if (pendingFiles.length === 0) {
+                showMessage('ℹ️ No hay archivos pendientes para procesar', 'success');
+                return;
+            }
+
+            if (!confirm(`¿Estás seguro de que quieres procesar ${pendingFiles.length} archivos pendientes?`)) {
+                return;
+            }
+
+            const btn = document.getElementById('reprocessAllBtn');
+            const originalText = btn.innerHTML;
+            btn.disabled = true;
+            btn.innerHTML = '⏳ Procesando...';
+
+            showMessage(`🚀 Iniciando procesamiento de ${pendingFiles.length} archivos...`, 'success');
+
+            for (let i = 0; i < pendingFiles.length; i++) {
+                const file = pendingFiles[i];
+                try {
+                    await reprocessFile(file.path, file.source);
+                    if (i < pendingFiles.length - 1) {
+                        await new Promise(resolve => setTimeout(resolve, 1000));
+                    }
+                } catch (error) {
+                    console.error(`Error procesando ${file.filename}:`, error);
+                }
+            }
+
+            btn.disabled = false;
+            btn.innerHTML = originalText;
+            showMessage('✅ Procesamiento en lote completado', 'success');
+        }
+
+        // Control de estado de carga
+        function setLoading(isLoading) {
+            loading = isLoading;
+            const loadingEl = document.getElementById('loading');
+            const gridEl = document.getElementById('filesGrid');
+
+            if (isLoading) {
+                loadingEl.classList.add('active');
+                gridEl.style.display = 'none';
+            } else {
+                loadingEl.classList.remove('active');
+                gridEl.style.display = 'block';
+            }
+        }
+
+        // Mostrar mensajes
+        function showMessage(message, type) {
+            const messagesEl = document.getElementById('messages');
+            const messageEl = document.createElement('div');
+            messageEl.className = type;
+            messageEl.textContent = message;
+
+            messagesEl.innerHTML = '';
+            messagesEl.appendChild(messageEl);
+
+            setTimeout(() => {
+                if (messagesEl.contains(messageEl)) {
+                    messagesEl.removeChild(messageEl);
+                }
+            }, 5000);
+        }
+
+        function hideMessages() {
+            document.getElementById('messages').innerHTML = '';
+        }
+
+        // Función para mostrar diálogo de confirmación
+        async function showConfirmDialog(title, message) {
+            return new Promise((resolve) => {
+                const modalOverlay = document.createElement('div');
+                modalOverlay.style.cssText = `
+                    position: fixed;
+                    top: 0;
+                    left: 0;
+                    width: 100%;
+                    height: 100%;
+                    background: rgba(0, 0, 0, 0.7);
+                    display: flex;
+                    justify-content: center;
+                    align-items: center;
+                    z-index: 10000;
+                    backdrop-filter: blur(5px);
+                    animation: fadeIn 0.3s ease;
+                `;
+
+                const modalContent = document.createElement('div');
+                modalContent.style.cssText = `
+                    background: var(--bg-secondary);
+                    border: 2px solid var(--border-color);
+                    border-radius: 20px;
+                    padding: 40px;
+                    max-width: 500px;
+                    width: 90%;
+                    box-shadow: var(--shadow-xl);
+                    text-align: center;
+                    animation: scaleIn 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
+                `;
+
+                modalContent.innerHTML = `
+                    <h2 style="color: var(--text-primary); margin-bottom: 20px; font-size: 1.8rem; font-weight: 700;">${title}</h2>
+                    <p style="color: var(--text-secondary); margin-bottom: 30px; line-height: 1.6; white-space: pre-line; font-size: 1.05rem;">${message}</p>
+                    <div style="display: flex; gap: 15px; justify-content: center;">
+                        <button id="confirm-cancel" style="
+                            background: var(--bg-tertiary);
+                            color: var(--text-primary);
+                            border: 1px solid var(--border-color);
+                            padding: 12px 28px;
+                            border-radius: 12px;
+                            cursor: pointer;
+                            font-size: 1rem;
+                            font-weight: 600;
+                            font-family: 'Inter', sans-serif;
+                            transition: all 0.3s ease;
+                        ">Cancelar</button>
+                        <button id="confirm-ok" style="
+                            background: linear-gradient(135deg, var(--error-color) 0%, #ff5252 100%);
+                            color: white;
+                            border: none;
+                            padding: 12px 28px;
+                            border-radius: 12px;
+                            cursor: pointer;
+                            font-size: 1rem;
+                            font-weight: 600;
+                            font-family: 'Inter', sans-serif;
+                            transition: all 0.3s ease;
+                        ">Sí, reprocesar</button>
+                    </div>
+                `;
+
+                modalOverlay.appendChild(modalContent);
+                document.body.appendChild(modalOverlay);
+
+                const cancelBtn = document.getElementById('confirm-cancel');
+                const okBtn = document.getElementById('confirm-ok');
+
+                const cleanup = () => {
+                    modalOverlay.style.animation = 'fadeOut 0.3s ease';
+                    setTimeout(() => {
+                        if (document.body.contains(modalOverlay)) {
+                            document.body.removeChild(modalOverlay);
+                        }
+                    }, 300);
+                };
+
+                cancelBtn.addEventListener('click', () => {
+                    cleanup();
+                    resolve(false);
+                });
+
+                okBtn.addEventListener('click', () => {
+                    cleanup();
+                    resolve(true);
+                });
+
+                cancelBtn.addEventListener('mouseenter', () => {
+                    cancelBtn.style.transform = 'translateY(-3px)';
+                    cancelBtn.style.boxShadow = 'var(--shadow-md)';
+                    cancelBtn.style.borderColor = 'var(--accent-color)';
+                });
+
+                cancelBtn.addEventListener('mouseleave', () => {
+                    cancelBtn.style.transform = 'translateY(0)';
+                    cancelBtn.style.boxShadow = 'none';
+                    cancelBtn.style.borderColor = 'var(--border-color)';
+                });
+
+                okBtn.addEventListener('mouseenter', () => {
+                    okBtn.style.transform = 'translateY(-3px)';
+                    okBtn.style.boxShadow = '0 8px 25px rgba(255, 107, 107, 0.5)';
+                });
+
+                okBtn.addEventListener('mouseleave', () => {
+                    okBtn.style.transform = 'translateY(0)';
+                    okBtn.style.boxShadow = 'none';
+                });
+
+                modalOverlay.addEventListener('click', (e) => {
+                    if (e.target === modalOverlay) {
+                        cleanup();
+                        resolve(false);
+                    }
+                });
+            });
+        }
+
+        // Añadir estilos de animación dinámicos
+        const style = document.createElement('style');
+        style.textContent = `
+            @keyframes scaleIn {
+                from {
+                    opacity: 0;
+                    transform: scale(0.8) translateY(-20px);
+                }
+                to {
+                    opacity: 1;
+                    transform: scale(1) translateY(0);
+                }
+            }
+
+            @keyframes fadeOut {
+                from {
+                    opacity: 1;
+                }
+                to {
+                    opacity: 0;
+                }
+            }
+        `;
+        document.head.appendChild(style);
+    </script>
+</body>
+</html>
\ No newline at end of file