CBCFacil v8.0 - Refactored with AMD GPU support

2026-01-09 13:05:46 -03:00
parent cb17136f21
commit b017504c52
54 changed files with 7251 additions and 3670 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,76 @@
 # =============================================================================
 # CBCFacil Configuration Template
 # =============================================================================
 # Copy this file to .env.secrets and fill in your actual values
 # NEVER commit .env.secrets to version control
 # =============================================================================
 # =============================================================================
 # Application Configuration
 # =============================================================================
 DEBUG=false
 LOG_LEVEL=INFO
 # =============================================================================
 # Nextcloud/WebDAV Configuration (Required for file sync)
 # =============================================================================
 NEXTCLOUD_URL=https://your-nextcloud.example.com/remote.php/webdav
 NEXTCLOUD_USER=your_username
 NEXTCLOUD_PASSWORD=your_secure_password
 # =============================================================================
 # AI Providers Configuration (Required for summarization)
 # =============================================================================
 # Option 1: Claude/Anthropic
 ANTHROPIC_AUTH_TOKEN=your_claude_api_token_here
 ZAI_BASE_URL=https://api.z.ai/api/anthropic
 # Option 2: Google Gemini
 GEMINI_API_KEY=your_gemini_api_key_here
 # =============================================================================
 # CLI Tools (Optional - for local AI tools)
 # =============================================================================
 # Paths to the local CLI binaries; leave the value empty if a CLI is not installed
 CLAUDE_CLI_PATH=/path/to/claude
 GEMINI_CLI_PATH=/path/to/gemini
 # =============================================================================
 # Telegram Notifications (Optional)
 # =============================================================================
 TELEGRAM_TOKEN=your_telegram_bot_token
 TELEGRAM_CHAT_ID=your_telegram_chat_id
 # =============================================================================
 # Dashboard Configuration (Required for production)
 # =============================================================================
 # Generate a secure key with: python -c "import os; print(os.urandom(24).hex())"
 DASHBOARD_SECRET_KEY=generate_a_secure_random_key_here
 DASHBOARD_HOST=0.0.0.0
 DASHBOARD_PORT=5000
 # =============================================================================
 # GPU Configuration (Optional)
 # =============================================================================
 # Use specific GPU: CUDA_VISIBLE_DEVICES=0
 # Use all GPUs: CUDA_VISIBLE_DEVICES=all
 CUDA_VISIBLE_DEVICES=all
 PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
 # =============================================================================
 # Performance Tuning (Optional)
 # =============================================================================
 OMP_NUM_THREADS=4
 MKL_NUM_THREADS=4
 # =============================================================================
 # Processing Configuration (Optional)
 # =============================================================================
 POLL_INTERVAL=5
 HTTP_TIMEOUT=30
 WEBDAV_MAX_RETRIES=3
 DOWNLOAD_CHUNK_SIZE=65536
 # =============================================================================
 # Logging (Optional)
 # =============================================================================
 LOG_FILE=logs/app.log
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
-# Local environment files
+.env.secrets
 .env.local
 .env
 # Python cache
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -0,0 +1,501 @@
 # Arquitectura CBCFacil v9
 ## Resumen Ejecutivo
 CBCFacil es un servicio de IA modular para procesamiento de documentos (audio, PDF, texto) con integracion a Nextcloud. Este documento describe la arquitectura actual, patrones de diseno utilizados y guia de extension del sistema.
 ## Evolucion Arquitectonica
 ### Problema Original (v7)
 El proyecto sufria de un archivo monolitico de 3167 lineas (`main.py`) que concentraba todas las responsabilidades.
 ### Solucion Actual (v9)
 Arquitectura modular con separacion clara de responsabilidades en capas independientes.
 ```
                              FLUJO DE DATOS
                              =============
  1. MONITOREO              2. DESCARGA            3. PROCESAMIENTO
  +---------------+         +---------------+       +---------------+
  |   Nextcloud   |-------->|  Downloads/   |------>|  Processors   |
  |   (WebDAV)    |         |   Local       |       |  (Audio/PDF)  |
  +---------------+         +---------------+       +---------------+
         |                                                  |
         v                                                  v
  +---------------+                                +---------------+
  |   WebDAV      |                                |   AI Services |
  |   Service     |                                |   (Claude/    |
  +---------------+                                |    Gemini)    |
                                                     +---------------+
                                                             |
                                                             v
  4. GENERACION            5. REGISTRO            6. NOTIFICACION
  +---------------+         +---------------+       +---------------+
  |   Document    |-------->|   Processed   |------>|   Telegram    |
  |   Generators  |         |   Registry    |       |   Service     |
  +---------------+         +---------------+       +---------------+
         |                                                  |
         v                                                  v
  +---------------+                                +---------------+
  |   Nextcloud   |                                |   Dashboard   |
  |   (Upload)    |                                |   (Flask)     |
  +---------------+                                +---------------+
 ```
 ## Estructura de Directorios
 ```
 cbcfacil/
 ├── main.py                      # Orquestador principal (149 lineas)
 ├── run.py                       # Script de ejecucion alternativo
 ├── config/                      # Configuracion centralizada
 │   ├── __init__.py              # Exports: settings, validate_environment
 │   ├── settings.py              # Configuracion desde variables de entorno
 │   └── validators.py            # Validadores de configuracion
 ├── core/                        # Nucleo compartido
 │   ├── __init__.py              # Exports: excepciones, Result
 │   ├── exceptions.py            # Excepciones personalizadas
 │   ├── result.py                # Patron Result/Error handling
 │   └── base_service.py          # Clase base BaseService
 ├── services/                    # Servicios externos
 │   ├── __init__.py              # Exports de servicios
 │   ├── webdav_service.py        # WebDAV/Nextcloud operaciones
 │   ├── vram_manager.py          # GPU memory management
 │   ├── telegram_service.py      # Telegram notificaciones
 │   ├── metrics_collector.py     # Metricas y estadisticas
 │   ├── ai_service.py            # Servicio AI unificado
 │   └── ai/                      # AI Providers
 │       ├── __init__.py
 │       ├── base_provider.py     # Interfaz BaseProvider
 │       ├── claude_provider.py   # Claude (Z.ai) implementation
 │       ├── gemini_provider.py   # Gemini API/CLI implementation
 │       └── provider_factory.py  # Factory para proveedores
 ├── processors/                  # Procesadores de archivos
 │   ├── __init__.py              # Exports de procesadores
 │   ├── base_processor.py        # Clase base FileProcessor
 │   ├── audio_processor.py       # Whisper transcription
 │   ├── pdf_processor.py         # PDF OCR processing
 │   └── text_processor.py        # Text summarization
 ├── document/                    # Generacion de documentos
 │   ├── __init__.py
 │   └── generators.py            # DOCX/PDF/Markdown generation
 ├── storage/                     # Persistencia y cache
 │   ├── __init__.py
 │   └── processed_registry.py    # Registro de archivos procesados
 ├── api/                         # API REST
 │   ├── __init__.py
 │   └── routes.py                # Flask routes y endpoints
 ├── tests/                       # Tests unitarios e integracion
 │   ├── conftest.py              # Fixtures pytest
 │   ├── test_config.py           # Tests de configuracion
 │   ├── test_storage.py          # Tests de almacenamiento
 │   ├── test_webdav.py           # Tests de WebDAV
 │   ├── test_processors.py       # Tests de procesadores
 │   ├── test_ai_providers.py     # Tests de AI providers
 │   ├── test_vram_manager.py     # Tests de VRAM manager
 │   └── test_main_integration.py # Tests de integracion main
 ├── docs/                        # Documentacion
 │   ├── archive/                 # Documentacion historica
 │   ├── SETUP.md                 # Guia de configuracion
 │   ├── TESTING.md               # Guia de testing
 │   └── DEPLOYMENT.md            # Guia de despliegue
 ├── requirements.txt             # Dependencias produccion
 ├── requirements-dev.txt         # Dependencias desarrollo
 ├── .env.example                 # Template de configuracion
 ├── .env.secrets                 # Configuracion local (no versionar)
 └── Dockerfile                   # Container Docker
 ```
 ## Componentes Principales
 ### 1. Servicios (services/)
 #### WebDAVService
 **Archivo**: `services/webdav_service.py`
 Responsabilidades:
 - Conexion y operaciones con Nextcloud via WebDAV
 - Download/upload de archivos
 - Listado y creacion de directorios remotos
 - Manejo de errores con reintentos configurables
 ```python
 class WebDAVService:
    def initialize(self) -> None: ...
    def list(self, remote_path: str) -> List[str]: ...
    def download(self, remote_path: str, local_path: Path) -> None: ...
    def upload(self, local_path: Path, remote_path: str) -> None: ...
    def mkdir(self, remote_path: str) -> None: ...
 ```
 #### VRAMManager
 **Archivo**: `services/vram_manager.py`
 Responsabilidades:
 - Gestion de memoria GPU
 - Carga/descarga de modelos (Whisper, OCR, TrOCR)
 - Limpieza automatica de VRAM ociosa
 - Fallback a CPU cuando GPU no disponible
 ```python
 class VRAMManager:
    def initialize(self) -> None: ...
    def cleanup(self) -> None: ...
    def should_cleanup(self) -> bool: ...
    def lazy_cleanup(self) -> None: ...
 ```
 #### TelegramService
 **Archivo**: `services/telegram_service.py`
 Responsabilidades:
 - Envio de notificaciones a Telegram
 - Throttling de errores para evitar spam
 - Notificaciones de inicio/parada del servicio
 ```python
 class TelegramService:
    def configure(self, token: str, chat_id: str) -> None: ...
    def send_message(self, message: str) -> None: ...
    def send_error_notification(self, context: str, error: str) -> None: ...
    def send_start_notification(self) -> None: ...
 ```
 ### 2. Procesadores (processors/)
 #### AudioProcessor
 **Archivo**: `processors/audio_processor.py`
 Responsabilidades:
 - Transcripcion de audio usando Whisper
 - Modelo: medium (optimizado para espanol)
 - Soporte GPU/CPU automatico
 - Post-procesamiento de texto transcrito
 #### PDFProcessor
 **Archivo**: `processors/pdf_processor.py`
 Responsabilidades:
 - Extraccion de texto de PDFs
 - OCR con EasyOCR + Tesseract + TrOCR en paralelo
 - Correccion de texto con IA
 - Generacion de documentos DOCX
 #### TextProcessor
 **Archivo**: `processors/text_processor.py`
 Responsabilidades:
 - Resumenes usando IA (Claude/Gemini)
 - Clasificacion de contenido
 - Generacion de quizzes opcionales
 ### 3. AI Services (services/ai/)
 #### ProviderFactory
 **Archivo**: `services/ai/provider_factory.py`
 Patron Factory para seleccion dinamica de proveedor de IA:
 ```python
 class ProviderFactory:
    def get_provider(self, provider_type: str = "auto") -> BaseProvider: ...
 ```
 Proveedores disponibles:
 - `claude`: Claude via Z.ai API
 - `gemini`: Google Gemini API
 - `gemini_cli`: Gemini CLI local
 - `auto`: Seleccion automatica basada en disponibilidad
 ### 4. Document Generation (document/)
 #### DocumentGenerator
 **Archivo**: `document/generators.py`
 Responsabilidades:
 - Creacion de documentos DOCX
 - Conversion a PDF
 - Formateo Markdown
 - Plantillas de documentos
 ### 5. Storage (storage/)
 #### ProcessedRegistry
 **Archivo**: `storage/processed_registry.py`
 Responsabilidades:
 - Registro persistente de archivos procesados
 - Cache en memoria con TTL
 - File locking para thread-safety
 ```python
 class ProcessedRegistry:
    def initialize(self) -> None: ...
    def load(self) -> Set[str]: ...
    def save(self, file_path: str) -> None: ...
    def is_processed(self, file_path: str) -> bool: ...
    def mark_for_reprocess(self, file_path: str) -> None: ...
 ```
 ### 6. API (api/)
 #### Flask Routes
 **Archivo**: `api/routes.py`
 Endpoints REST disponibles:
 - `GET /api/files` - Listado de archivos
 - `POST /api/reprocess` - Reprocesar archivo
 - `POST /api/mark-unprocessed` - Resetear estado
 - `GET /api/refresh` - Sincronizar con Nextcloud
 - `GET /health` - Health check
 ## Patrones de Diseno Utilizados
 ### 1. Repository Pattern
 ```python
 # storage/processed_registry.py
 class ProcessedRegistry:
    def save(self, file_path: str) -> None: ...
    def load(self) -> Set[str]: ...
    def is_processed(self, file_path: str) -> bool: ...
 ```
 ### 2. Factory Pattern
 ```python
 # services/ai/provider_factory.py
 class ProviderFactory:
    def get_provider(self, provider_type: str = "auto") -> BaseProvider: ...
 ```
 ### 3. Strategy Pattern
 ```python
 # services/vram_manager.py
 class VRAMManager:
    def cleanup(self) -> None: ...
    def should_cleanup(self) -> bool: ...
    def lazy_cleanup(self) -> None: ...
 ```
 ### 4. Service Layer Pattern
 ```python
 # services/webdav_service.py
 class WebDAVService:
    def list(self, remote_path: str) -> List[str]: ...
    def download(self, remote_path: str, local_path: Path) -> None: ...
    def upload(self, local_path: Path, remote_path: str) -> None: ...
 ```
 ### 5. Singleton Pattern
 Servicios implementados como singletons para compartir estado:
 ```python
 # services/webdav_service.py
 webdav_service = WebDAVService()
 # services/vram_manager.py
 vram_manager = VRAMManager()
 ```
 ### 6. Result Pattern
 ```python
 # core/result.py
 class Result:
    @staticmethod
    def success(value): ...
    @staticmethod
    def failure(error): ...
 ```
 ## Decisiones Arquitectonicas (ADR)
 ### ADR-001: Arquitectura Modular
 **Decision**: Separar el monolito en modulos independientes.
 **Contexto**: El archivo main.py de 3167 lineas era dificil de mantener y testear.
 **Decision**: Separar en capas: config/, core/, services/, processors/, document/, storage/, api/.
 **Consecuencias**:
 - Positivo: Codigo mas mantenible y testeable
 - Positivo: Reutilizacion de componentes
 - Negativo: Mayor complejidad inicial
 ### ADR-002: Configuracion Centralizada
 **Decision**: Usar clase Settings con variables de entorno.
 **Contexto**: Credenciales hardcodeadas representan riesgo de seguridad.
 **Decision**: Todas las configuraciones via variables de entorno con .env.secrets.
 **Consecuencias**:
 - Positivo: Seguridad mejorada
 - Positivo: Facil despliegue en diferentes entornos
 - Negativo: Requiere documentacion de variables
 ### ADR-003: GPU-First con CPU Fallback
 **Decision**: Optimizar para GPU pero soportar CPU.
 **Contexto**: No todos los usuarios tienen GPU disponible.
 **Decision**: VRAMManager con lazy loading y cleanup automatico.
 **Consecuencias**:
 - Positivo: Performance optimo en GPU
 - Positivo: Funciona sin GPU
 - Negativo: Complejidad adicional en gestion de memoria
 ### ADR-004: Factory para AI Providers
 **Decision**: Abstraer proveedores de IA detras de interfaz comun.
 **Contexto**: Multiples proveedores (Claude, Gemini) con diferentes APIs.
 **Decision**: BaseProvider con implementaciones concretas y ProviderFactory.
 **Consecuencias**:
 - Positivo: Facilidad para agregar nuevos proveedores
 - Positivo: Fallback entre proveedores
 - Negativo: Sobrecarga de abstraccion
 ## Guia de Extension del Sistema
 ### Agregar Nuevo Procesador
 1. Crear archivo en `processors/`:
 ```python
 from processors.base_processor import FileProcessor
 class NuevoProcessor(FileProcessor):
    def process(self, file_path: str) -> None:
        # Implementar procesamiento
        pass
 ```
 2. Registrar en `processors/__init__.py`:
 ```python
 from processors.nuevo_processor import NuevoProcessor
 __all__ = ['NuevoProcessor', ...]
 ```
 3. Integrar en `main.py`:
 ```python
 from processors.nuevo_processor import NuevoProcessor
 nuevo_processor = NuevoProcessor()
 ```
 ### Agregar Nuevo AI Provider
 1. Crear clase en `services/ai/`:
 ```python
 from services.ai.base_provider import BaseProvider
 class NuevoProvider(BaseProvider):
    def summarize(self, text: str) -> str:
        # Implementar
        pass
 ```
 2. Registrar en `provider_factory.py`:
 ```python
 PROVIDERS = {
    'nuevo': NuevoProvider,
    ...
 }
 ```
 3. Usar:
 ```python
 provider = factory.get_provider('nuevo')
 ```
 ### Agregar Nuevo Servicio
 1. Crear archivo en `services/`:
 ```python
 from core.base_service import BaseService
 class NuevoService(BaseService):
    def initialize(self) -> None:
        pass
 nuevo_service = NuevoService()
 ```
 2. Inicializar en `main.py`:
 ```python
 from services.nuevo_service import nuevo_service
 nuevo_service.initialize()
 ```
 ### Agregar Nuevo Endpoint API
 1. Editar `api/routes.py`:
 ```python
@app.route('/api/nuevo', methods=['GET'])
 def nuevo_endpoint():
    return {'status': 'ok'}, 200
 ```
 ## Configuracion Detallada
 ### Variables de Entorno Principales
 | Variable | Requerido | Default | Descripcion |
 |----------|-----------|---------|-------------|
 | NEXTCLOUD_URL | Si | - | URL de Nextcloud WebDAV |
 | NEXTCLOUD_USER | Si | - | Usuario Nextcloud |
 | NEXTCLOUD_PASSWORD | Si | - | Contrasena Nextcloud |
 | ANTHROPIC_AUTH_TOKEN | No | - | Token Claude/Z.ai |
 | GEMINI_API_KEY | No | - | API Key Gemini |
 | TELEGRAM_TOKEN | No | - | Token Bot Telegram |
 | TELEGRAM_CHAT_ID | No | - | Chat ID Telegram |
 | CUDA_VISIBLE_DEVICES | No | "all" | GPU a usar |
 | POLL_INTERVAL | No | 5 | Segundos entre polls |
 | LOG_LEVEL | No | "INFO" | Nivel de logging |
 ## Metricas y Benchmarks
 | Metrica | Valor |
 |---------|-------|
 | Lineas main.py | 149 (antes 3167) |
 | Modulos independientes | 8+ |
 | Cobertura tests | ~60%+ |
 | Tiempo inicio | 5-10s |
 | Transcripcion Whisper | ~1x tiempo audio (GPU) |
 | OCR PDF | 0.5-2s/pagina |
 ## Beneficios de la Arquitectura
 1. **Mantenibilidad**: Cada responsabilidad en su propio modulo
 2. **Testabilidad**: Servicios independientes y testeables
 3. **Escalabilidad**: Facil agregar nuevos procesadores/servicios
 4. **Reutilizacion**: Componentes desacoplados
 5. **Legibilidad**: Codigo organizado y documentado
 6. **Seguridad**: Configuracion centralizada sin hardcoding
 ## Licencia
 MIT License - Ver LICENSE para detalles.
--- a/README.md
+++ b/README.md
@@ -1,129 +1,206 @@
-# Nextcloud AI Service v8
+# CBCFacil v9
-Servicio unificado que escucha automáticamente tu Nextcloud, descarga audios y PDFs, los procesa con Whisper + 3 modelos de IA y publica resúmenes enriquecidos junto con un dashboard en tiempo real.
+Servicio de IA unificado para procesamiento inteligente de documentos (audio, PDF, texto) con integracion a Nextcloud.
 ## Descripcion General
 CBCFacil monitoriza automaticamente tu servidor Nextcloud, descarga archivos multimedia, los transcribe/resume utilizando modelos de IA (Whisper, Claude, Gemini) y genera documentos formateados para su descarga.
 ## Arquitectura
 ```
-┌─────────────┐    ┌─────────────┐    ┌──────────────┐    ┌──────────────┐
+----------------+     +--------------+     +-----------------+     +------------------+
-│  Nextcloud  │───▶│  Whisper    │───▶│ Claude (Z.ai)│───▶│ Gemini (CLI  │
+|   Nextcloud    |---->|  Procesador  |---->|  IA Services    |---->|  Dashboard/API   |
-│ Audios/PDFs │    │ Transcribe  │    │ Bullet +     │    │ + API)        │
+|   (WebDAV)     |     |  (Audio/PDF) |     |  (Claude/Gemini)|     |  (Flask)         |
-└─────────────┘    └─────────────┘    │ Resumenes    │    │ Formato final │
+----------------+     +--------------+     +-----------------+     +------------------+
-                                      └──────┬───────┘    └──────┬───────┘
+                              |                      |                       |
-                                             │                 │
+                              v                      v                       v
-                                             ▼                 ▼
+                        +------------+        +------------+         +------------+
-                                      Markdown / DOCX / PDF   Dashboard
+                        |   Whisper  |        |   Gemini   |         |  Telegram  |
                        |   (GPU)    |        |   API/CLI  |         |  (notify)  |
                        +------------+        +------------+         +------------+
 ```
-## Principales capacidades
+## Estructura del Proyecto
 - **Pipeline único**: descarga desde Nextcloud vía WebDAV, transcribe con Whisper, resume con Claude + Gemini y clasifica automáticamente.
 - **Dashboard integrado**: panel Flask (http://localhost:5000) para ver archivos, reprocesar o limpiar estados con un click.
 - **Diseño GPU-first**: Docker + CUDA 12.1 con PyTorch optimizado; limpieza agresiva de VRAM cuando los modelos están ociosos.
 - **Alertas opcionales**: soporte Telegram y WebDAV retries para operaciones largas.
 - **Código limpio**: sin Ollama ni servicios secundarios; sólo lo esencial para escalar y mantener.
 ## Estructura mínima
 ```
-cbc/
+cbcfacil/
-├─ Dockerfile
+├── main.py                      # Punto de entrada del servicio
-├─ docker-compose.yml
+├── run.py                       # Script de ejecucion alternativo
-├─ main.py              # Servicio principal + loop de monitoreo
+├── config/                      # Configuracion centralizada
-├─ dashboard.py         # Flask dashboard reutilizando la lógica de main.py
+│   ├── settings.py              # Variables de entorno y settings
-├─ templates/index.html # UI del dashboard
+│   └── validators.py            # Validadores de configuracion
-├─ requirements.txt
+├── core/                        # Nucleo del sistema
-└─ README.md
+│   ├── exceptions.py            # Excepciones personalizadas
 │   ├── result.py                # Patron Result
 │   └── base_service.py          # Clase base para servicios
 ├── services/                    # Servicios externos
 │   ├── webdav_service.py        # Operaciones WebDAV/Nextcloud
 │   ├── vram_manager.py          # Gestion de memoria GPU
 │   ├── telegram_service.py      # Notificaciones Telegram
 │   └── ai/                      # Proveedores de IA
 │       ├── base_provider.py     # Interfaz base
 │       ├── claude_provider.py   # Claude (Z.ai)
 │       ├── gemini_provider.py   # Gemini API/CLI
 │       └── provider_factory.py  # Factory de proveedores
 ├── processors/                  # Procesadores de archivos
 │   ├── audio_processor.py       # Transcripcion Whisper
 │   ├── pdf_processor.py         # OCR y extraccion PDF
 │   └── text_processor.py        # Resumenes y clasificacion
 ├── document/                    # Generacion de documentos
 │   └── generators.py            # DOCX, PDF, Markdown
 ├── storage/                     # Persistencia
 │   └── processed_registry.py    # Registro de archivos procesados
 ├── api/                         # API REST
 │   └── routes.py                # Endpoints Flask
 ├── tests/                       # Tests unitarios e integracion
 ├── docs/                        # Documentacion
 │   ├── archive/                 # Documentacion historica
 │   ├── SETUP.md                 # Guia de configuracion
 │   ├── TESTING.md               # Guia de testing
 │   └── DEPLOYMENT.md            # Guia de despliegue
 ├── requirements.txt             # Dependencias Python
 ├── requirements-dev.txt         # Dependencias desarrollo
 ├── .env.secrets                 # Configuracion local (no versionar)
 └── Dockerfile                   # Container Docker
 ```
 ## Requisitos
 - NVIDIA GPU con drivers CUDA 12.1+ y `nvidia-container-toolkit` si usas Docker.
 - Python 3.10+ (el Dockerfile usa 3.10) y Node.js ≥ 20 para las CLI externas.
 - Claves activas para:
  - **Z.ai (Claude CLI)** → `ANTHROPIC_AUTH_TOKEN` y `ANTHROPIC_BASE_URL`.
  - **Google Gemini** (CLI o API) → `GEMINI_API_KEY`.
  - **DeepInfra GPT-OSS-120B** → `DEEPINFRA_API_KEY`.
 - Servidor Nextcloud accesible por WebDAV (usuario, contraseña y URL remota).
-### Instalación de las CLIs externas
+- Python 3.10+
 - NVIDIA GPU con drivers CUDA 12.1+ (opcional, soporta CPU fallback)
 - Nextcloud accesible via WebDAV
 - Claves API para servicios de IA (opcional)
 ## Instalacion Rapida
 ```bash
-npm install -g @anthropic-ai/claude-code
+# Clonar y entrar al directorio
-npm install -g @google/gemini-cli
+git clone <repo_url>
-```
+cd cbcfacil
 Recuerda exportar las mismas variables de entorno (`ANTHROPIC_*`, `GEMINI_API_KEY`) para que las CLIs compartan credenciales con el servicio.
-## Configuración
+# Crear entorno virtual
 1. Copia el ejemplo `.env` (no versionado) y completa:
   ```env
   NEXTCLOUD_URL=http://tu-servidor:8080/remote.php/webdav
   NEXTCLOUD_USER=...
   NEXTCLOUD_PASSWORD=...
   GEMINI_API_KEY=...
   DEEPINFRA_API_KEY=...
   ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic
   ANTHROPIC_AUTH_TOKEN=...
   TELEGRAM_TOKEN=... (opcional)
   TELEGRAM_CHAT_ID=... (opcional)
   ```
 2. Crea los directorios utilizados por los volúmenes (si no existen):
   ```bash
   mkdir -p downloads resumenes_docx
   ```
 ## Ejecución local (sin Docker)
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
 pip install --upgrade pip
 pip install -r requirements.txt
 python3 main.py
 ```
 - El dashboard se expone en `http://localhost:5000`.
 - Los registros viven en `logs/service.log`.
-## Ejecución con Docker
+# Instalar dependencias
 pip install -r requirements.txt
 # Configurar variables de entorno
 cp .env.example .env.secrets
 # Editar .env.secrets con tus credenciales
 # Ejecutar el servicio
 python main.py
 ```
 ## Configuracion
 ### Variables de Entorno (.env.secrets)
 ```bash
 # Nextcloud/WebDAV (requerido para sincronizacion)
 NEXTCLOUD_URL=https://tu-nextcloud.com/remote.php/webdav
 NEXTCLOUD_USER=usuario
 NEXTCLOUD_PASSWORD=contrasena
 # AI Providers (requerido para resúmenes)
 ANTHROPIC_AUTH_TOKEN=sk-ant-...
 GEMINI_API_KEY=AIza...
 # Telegram (opcional)
 TELEGRAM_TOKEN=bot_token
 TELEGRAM_CHAT_ID=chat_id
 # GPU (opcional)
 CUDA_VISIBLE_DEVICES=0
 ```
 Ver `.env.example` para todas las variables disponibles.
 ## Uso
 ### Servicio Completo
 ```bash
 # Ejecutar con monitoring y dashboard
 python main.py
 ```
 ### Comandos CLI
 ```bash
 # Transcribir audio
 python main.py whisper <archivo_audio> <output_dir>
 # Procesar PDF
 python main.py pdf <archivo_pdf> <output_dir>
 ```
 ### Dashboard
 El dashboard se expone en `http://localhost:5000` e incluye:
 - Vista de archivos procesados/pendientes
 - API REST para integraciones
 - Endpoints de salud
 ## Testing
 ```bash
 # Ejecutar todos los tests
 pytest tests/
 # Tests con coverage
 pytest tests/ --cov=cbcfacil --cov-report=term-missing
 # Tests especificos
 pytest tests/test_config.py -v
 pytest tests/test_storage.py -v
 pytest tests/test_processors.py -v
 ```
 Ver `docs/TESTING.md` para guia completa.
 ## Despliegue
 ### Docker
 ```bash
 docker compose up -d --build
-docker compose logs -f app
+docker logs -f cbcfacil
 ```
 El único servicio (`nextcloud_ai_app`) ya expone el dashboard y comparte `downloads/` y `resumenes_docx/` como volúmenes.
-## Flujo del pipeline
+### Produccion
 1. **Monitoreo**: cada `POLL_INTERVAL` segundos se listan nuevas entradas en Nextcloud (`Audios`, `Pdf`, `Textos`...).
 2. **Descarga y preproceso**: los archivos se guardan en `downloads/` con normalización y sanitización de nombres.
 3. **Transcripción (Whisper)**: modelo `medium` optimizado para español (soporte GPU).
 4. **Resúmenes colaborativos**:
   - Claude CLI genera bullet points por lotes y resumen integral.
   - Gemini CLI/API aplica formato final y estilo consistente.
 5. **Clasificación y renombrado**: se detectan temáticas (Historia, Contabilidad, Gobierno, Otras Clases) + topics para nombrar archivos inteligentemente.
 6. **Entrega**: se generan `.md`, `.docx`, `.pdf` y se suben de nuevo a Nextcloud.
 7. **Dashboard**: refleja estado (procesados/pendientes), permite reprocesar o resetear registros, y sirve descargas locales.
-## Dashboard en detalle
+Ver `docs/DEPLOYMENT.md` para guia completa de despliegue.
 - **Vista general**: tarjetas con totales, filtros por origen (local/WebDAV) y buscador instantáneo.
 - **Acciones rápidas**:
  - `Procesar`: envía el archivo nuevamente al pipeline.
  - `Resetear`: elimina la marca de procesado para forzar un reprocesamiento automático.
  - `Descargar`: enlaces directos a TXT/MD/DOCX/PDF disponibles.
 - **API**:
  - `GET /api/files` → listado.
  - `POST /api/reprocess` → reprocesa.
  - `POST /api/mark-unprocessed` → limpia estado.
  - `GET /api/refresh` → sincroniza.
  - `GET /health` → healthcheck.
-## Buenas prácticas operativas
+## Metricas de Performance
 - **CUDA libre**: el servicio libera VRAM si los modelos quedan ociosos; revisa el log para ver las limpiezas agresivas.
 - **Telegram**: usa `ERROR_THROTTLE_SECONDS` para evitar spam en errores repetidos.
 - **Respaldo**: `processed_files.txt` guarda el historial de todo lo procesado; respáldalo si cambias de servidor.
 - **Extensibilidad**: si necesitas nuevos formatos o pasos, agrega funciones en `main.py` reutilizando `ThreadPoolExecutor` y los helpers existentes.
-## Troubleshooting rápido
+| Componente | Metrica |
-| Problema | Fix |
+|------------|---------|
-| --- | --- |
+| main.py | 149 lineas (antes 3167) |
-| Claude o Gemini devuelven error de permisos | Exporta `CLAUDE_DANGEROUSLY_SKIP_PERMISSIONS=1` (ya se envía al contenedor). |
+| Cobertura tests | ~60%+ |
-| No aparecen archivos en el dashboard | Verifica credenciales Nextcloud y `logs/service.log`. |
+| Tiempo inicio | ~5-10s |
-| Tiempo de espera alto en WebDAV | Ajusta `HTTP_TIMEOUT` y `WEBDAV_MAX_RETRIES` en `.env`. |
+| Transcripcion Whisper | ~1x tiempo audio (GPU) |
-| GPU ocupada constantemente | Reduce `MODEL_TIMEOUT_SECONDS` o baja el tamaño del modelo Whisper. |
+| Resumen Claude | ~2-5s por documento |
 | OCR PDF | Depende de paginas |
---
+## Contribucion
-La meta de esta versión es mantener el proyecto ágil y escalable: menos archivos, menos servicios y una sola fuente de verdad. Si necesitas habilitar nuevas integraciones (por ejemplo, más modelos o destinos), añade módulos dedicados dentro de `main.py` o expórtalos a un paquete propio manteniendo este núcleo ligero. ¡Felices resúmenes! 💡
+1. Fork el repositorio
 2. Crear branch feature (`git checkout -b feature/nueva-funcionalidad`)
 3. Commit cambios (`git commit -am 'Add nueva funcionalidad'`)
 4. Push al branch (`git push origin feature/nueva-funcionalidad`)
 5. Crear Pull Request
 ## Documentacion
 - `docs/SETUP.md` - Guia de configuracion inicial
 - `docs/TESTING.md` - Guia de testing
 - `docs/DEPLOYMENT.md` - Guia de despliegue
 - `ARCHITECTURE.md` - Documentacion arquitectonica
 - `CHANGELOG.md` - Historial de cambios
 ## Licencia
 MIT License - Ver LICENSE para detalles.
--- a/amd/ROCM_SETUP.md
+++ b/amd/ROCM_SETUP.md
@@ -0,0 +1,187 @@
 # 🚀 ROCm Setup para AMD GPU
 ## ✅ Estado Actual del Sistema
 **GPU**: AMD Radeon RX 6800 XT
 **ROCm**: 6.0
 **PyTorch**: 2.5.0+rocm6.0
 ## 📋 Comandos Esenciales
 ### Verificar GPU
 ```bash
 # Información básica de la GPU
 lspci | grep -i vga
 # Estado en tiempo real de ROCm
 rocm-smi
 # Información detallada del sistema
 rocminfo
 ```
 ### Variables de Entorno Críticas
 ```bash
 # CRÍTICO para gfx1030 (RX 6000 series)
 export HSA_OVERRIDE_GFX_VERSION=10.3.0
 # Agregar al ~/.bashrc o ~/.zshrc
 echo 'export HSA_OVERRIDE_GFX_VERSION=10.3.0' >> ~/.bashrc
 source ~/.bashrc
 ```
 ### Verificar PyTorch con ROCm
 ```bash
 # Test básico de PyTorch
 python3 -c "
 import torch
 print(f'PyTorch: {torch.__version__}')
 print(f'ROCm disponible: {torch.cuda.is_available()}')
 print(f'Dispositivos: {torch.cuda.device_count()}')
 if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    print(f'Memoria: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB')
 "
 # Benchmark rápido
 python3 -c "
 import torch, time
 a = torch.randn(4096, 4096, device='cuda')
 b = torch.randn(4096, 4096, device='cuda')
 start = time.time()
 c = torch.matmul(a, b)
 torch.cuda.synchronize()
 print(f'GPU time: {time.time() - start:.4f}s')
 "
 ```
 ## 🧪 Script de Stress Test
 ### Ejecutar Stress Test (2 minutos)
 ```bash
 # Desde la raíz del repositorio
 python3 amd/rocm_stress_test.py
 ```
 ## 🔧 Troubleshooting
 ### Si ROCm no detecta la GPU:
 ```bash
 # Verificar módulos del kernel
 lsmod | grep amdgpu
 lsmod | grep kfd
 # Recargar módulos
 sudo modprobe amdgpu
 sudo modprobe kfd
 # Verificar logs
 dmesg | grep amdgpu
 ```
 ### Si PyTorch no encuentra ROCm:
 ```bash
 # Reinstalar PyTorch con ROCm
 pip uninstall torch torchvision torchaudio
 pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
 ```
 ### Si hay errores de memoria:
 ```bash
 # Limpiar cache de GPU
 python3 -c "import torch; torch.cuda.empty_cache()"
 # Verificar uso de memoria
 rocm-smi --meminfo
 ```
 ## 📊 Monitoreo Continuo
 ### Terminal 1 - Monitor en tiempo real
 ```bash
 watch -n 1 rocm-smi
 ```
 ### Terminal 2 - Información detallada
 ```bash
 rocm-smi --showtemp --showmeminfo vram --showmeminfo all
 ```
 ## 💡 Ejemplos de Uso
 ### Cargar modelo en GPU
 ```python
 import torch
 from transformers import AutoModel
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Usando dispositivo: {device}")
 model = AutoModel.from_pretrained("bert-base-uncased")
 model = model.to(device)
 # Los tensores ahora se procesarán en la GPU
 inputs = torch.tensor([1, 2, 3]).to(device)
 ```
 ### Entrenamiento en GPU
 ```python
 import torch
 import torch.nn as nn
 device = torch.device("cuda")
 model = tu_modelo().to(device)
 criterion = nn.CrossEntropyLoss().to(device)
 optimizer = torch.optim.Adam(model.parameters())
 for epoch in range(epochs):
    for batch in dataloader:
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 ```
 ## 🎯 Optimizaciones
 ### Para mejor rendimiento:
 ```python
 # Usar mixed precision (más rápido en RDNA2)
 from torch.cuda.amp import autocast, GradScaler
 scaler = GradScaler()
 with autocast():
    output = model(inputs)
    loss = criterion(output, targets)
 scaler.scale(loss).backward()
 scaler.step(optimizer)
 scaler.update()
 ```
 ## 📈 Comandos Útiles
 ```bash
 # Ver versión de ROCm
 rocm-smi --version
 # Verificar HSA
 rocminfo
 # Test de compatibilidad
 python3 /opt/rocm/bin/rocprofiler-compute-test.py
 # Verificar backend MPS (Apple Metal; no aplica a ROCm)
 python3 -c "import torch; print(torch.backends.mps.is_available())"  # False en AMD
 ```
 ## ⚡ Performance Tips
 1. **Siempre mueve datos a GPU**: `.to(device)`
 2. **Usa batch sizes grandes**: Aprovecha los 16GB de VRAM
 3. **Mixed precision**: Acelera el entrenamiento 1.5-2x
 4. **DataLoader con num_workers**: Carga datos en paralelo
 5. **torch.cuda.synchronize()**: Para benchmarks precisos
--- a/amd/rocm_stress_test.py
+++ b/amd/rocm_stress_test.py
@@ -0,0 +1,255 @@
 #!/usr/bin/env python3
 """
 🔥 ROCm Stress Test - Prueba de resistencia para GPU AMD
 Ejecuta operaciones intensivas durante 2 minutos y monitorea métricas
 """
 import torch
 import time
 import sys
 from datetime import datetime
 def print_header():
    """Print the banner with start time, GPU name and total VRAM.

    Both device fields fall back to 'N/A' when no CUDA/ROCm device is
    available, so the banner can be printed before (or without) a GPU check
    instead of crashing inside ``get_device_properties``.
    """
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        total_bytes = torch.cuda.get_device_properties(0).total_memory
        vram_total = f"{total_bytes / 1024**3:.2f} GB"
    else:
        gpu_name = 'N/A'
        vram_total = 'N/A'
    print("=" * 70)
    print("🔥 ROCm STRESS TEST - AMD GPU STRESS TEST")
    print("=" * 70)
    print(f"Inicio: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"GPU: {gpu_name}")
    print(f"VRAM Total: {vram_total}")
    print("=" * 70)
    print()
 def get_gpu_stats():
    """Return a memory snapshot for GPU 0, or None when no GPU is available.

    The returned dict reports allocated, reserved and total memory in GiB
    plus the allocated share of total memory as a percentage.
    """
    if torch.cuda.is_available():
        gib = 1024**3
        total_gib = torch.cuda.get_device_properties(0).total_memory / gib
        allocated_gib = torch.cuda.memory_allocated(0) / gib
        reserved_gib = torch.cuda.memory_reserved(0) / gib
        snapshot = {}
        snapshot['mem_allocated'] = allocated_gib
        snapshot['mem_reserved'] = reserved_gib
        snapshot['mem_total'] = total_gib
        snapshot['mem_percent'] = (allocated_gib / total_gib) * 100
        return snapshot
    return None
 def stress_test(duration_seconds=120):
    """Run the GPU stress loop for ``duration_seconds`` and collect timings.

    Each iteration times four workloads: an fp16 matmul (size 2048, 4096 or
    8192 depending on the iteration number), a Conv3d forward pass,
    ReLU + BatchNorm on the convolution output, and a 2048x2048 softmax.
    A failing iteration is counted in ``errors`` and the loop continues.

    Args:
        duration_seconds: Wall-clock budget for the loop, in seconds.

    Returns:
        dict with the per-workload timing lists (seconds) under
        'matmul_times', 'conv_times', 'reLU_times' and 'softmax_times',
        plus the 'iterations' and 'errors' counters.

    Raises:
        SystemExit: with code 1 when no CUDA/ROCm device is available.
    """
    print(f"🧪 Iniciando stress test por {duration_seconds} segundos...")
    print(f"   Presiona Ctrl+C para detener en cualquier momento\n")
    if not torch.cuda.is_available():
        print("❌ ERROR: CUDA/ROCm no está disponible!")
        sys.exit(1)
    device = torch.device("cuda")
    torch.cuda.set_per_process_memory_fraction(0.85, 0)  # Cap usage at 85% of VRAM
    results = {
        'matmul_times': [],
        'conv_times': [],
        'reLU_times': [],
        'softmax_times': [],
        'iterations': 0,
        'errors': 0
    }
    # The layers are loop-invariant: build them once instead of per iteration.
    conv = torch.nn.Conv3d(128, 256, kernel_size=3, padding=1).to(device)
    # The Conv3d output is 5-D (N, C, D, H, W), so BatchNorm3d is required;
    # BatchNorm2d rejects it and made every iteration fail before softmax.
    bn = torch.nn.BatchNorm3d(256).to(device)
    start_time = time.time()
    iteration = 0
    last_print = 0
    try:
        while time.time() - start_time < duration_seconds:
            iteration += 1
            results['iterations'] = iteration
            a = b = c = x = out = softmax_out = None
            try:
                # Only forward passes are timed; no_grad keeps autograd from
                # retaining the multi-GiB activations.
                with torch.no_grad():
                    # 1. Matrix multiplication (size varies by iteration)
                    if iteration % 5 == 0:
                        size = 8192  # Occasionally a large matrix
                    elif iteration % 3 == 0:
                        size = 4096
                    else:
                        size = 2048
                    a = torch.randn(size, size, device=device, dtype=torch.float16)
                    b = torch.randn(size, size, device=device, dtype=torch.float16)
                    torch.cuda.synchronize()
                    t0 = time.time()
                    c = torch.matmul(a, b)
                    torch.cuda.synchronize()
                    results['matmul_times'].append(time.time() - t0)
                    a = b = c = None
                    # 2. 3D convolution
                    # NOTE(review): this float32 input has 2**30 elements
                    # (4 GiB) and the 256-channel output is 8 GiB, so smaller
                    # cards may hit the 85% cap here; such failures are
                    # counted as errors below.
                    x = torch.randn(32, 128, 64, 64, 64, device=device)
                    torch.cuda.synchronize()
                    t0 = time.time()
                    out = conv(x)
                    torch.cuda.synchronize()
                    results['conv_times'].append(time.time() - t0)
                    x = None  # Release the input before the next allocation
                    # 3. ReLU + BatchNorm (ReLU in place to limit peak memory)
                    torch.cuda.synchronize()
                    t0 = time.time()
                    out = bn(out.relu_())
                    torch.cuda.synchronize()
                    results['reLU_times'].append(time.time() - t0)
                    out = None
                    # 4. Large softmax
                    x = torch.randn(2048, 2048, device=device)
                    torch.cuda.synchronize()
                    t0 = time.time()
                    softmax_out = torch.softmax(x, dim=-1)
                    torch.cuda.synchronize()
                    results['softmax_times'].append(time.time() - t0)
            except Exception as e:
                results['errors'] += 1
                if results['errors'] < 5:  # Only show the first few errors
                    print(f"\n⚠️  Error en iteración {iteration}: {str(e)[:50]}")
            finally:
                # Drop tensor references on success and failure alike so a
                # failed iteration does not pin memory into the next one.
                a = b = c = x = out = softmax_out = None
            # Progress line, refreshed about once per second
            elapsed = time.time() - start_time
            if elapsed - last_print >= 1.0 or elapsed >= duration_seconds:
                last_print = elapsed
                progress = (elapsed / duration_seconds) * 100
                stats = get_gpu_stats()
                recent = results['matmul_times'][-10:]
                matmul_avg = sum(recent) / len(recent) if recent else 0
                print(f"\r⏱️  Iteración {iteration:4d} | "
                      f"Tiempo: {elapsed:6.1f}s/{duration_seconds}s [{progress:5.1f}%] | "
                      f"VRAM: {stats['mem_allocated']:5.2f}GB/{stats['mem_total']:.2f}GB ({stats['mem_percent']:5.1f}%) | "
                      f"MatMul avg: {matmul_avg*1000:6.2f}ms | "
                      f"Iter/s: {iteration/elapsed:5.2f}",
                      end='', flush=True)
            # Short pause between iterations
            time.sleep(0.05)
    except KeyboardInterrupt:
        print("\n\n⏹️  Interrumpido por el usuario")
        elapsed = time.time() - start_time
        print(f"   Duración real: {elapsed:.1f} segundos")
        print(f"   Iteraciones: {iteration}")
    print("\n")
    return results
 def _matmul_size_for_iteration(iteration):
    """Return the matrix size stress_test uses on a 1-based iteration number."""
    if iteration % 5 == 0:
        return 8192
    if iteration % 3 == 0:
        return 4096
    return 2048
 def _timing_stats(times):
    """Return (average, minimum, maximum) of a non-empty list of timings."""
    return sum(times) / len(times), min(times), max(times)
 def print_summary(results, duration):
    """Print the final report for a stress-test run.

    Args:
        results: dict as returned by ``stress_test`` (timing lists in
            seconds plus the 'iterations' and 'errors' counters).
        duration: Measured wall-clock duration of the run, in seconds.

    The GFLOPS figure is total matmul FLOPs divided by total matmul time.
    Matrix sizes are rebuilt from the iteration schedule, assuming the k-th
    recorded timing belongs to iteration k; this is only approximate when
    some iterations failed before their matmul was recorded.
    """
    print("\n" + "=" * 70)
    print("📊 RESUMEN DEL STRESS TEST")
    print("=" * 70)
    print(f"Duración total: {duration:.2f} segundos")
    print(f"Iteraciones completadas: {results['iterations']}")
    print(f"Errores: {results['errors']}")
    print()
    matmul_times = results['matmul_times']
    if matmul_times:
        matmul_avg, matmul_min, matmul_max = _timing_stats(matmul_times)
        last10 = matmul_times[-10:]
        matmul_last10_avg = sum(last10) / len(last10)
        print(f"🔢 MATRIZ MULTIPLICACIÓN (2048-8192)")
        print(f"  Promedio:     {matmul_avg*1000:8.2f} ms")
        print(f"  Últimas 10:   {matmul_last10_avg*1000:8.2f} ms")
        print(f"  Mínimo:       {matmul_min*1000:8.2f} ms")
        print(f"  Máximo:       {matmul_max*1000:8.2f} ms")
        print()
    if results['conv_times']:
        conv_avg, conv_min, conv_max = _timing_stats(results['conv_times'])
        print(f"🧮 CONVOLUCIÓN 3D (32x128x64³)")
        print(f"  Promedio:     {conv_avg*1000:8.2f} ms")
        print(f"  Mínimo:       {conv_min*1000:8.2f} ms")
        print(f"  Máximo:       {conv_max*1000:8.2f} ms")
        print()
    if results['reLU_times']:
        relu_avg, relu_min, relu_max = _timing_stats(results['reLU_times'])
        print(f"⚡ ReLU + BatchNorm")
        print(f"  Promedio:     {relu_avg*1000:8.4f} ms")
        print(f"  Mínimo:       {relu_min*1000:8.4f} ms")
        print(f"  Máximo:       {relu_max*1000:8.4f} ms")
        print()
    if results['softmax_times']:
        softmax_avg, softmax_min, softmax_max = _timing_stats(results['softmax_times'])
        print(f"🔥 Softmax (2048x2048)")
        print(f"  Promedio:     {softmax_avg*1000:8.2f} ms")
        print(f"  Mínimo:       {softmax_min*1000:8.2f} ms")
        print(f"  Máximo:       {softmax_max*1000:8.2f} ms")
        print()
    # Performance score
    total_ops = results['iterations']
    if total_ops > 0 and duration > 0:
        print("=" * 70)
        print(f"✅ TEST COMPLETADO")
        print(f"   📈 {total_ops} operaciones en {duration:.1f} segundos")
        print(f"   ⚡ {total_ops/duration:.2f} operaciones/segundo")
        print(f"   💾 Uso de VRAM: Hasta ~85% (configurado)")
        print("=" * 70)
        print()
        # Approximate matmul throughput: an n x n product costs 2 * n^3 FLOPs.
        total_matmul_time = sum(matmul_times)
        if matmul_times and total_matmul_time > 0:
            total_flops = sum(
                2 * _matmul_size_for_iteration(k) ** 3
                for k in range(1, len(matmul_times) + 1)
            )
            gflops = total_flops / total_matmul_time / 1e9
            print(f"🚀 RENDIMIENTO ESTIMADO")
            print(f"   ~{gflops:.2f} GFLOPS (matriz multiplicación)")
 def main():
    """Script entry point: check the GPU, run the stress test, print a summary.

    The availability check runs before anything touches the device, so a
    host without ROCm/CUDA gets the setup hint and exit code 1 rather than
    a traceback from the banner.
    """
    if not torch.cuda.is_available():
        print("❌ ERROR: ROCm/CUDA no está disponible!")
        print("   Ejecuta: export HSA_OVERRIDE_GFX_VERSION=10.3.0")
        sys.exit(1)
    print_header()
    # Run the stress test
    duration = 120  # 2 minutes
    start = time.time()
    results = stress_test(duration)
    actual_duration = time.time() - start
    # Report on the measured duration (the run may have been interrupted)
    print_summary(results, actual_duration)
    # Cleanup
    torch.cuda.empty_cache()
    print("🧹 Cache de GPU limpiado")
    print("\n✅ Stress test finalizado")
 if __name__ == "__main__":
    main()
--- a/api/init.py
+++ b/api/init.py
@@ -0,0 +1,7 @@
 """
 API package for CBCFacil
 """
 from .routes import create_app
 __all__ = ['create_app']
--- a/api/routes.py
+++ b/api/routes.py
@@ -0,0 +1,280 @@
 """
 Flask API routes for CBCFacil dashboard
 """
 import os
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Any, List
 from flask import Flask, render_template, request, jsonify, send_from_directory
 from flask_cors import CORS
 from ..config import settings
 from ..storage import processed_registry
 from ..services.webdav_service import webdav_service
 from ..services import vram_manager
 def create_app() -> Flask:
    """Create and configure the Flask dashboard application.

    Registers the dashboard page, the JSON file-management endpoints, the
    download endpoint and the health check, then returns the app.

    Returns:
        The configured Flask application.
    """
    app = Flask(__name__)
    CORS(app)
    # Configure app
    app.config['SECRET_KEY'] = settings.DASHBOARD_SECRET_KEY or os.urandom(24)
    app.config['DOWNLOADS_FOLDER'] = str(settings.LOCAL_DOWNLOADS_PATH)
    def _json_body() -> Dict[str, Any]:
        """Return the request's JSON object, or {} when it is missing or invalid."""
        # silent=True makes a missing/malformed body yield None instead of
        # raising, so callers answer with their own 400 rather than a 500.
        payload = request.get_json(silent=True)
        return payload if isinstance(payload, dict) else {}
    @app.route('/')
    def index():
        """Dashboard home page"""
        return render_template('index.html')
    @app.route('/api/files')
    def get_files():
        """Get list of audio files"""
        try:
            files = get_audio_files()
            return jsonify({
                'success': True,
                'files': files,
                'total': len(files),
                'processed': sum(1 for f in files if f['processed']),
                'pending': sum(1 for f in files if not f['processed'])
            })
        except Exception as e:
            app.logger.error(f"Error getting files: {e}")
            return jsonify({
                'success': False,
                'message': f"Error: {str(e)}"
            }), 500
    @app.route('/api/reprocess', methods=['POST'])
    def reprocess_file():
        """Reprocess a file"""
        try:
            data = _json_body()
            file_path = data.get('path')
            source = data.get('source', 'local')  # Unused until the TODO below is implemented
            if not file_path:
                return jsonify({
                    'success': False,
                    'message': "Path del archivo es requerido"
                }), 400
            # TODO: Implement file reprocessing
            # This would trigger the main processing loop
            return jsonify({
                'success': True,
                'message': f"Archivo {Path(file_path).name} enviado a reprocesamiento"
            })
        except Exception as e:
            app.logger.error(f"Error reprocessing file: {e}")
            return jsonify({
                'success': False,
                'message': f"Error: {str(e)}"
            }), 500
    @app.route('/api/mark-unprocessed', methods=['POST'])
    def mark_unprocessed():
        """Mark file as unprocessed"""
        try:
            data = _json_body()
            file_path = data.get('path')
            if not file_path:
                return jsonify({
                    'success': False,
                    'message': "Path del archivo es requerido"
                }), 400
            if processed_registry.remove(file_path):
                return jsonify({
                    'success': True,
                    'message': "Archivo marcado como no procesado"
                })
            return jsonify({
                'success': False,
                'message': "No se pudo marcar como no procesado"
            }), 500
        except Exception as e:
            app.logger.error(f"Error marking unprocessed: {e}")
            return jsonify({
                'success': False,
                'message': f"Error: {str(e)}"
            }), 500
    @app.route('/api/refresh')
    def refresh_files():
        """Refresh file list"""
        try:
            processed_registry.load()
            files = get_audio_files()
            return jsonify({
                'success': True,
                'message': "Lista de archivos actualizada",
                'files': files
            })
        except Exception as e:
            app.logger.error(f"Error refreshing files: {e}")
            return jsonify({
                'success': False,
                'message': f"Error: {str(e)}"
            }), 500
    @app.route('/downloads/<path:filename>')
    def download_file(filename):
        """Serve a file from the downloads or DOCX directory.

        Responds 400 when the name escapes both directories, 404 when it is
        inside one of them but no such file exists.
        """
        try:
            base_dirs = [
                Path(str(settings.LOCAL_DOWNLOADS_PATH)).resolve(),
                Path(str(settings.LOCAL_DOCX)).resolve(),
            ]
            # Resolve the requested name against each allowed base (not the
            # process CWD) and keep only targets that stay strictly inside it.
            contained = []
            for base in base_dirs:
                target = (base / filename).resolve()
                if target != base and target.is_relative_to(base):
                    contained.append((base, target))
            if not contained:
                return jsonify({'error': 'Invalid filename'}), 400
            # Downloads directory first, then the DOCX directory.
            for base, target in contained:
                if target.is_file():
                    return send_from_directory(str(base), target.relative_to(base).as_posix())
            return jsonify({'error': 'File not found'}), 404
        except Exception as e:
            app.logger.error(f"Error downloading file: {e}")
            return jsonify({'error': 'File not found'}), 404
    @app.route('/health')
    def health_check():
        """Health check endpoint"""
        gpu_info = vram_manager.get_usage()
        return jsonify({
            'status': 'healthy',
            'timestamp': datetime.now().isoformat(),
            'processed_files_count': processed_registry.count(),
            'gpu': gpu_info,
            'config': {
                'webdav_configured': settings.has_webdav_config,
                'ai_configured': settings.has_ai_config,
                'debug': settings.DEBUG
            }
        })
    return app
 def get_audio_files() -> List[Dict[str, Any]]:
    """Return the audio files found on WebDAV and in the local downloads folder.

    Each entry is a dict with 'filename', 'path', 'source', 'processed',
    'size', 'last_modified' and 'available_formats'. When the same filename
    appears in both sources the WebDAV entry wins. The result is sorted by
    filename. A failure while listing one source is logged and that source
    is skipped.
    """
    # This module-level function cannot see the Flask ``app`` created inside
    # create_app(); referencing app.logger here raised NameError inside the
    # except handlers. Use a module logger instead.
    import logging
    logger = logging.getLogger(__name__)
    files = []
    # Get files from WebDAV
    if settings.has_webdav_config:
        try:
            webdav_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
            for file_path in webdav_files:
                normalized_path = webdav_service.normalize_path(file_path)
                base_name = Path(normalized_path).name
                if any(normalized_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
                    is_processed = processed_registry.is_processed(normalized_path)
                    files.append({
                        'filename': base_name,
                        'path': normalized_path,
                        'source': 'webdav',
                        'processed': is_processed,
                        'size': 'Unknown',
                        'last_modified': 'Unknown',
                        'available_formats': get_available_formats(base_name)
                    })
        except Exception as e:
            logger.error(f"Error getting WebDAV files: {e}")
    # Get local files
    try:
        if settings.LOCAL_DOWNLOADS_PATH.exists():
            for ext in settings.AUDIO_EXTENSIONS:
                for file_path in settings.LOCAL_DOWNLOADS_PATH.glob(f"*{ext}"):
                    stat = file_path.stat()
                    is_processed = processed_registry.is_processed(file_path.name)
                    files.append({
                        'filename': file_path.name,
                        'path': str(file_path),
                        'source': 'local',
                        'processed': is_processed,
                        'size': format_size(stat.st_size),
                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                        'available_formats': get_available_formats(file_path.name)
                    })
    except Exception as e:
        logger.error(f"Error getting local files: {e}")
    # Remove duplicates (WebDAV takes precedence)
    unique_files = {}
    for file in files:
        key = file['filename']
        if key not in unique_files or file['source'] == 'webdav':
            unique_files[key] = file
    return sorted(unique_files.values(), key=lambda x: x['filename'])
 def get_available_formats(audio_filename: str) -> Dict[str, bool]:
    """Report which output formats already exist for an audio file.

    Looks in the downloads and DOCX directories for ``<name>.<ext>`` where
    ``<name>`` is the audio file's stem, the stem with spaces replaced by
    underscores, or either of those with an ``_unificado`` suffix.

    Returns:
        Mapping of 'txt', 'md', 'pdf' and 'docx' to whether a match exists.
    """
    stem = Path(audio_filename).stem
    underscored = stem.replace(' ', '_')
    candidates = (
        stem,
        f"{stem}_unificado",
        underscored,
        f"{underscored}_unificado",
    )
    found = dict.fromkeys(('txt', 'md', 'pdf', 'docx'), False)
    for folder in (settings.LOCAL_DOWNLOADS_PATH, settings.LOCAL_DOCX):
        if not folder.exists():
            continue
        for ext in found:
            # any() stops at the first existing variant, like the original break.
            if any((folder / f"{name}.{ext}").exists() for name in candidates):
                found[ext] = True
    return found
 def format_size(size_bytes: int) -> str:
    """Render a byte count with one decimal and a unit from B up to TB.

    The value is divided by 1024 until it drops below 1024 or the units
    B, KB, MB, GB are exhausted; anything larger is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = size_bytes
    position = 0
    while position < len(units):
        if value < 1024.0:
            return f"{value:.1f} {units[position]}"
        value /= 1024.0
        position += 1
    return f"{value:.1f} TB"
--- a/backup/originals_20250109/ai_service.py
+++ b/backup/originals_20250109/ai_service.py
@@ -0,0 +1,69 @@
 """
 AI Service - Unified interface for AI providers
 """
 import logging
 from typing import Optional, Dict, Any
 from .config import settings
 from .core import AIProcessingError
 from .services.ai.provider_factory import AIProviderFactory, ai_provider_factory
 class AIService:
    """Facade over the AI provider factory.

    Every operation catches ``AIProcessingError``, logs it and returns a
    fallback value instead of propagating the error to the caller.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Bound on first access through the ``factory`` property.
        self._factory: Optional[AIProviderFactory] = None
    @property
    def factory(self) -> AIProviderFactory:
        """Provider factory, bound to the module-level instance on first use."""
        if self._factory is not None:
            return self._factory
        self._factory = ai_provider_factory
        return self._factory
    def generate_text(
        self,
        prompt: str,
        provider: Optional[str] = None,
        max_tokens: int = 4096
    ) -> str:
        """Generate text with the named provider ('gemini' when none is given).

        Returns the generated text, or an ``"Error: ..."`` string when the
        provider raises ``AIProcessingError``.
        """
        provider_name = provider or 'gemini'
        try:
            backend = self.factory.get_provider(provider_name)
            return backend.generate(prompt, max_tokens=max_tokens)
        except AIProcessingError as exc:
            self.logger.error(f"AI generation failed: {exc}")
            return f"Error: {str(exc)}"
    def summarize(self, text: str, **kwargs) -> str:
        """Summarize ``text`` with the factory's best provider.

        Returns an ``"Error: ..."`` string on ``AIProcessingError``.
        """
        try:
            backend = self.factory.get_best_provider()
            summary = backend.summarize(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Summarization failed: {exc}")
            return f"Error: {str(exc)}"
        return summary
    def correct_text(self, text: str, **kwargs) -> str:
        """Correct ``text`` with the best provider; return it unchanged on failure."""
        try:
            backend = self.factory.get_best_provider()
            corrected = backend.correct_text(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Text correction failed: {exc}")
            return text  # Fall back to the caller's original text
        return corrected
    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` with the best provider.

        On ``AIProcessingError`` returns category ``"otras_clases"`` with
        confidence 0.0.
        """
        try:
            backend = self.factory.get_best_provider()
            verdict = backend.classify_content(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Classification failed: {exc}")
            return {"category": "otras_clases", "confidence": 0.0}
        return verdict
 # Global instance
 ai_service = AIService()
--- a/backup/originals_20250109/gemini_provider.py
+++ b/backup/originals_20250109/gemini_provider.py
@@ -0,0 +1,171 @@
 """
 Gemini AI Provider implementation
 """
 import logging
 import subprocess
 import shutil
 import requests
 import time
 from typing import Dict, Any, Optional
 from ..config import settings
 from ..core import AIProcessingError
 from .base_provider import AIProvider
 class GeminiProvider(AIProvider):
    """Gemini AI provider that uses the local CLI first and the HTTP API as fallback."""
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
        self._api_key = settings.GEMINI_API_KEY
        self._flash_model = settings.GEMINI_FLASH_MODEL
        self._pro_model = settings.GEMINI_PRO_MODEL
        # requests.Session, created on the first API call and then reused.
        self._session = None
    @property
    def name(self) -> str:
        return "Gemini"
    def is_available(self) -> bool:
        """Return True when either a CLI path or an API key is configured."""
        return bool(self._cli_path or self._api_key)
    def _run_cli(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the Gemini CLI with ``prompt`` and return its stripped stdout.

        Raises:
            AIProcessingError: if the CLI is not configured, exits non-zero,
                times out, or cannot be executed.
        """
        if not self._cli_path:
            raise AIProcessingError("Gemini CLI not available")
        model = self._flash_model if use_flash else self._pro_model
        cmd = [self._cli_path, model, prompt]
        try:
            process = subprocess.run(
                cmd,
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Gemini CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Gemini CLI error: {e}") from e
        # Checked outside the try block so this error is not caught and
        # re-wrapped as "Gemini CLI error: Gemini CLI failed: ...".
        if process.returncode != 0:
            error_msg = process.stderr or "Unknown error"
            raise AIProcessingError(f"Gemini CLI failed: {error_msg}")
        return process.stdout.strip()
    def _call_api(self, prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
        """Call the Gemini ``generateContent`` endpoint and return the text.

        Raises:
            AIProcessingError: if no API key is configured, the request
                fails, or the response has no usable candidate.
        """
        if not self._api_key:
            raise AIProcessingError("Gemini API key not configured")
        model = self._flash_model if use_flash else self._pro_model
        # Initialize session if needed
        if self._session is None:
            self._session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=10,
                pool_maxsize=20
            )
            self._session.mount('https://', adapter)
        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
        payload = {
            "contents": [{
                "parts": [{"text": prompt}]
            }]
        }
        # Send the key as a header, not a query parameter: request exceptions
        # embed the full URL, which would put the key into error messages
        # and logs.
        headers = {"x-goog-api-key": self._api_key}
        try:
            response = self._session.post(
                url,
                json=payload,
                headers=headers,
                timeout=timeout
            )
            response.raise_for_status()
            data = response.json()
            if "candidates" not in data or not data["candidates"]:
                raise AIProcessingError("Empty response from Gemini API")
            candidate = data["candidates"][0]
            if "content" not in candidate or "parts" not in candidate["content"]:
                raise AIProcessingError("Invalid response format from Gemini API")
            result = candidate["content"]["parts"][0]["text"]
            return result.strip()
        except requests.RequestException as e:
            raise AIProcessingError(f"Gemini API request failed: {e}") from e
        except (KeyError, IndexError, ValueError) as e:
            raise AIProcessingError(f"Gemini API response error: {e}") from e
    def _run(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the prompt through the CLI, falling back to the API.

        Raises:
            AIProcessingError: if the API call fails, or if neither route
                produced a result (chained to the CLI failure, if any).
        """
        cli_error = None
        # Try CLI first if available
        if self._cli_path:
            try:
                return self._run_cli(prompt, use_flash, timeout)
            except Exception as e:
                cli_error = e
                self.logger.warning(f"Gemini CLI failed, trying API: {e}")
        # Fallback to API (its timeout is capped at 180 seconds)
        if self._api_key:
            api_timeout = min(timeout, 180)
            return self._call_api(prompt, use_flash, api_timeout)
        raise AIProcessingError("No Gemini provider available (CLI or API)") from cli_error
    def summarize(self, text: str, **kwargs) -> str:
        """Generate a Spanish summary of ``text`` with the flash model."""
        prompt = f"""Summarize the following text:
 {text}
 Provide a clear, concise summary in Spanish."""
        return self._run(prompt, use_flash=True)
    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` corrected for grammar, spelling and clarity."""
        prompt = f"""Correct the following text for grammar, spelling, and clarity:
 {text}
 Return only the corrected text, nothing else."""
        return self._run(prompt, use_flash=True)
    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the known categories.

        Returns:
            dict with 'category' ("otras_clases" when the model's answer is
            not one of the known categories), a fixed 'confidence' of 0.9
            and the 'provider' name.
        """
        categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]
        prompt = f"""Classify the following text into one of these categories:
 - historia
 - analisis_contable
 - instituciones_gobierno
 - otras_clases
 Text: {text}
 Return only the category name, nothing else."""
        result = self._run(prompt, use_flash=True).lower()
        # Validate result
        if result not in categories:
            result = "otras_clases"
        return {
            "category": result,
            "confidence": 0.9,
            "provider": self.name
        }
--- a/backup/originals_20250109/processed_registry.py
+++ b/backup/originals_20250109/processed_registry.py
@@ -0,0 +1,137 @@
 """
 Processed files registry using repository pattern
 """
 import fcntl
 import logging
 from pathlib import Path
 from typing import Set, Optional
 from datetime import datetime, timedelta
 from ..config import settings
 class ProcessedRegistry:
    """Registry of already-processed files, persisted as one path per line.

    The registry file lives at ``settings.processed_files_path``. Entries are
    mirrored in an in-memory set that holds both the stored path and its
    basename, so a file is recognised under either form.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cache: Set[str] = set()
        self._cache_time: Optional[datetime] = None
        # Seconds a non-empty cache is served by load() before the file is re-read
        self._cache_ttl = 60
        self._initialized = False
    def initialize(self) -> None:
        """Load the registry once and mark the instance as initialized."""
        self.load()
        self._initialized = True
    def load(self) -> Set[str]:
        """Return the set of processed entries, reading the file when the cache is stale.

        A non-empty cache younger than ``_cache_ttl`` seconds is returned as a
        copy without touching the disk. Otherwise the file is re-read; blank
        lines and lines starting with ``#`` are ignored, and each entry is
        stored together with its basename. Read errors are logged and result
        in whatever was collected before the failure.
        """
        now = datetime.utcnow()
        if self._cache and self._cache_time and (now - self._cache_time).total_seconds() < self._cache_ttl:
            return self._cache.copy()
        processed = set()
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            if registry_path.exists():
                with open(registry_path, 'r', encoding='utf-8') as f:
                    for raw_line in f:
                        line = raw_line.strip()
                        if line and not line.startswith('#'):
                            processed.add(line)
                            processed.add(Path(line).name)
        except Exception as e:
            self.logger.error(f"Error reading processed files registry: {e}")
        self._cache = processed
        self._cache_time = now
        return processed.copy()
    def save(self, file_path: str) -> None:
        """Append ``file_path`` to the registry file under an exclusive lock.

        Empty paths are ignored, and paths already present in the cache are
        not written again. Errors are logged and re-raised.
        """
        if not file_path:
            return
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            with open(registry_path, 'a', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    if file_path not in self._cache:
                        f.write(file_path + "\n")
                        # Flush while the lock is still held; otherwise the
                        # buffered line reaches the file only after unlock.
                        f.flush()
                        self._cache.add(file_path)
                        # Mirror load(), which caches the basename as well
                        self._cache.add(Path(file_path).name)
                        self.logger.debug(f"Added {file_path} to processed registry")
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
        except Exception as e:
            self.logger.error(f"Error saving to processed files registry: {e}")
            raise
    def is_processed(self, file_path: str) -> bool:
        """Return True if ``file_path`` or its basename is in the cache."""
        if not self._initialized:
            self.initialize()
        return file_path in self._cache or Path(file_path).name in self._cache
    def remove(self, file_path: str) -> bool:
        """Drop every line matching ``file_path`` or sharing its basename.

        Returns:
            False if the registry file does not exist or an error occurred
            (the error is logged); True otherwise, whether or not any line
            matched.
        """
        registry_path = settings.processed_files_path
        target_name = Path(file_path).name
        try:
            if not registry_path.exists():
                return False
            # 'r+' keeps the existing content until the lock is held; opening
            # with 'w' would truncate the file before the lock is acquired.
            with open(registry_path, 'r+', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    lines_to_keep = [
                        line for line in f.readlines()
                        if line.strip() != file_path and Path(line.strip()).name != target_name
                    ]
                    f.seek(0)
                    f.writelines(lines_to_keep)
                    # Cut off what is left of the old, longer content
                    f.truncate()
                    self._cache.discard(file_path)
                    self._cache.discard(target_name)
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            return True
        except Exception as e:
            self.logger.error(f"Error removing from processed files registry: {e}")
            return False
    def clear(self) -> None:
        """Delete the registry file (if present) and empty the cache.

        Errors are logged and re-raised.
        """
        registry_path = settings.processed_files_path
        try:
            if registry_path.exists():
                registry_path.unlink()
            self._cache.clear()
            self._cache_time = None
            self.logger.info("Processed files registry cleared")
        except Exception as e:
            self.logger.error(f"Error clearing processed files registry: {e}")
            raise
    def get_all(self) -> Set[str]:
        """Return a copy of the cached entries (paths and basenames)."""
        if not self._initialized:
            self.initialize()
        return self._cache.copy()
    def count(self) -> int:
        """Return the number of cached entries; paths and basenames count separately."""
        if not self._initialized:
            self.initialize()
        return len(self._cache)
 # Module-level instance created at import time; nothing is loaded from disk
 # until a method that needs the registry is first called
 processed_registry = ProcessedRegistry()
--- a/config/init.py
+++ b/config/init.py
@@ -0,0 +1,8 @@
 """
 Configuration package for CBCFacil
 """
 from .settings import settings
 from .validators import validate_environment
 # Names re-exported as the public interface of the config package
 __all__ = ['settings', 'validate_environment']
--- a/config/settings.py
+++ b/config/settings.py
@@ -0,0 +1,211 @@
 """
 Centralized configuration management for CBCFacil
 """
 import os
 from pathlib import Path
 from typing import Optional, Set, Union
 class ConfigurationError(Exception):
    """Signals that a required configuration value is missing or invalid."""
 class Settings:
    """Application settings loaded from environment variables.

    Every class-level attribute below is evaluated once, when the class body
    runs (i.e. when this module is imported); later changes to the process
    environment do not affect them. ``processed_files_path`` is the exception:
    it consults the environment on every access.

    Numeric settings pass the raw environment value straight to ``int()`` or
    ``float()``, so a non-numeric value raises ``ValueError`` while the class
    body executes.
    """
    # Application
    APP_NAME: str = "CBCFacil"
    APP_VERSION: str = "8.0"
    # Only the exact value "true" (case-insensitive) enables debug mode
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
    # Nextcloud/WebDAV Configuration
    NEXTCLOUD_URL: str = os.getenv("NEXTCLOUD_URL", "")
    NEXTCLOUD_USER: str = os.getenv("NEXTCLOUD_USER", "")
    NEXTCLOUD_PASSWORD: str = os.getenv("NEXTCLOUD_PASSWORD", "")
    # Alias of NEXTCLOUD_URL, captured at class-creation time
    WEBDAV_ENDPOINT: str = NEXTCLOUD_URL
    # Remote folders
    REMOTE_AUDIOS_FOLDER: str = "Audios"
    REMOTE_DOCX_AUDIO_FOLDER: str = "Documentos"
    REMOTE_PDF_FOLDER: str = "Pdf"
    REMOTE_TXT_FOLDER: str = "Textos"
    RESUMENES_FOLDER: str = "Resumenes"
    DOCX_FOLDER: str = "Documentos"
    # Local paths
    # BASE_DIR is the directory one level above the one containing this file
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    LOCAL_STATE_DIR: str = os.getenv("LOCAL_STATE_DIR", str(BASE_DIR))
    LOCAL_DOWNLOADS_PATH: Path = BASE_DIR / "downloads"
    # Same directory as LOCAL_DOWNLOADS_PATH
    LOCAL_RESUMENES: Path = LOCAL_DOWNLOADS_PATH
    LOCAL_DOCX: Path = BASE_DIR / "resumenes_docx"
    # Processing
    POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "5"))
    HTTP_TIMEOUT: int = int(os.getenv("HTTP_TIMEOUT", "30"))
    WEBDAV_MAX_RETRIES: int = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
    DOWNLOAD_CHUNK_SIZE: int = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "65536"))  # 64KB for better performance
    MAX_FILENAME_LENGTH: int = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
    MAX_FILENAME_BASE_LENGTH: int = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
    MAX_FILENAME_TOPICS_LENGTH: int = int(os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20"))
    # File extensions
    AUDIO_EXTENSIONS: Set[str] = {".mp3", ".wav", ".m4a", ".ogg", ".aac"}
    PDF_EXTENSIONS: Set[str] = {".pdf"}
    TXT_EXTENSIONS: Set[str] = {".txt"}
    # AI Providers
    ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
    ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
    # ANTHROPIC_AUTH_TOKEN wins when set and non-empty; otherwise ZAI_AUTH_TOKEN, else ""
    ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv("ZAI_AUTH_TOKEN", "")
    # Gemini
    GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
    GEMINI_FLASH_MODEL: Optional[str] = os.getenv("GEMINI_FLASH_MODEL")
    GEMINI_PRO_MODEL: Optional[str] = os.getenv("GEMINI_PRO_MODEL")
    # CLI paths
    GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")
    CLAUDE_CLI_PATH: Optional[str] = os.getenv("CLAUDE_CLI_PATH")
    # Telegram
    TELEGRAM_TOKEN: Optional[str] = os.getenv("TELEGRAM_TOKEN")
    TELEGRAM_CHAT_ID: Optional[str] = os.getenv("TELEGRAM_CHAT_ID")
    # PDF Processing Configuration
    # CPU_COUNT falls back to 1 when os.cpu_count() returns None
    CPU_COUNT: int = os.cpu_count() or 1
    PDF_MAX_PAGES_PER_CHUNK: int = int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2"))
    PDF_DPI: int = int(os.getenv("PDF_DPI", "200"))
    # Default: at most 4 render threads, fewer on machines with fewer CPUs
    PDF_RENDER_THREAD_COUNT: int = int(os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT))))
    PDF_BATCH_SIZE: int = int(os.getenv("PDF_BATCH_SIZE", "2"))
    # Defaults to PDF_BATCH_SIZE
    PDF_TROCR_MAX_BATCH: int = int(os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE)))
    # Default: a third of the CPUs, clamped to the range 1..2
    PDF_TESSERACT_THREADS: int = int(os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3))))))
    # Defaults to PDF_TESSERACT_THREADS
    PDF_PREPROCESS_THREADS: int = int(os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS)))
    PDF_TEXT_DETECTION_MIN_RATIO: float = float(os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6"))
    PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120"))
    # Error handling
    ERROR_THROTTLE_SECONDS: int = int(os.getenv("ERROR_THROTTLE_SECONDS", "600"))
    # GPU/VRAM Management
    MODEL_TIMEOUT_SECONDS: int = int(os.getenv("MODEL_TIMEOUT_SECONDS", "300"))
    # NOTE(review): the default "all" is not a list of device indices; confirm
    # how the consumer of this value interprets it
    CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
    PYTORCH_CUDA_ALLOC_CONF: str = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512")
    # GPU Detection (auto, nvidia, amd, cpu)
    GPU_PREFERENCE: str = os.getenv("GPU_PREFERENCE", "auto")
    # AMD ROCm HSA override for RX 6000 series (gfx1030)
    HSA_OVERRIDE_GFX_VERSION: str = os.getenv("HSA_OVERRIDE_GFX_VERSION", "10.3.0")
    # Dashboard
    DASHBOARD_SECRET_KEY: str = os.getenv("DASHBOARD_SECRET_KEY", "")
    DASHBOARD_PORT: int = int(os.getenv("DASHBOARD_PORT", "5000"))
    DASHBOARD_HOST: str = os.getenv("DASHBOARD_HOST", "0.0.0.0")
    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: Optional[str] = os.getenv("LOG_FILE")
    # Threading optimization
    OMP_NUM_THREADS: int = int(os.getenv("OMP_NUM_THREADS", "4"))
    MKL_NUM_THREADS: int = int(os.getenv("MKL_NUM_THREADS", "4"))
    # ========================================================================
    # PROPERTIES WITH VALIDATION
    # ========================================================================
    @property
    def is_production(self) -> bool:
        """Return True whenever DEBUG is off; there is no separate production flag."""
        return not self.DEBUG
    @property
    def has_webdav_config(self) -> bool:
        """Return True when URL, user and password are all non-empty."""
        return all([self.NEXTCLOUD_URL, self.NEXTCLOUD_USER, self.NEXTCLOUD_PASSWORD])
    @property
    def has_ai_config(self) -> bool:
        """Return True when at least one AI token, API key or CLI path is set."""
        return any([
            self.ZAI_AUTH_TOKEN,
            self.GEMINI_API_KEY,
            self.CLAUDE_CLI_PATH,
            self.GEMINI_CLI_PATH
        ])
    @property
    def processed_files_path(self) -> Path:
        """Return the path of the processed-files registry.

        PROCESSED_FILES_PATH is read from the environment on every access;
        the fallback is ``processed_files.txt`` inside LOCAL_STATE_DIR.
        """
        return Path(os.getenv("PROCESSED_FILES_PATH", str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt")))
    @property
    def nextcloud_url(self) -> str:
        """Return NEXTCLOUD_URL.

        Raises:
            ConfigurationError: If the value is empty and DEBUG is off.
        """
        if not self.NEXTCLOUD_URL and self.is_production:
            raise ConfigurationError("NEXTCLOUD_URL is required in production mode")
        return self.NEXTCLOUD_URL
    @property
    def nextcloud_user(self) -> str:
        """Return NEXTCLOUD_USER.

        Raises:
            ConfigurationError: If the value is empty and DEBUG is off.
        """
        if not self.NEXTCLOUD_USER and self.is_production:
            raise ConfigurationError("NEXTCLOUD_USER is required in production mode")
        return self.NEXTCLOUD_USER
    @property
    def nextcloud_password(self) -> str:
        """Return NEXTCLOUD_PASSWORD.

        Raises:
            ConfigurationError: If the value is empty and DEBUG is off.
        """
        if not self.NEXTCLOUD_PASSWORD and self.is_production:
            raise ConfigurationError("NEXTCLOUD_PASSWORD is required in production mode")
        return self.NEXTCLOUD_PASSWORD
    @property
    def valid_webdav_config(self) -> bool:
        """Return False if any validated Nextcloud accessor raises, True otherwise.

        Because those accessors only raise when DEBUG is off, this is always
        True in debug mode, even with empty credentials.
        """
        try:
            _ = self.nextcloud_url
            _ = self.nextcloud_user
            _ = self.nextcloud_password
            return True
        except ConfigurationError:
            return False
    @property
    def telegram_configured(self) -> bool:
        """Return True when both the Telegram token and chat id are set."""
        return bool(self.TELEGRAM_TOKEN and self.TELEGRAM_CHAT_ID)
    @property
    def has_gpu_support(self) -> bool:
        """Return ``torch.cuda.is_available()``, or False if torch cannot be imported."""
        try:
            import torch
            return torch.cuda.is_available()
        except ImportError:
            return False
    @property
    def environment_type(self) -> str:
        """Return "production" when DEBUG is off, "development" otherwise."""
        return "production" if self.is_production else "development"
    @property
    def config_summary(self) -> dict:
        """Return a dict of status flags and basic values for logging.

        Only booleans, counts and names are included; no credential values.
        """
        return {
            "app_name": self.APP_NAME,
            "version": self.APP_VERSION,
            "environment": self.environment_type,
            "debug": self.DEBUG,
            "webdav_configured": self.has_webdav_config,
            "ai_configured": self.has_ai_config,
            "telegram_configured": self.telegram_configured,
            "gpu_support": self.has_gpu_support,
            "cpu_count": self.CPU_COUNT,
            "poll_interval": self.POLL_INTERVAL
        }
 # Module-level Settings instance, created when this module is imported
 settings = Settings()
--- a/config/settings.py.backup
+++ b/config/settings.py.backup
@@ -0,0 +1,130 @@
 """
 Centralized configuration management for CBCFacil
 """
 import os
 from pathlib import Path
 from typing import Optional, Set
 class Settings:
    """Application settings loaded from environment variables.

    Every class-level attribute below is evaluated once, when the class body
    runs (i.e. when this module is imported). ``processed_files_path`` is the
    exception: it consults the environment on every access.

    Numeric settings pass the raw environment value straight to ``int()`` or
    ``float()``, so a non-numeric value raises ``ValueError`` while the class
    body executes.
    """
    # Application
    APP_NAME: str = "CBCFacil"
    APP_VERSION: str = "8.0"
    # Only the exact value "true" (case-insensitive) enables debug mode
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
    # Nextcloud/WebDAV Configuration
    NEXTCLOUD_URL: str = os.getenv("NEXTCLOUD_URL", "")
    NEXTCLOUD_USER: str = os.getenv("NEXTCLOUD_USER", "")
    NEXTCLOUD_PASSWORD: str = os.getenv("NEXTCLOUD_PASSWORD", "")
    # Alias of NEXTCLOUD_URL, captured at class-creation time
    WEBDAV_ENDPOINT: str = NEXTCLOUD_URL
    # Remote folders
    REMOTE_AUDIOS_FOLDER: str = "Audios"
    REMOTE_DOCX_AUDIO_FOLDER: str = "Documentos"
    REMOTE_PDF_FOLDER: str = "Pdf"
    REMOTE_TXT_FOLDER: str = "Textos"
    RESUMENES_FOLDER: str = "Resumenes"
    DOCX_FOLDER: str = "Documentos"
    # Local paths
    # BASE_DIR is the directory one level above the one containing this file
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    LOCAL_STATE_DIR: str = os.getenv("LOCAL_STATE_DIR", str(BASE_DIR))
    LOCAL_DOWNLOADS_PATH: Path = BASE_DIR / "downloads"
    # Same directory as LOCAL_DOWNLOADS_PATH
    LOCAL_RESUMENES: Path = LOCAL_DOWNLOADS_PATH
    LOCAL_DOCX: Path = BASE_DIR / "resumenes_docx"
    # Processing
    POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "5"))
    HTTP_TIMEOUT: int = int(os.getenv("HTTP_TIMEOUT", "30"))
    WEBDAV_MAX_RETRIES: int = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
    DOWNLOAD_CHUNK_SIZE: int = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "65536"))  # 64KB for better performance
    MAX_FILENAME_LENGTH: int = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
    MAX_FILENAME_BASE_LENGTH: int = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
    MAX_FILENAME_TOPICS_LENGTH: int = int(os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20"))
    # File extensions
    AUDIO_EXTENSIONS: Set[str] = {".mp3", ".wav", ".m4a", ".ogg", ".aac"}
    PDF_EXTENSIONS: Set[str] = {".pdf"}
    TXT_EXTENSIONS: Set[str] = {".txt"}
    # AI Providers
    ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
    ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
    # ANTHROPIC_AUTH_TOKEN wins when set and non-empty; otherwise ZAI_AUTH_TOKEN, else ""
    ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv("ZAI_AUTH_TOKEN", "")
    # Gemini
    GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
    GEMINI_FLASH_MODEL: Optional[str] = os.getenv("GEMINI_FLASH_MODEL")
    GEMINI_PRO_MODEL: Optional[str] = os.getenv("GEMINI_PRO_MODEL")
    # CLI paths
    GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")
    CLAUDE_CLI_PATH: Optional[str] = os.getenv("CLAUDE_CLI_PATH")
    # Telegram
    TELEGRAM_TOKEN: Optional[str] = os.getenv("TELEGRAM_TOKEN")
    TELEGRAM_CHAT_ID: Optional[str] = os.getenv("TELEGRAM_CHAT_ID")
    # PDF Processing Configuration
    # CPU_COUNT falls back to 1 when os.cpu_count() returns None
    CPU_COUNT: int = os.cpu_count() or 1
    PDF_MAX_PAGES_PER_CHUNK: int = int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2"))
    PDF_DPI: int = int(os.getenv("PDF_DPI", "200"))
    # Default: at most 4 render threads, fewer on machines with fewer CPUs
    PDF_RENDER_THREAD_COUNT: int = int(os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT))))
    PDF_BATCH_SIZE: int = int(os.getenv("PDF_BATCH_SIZE", "2"))
    # Defaults to PDF_BATCH_SIZE
    PDF_TROCR_MAX_BATCH: int = int(os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE)))
    # Default: a third of the CPUs, clamped to the range 1..2
    PDF_TESSERACT_THREADS: int = int(os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3))))))
    # Defaults to PDF_TESSERACT_THREADS
    PDF_PREPROCESS_THREADS: int = int(os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS)))
    PDF_TEXT_DETECTION_MIN_RATIO: float = float(os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6"))
    PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120"))
    # Error handling
    ERROR_THROTTLE_SECONDS: int = int(os.getenv("ERROR_THROTTLE_SECONDS", "600"))
    # GPU/VRAM Management
    MODEL_TIMEOUT_SECONDS: int = int(os.getenv("MODEL_TIMEOUT_SECONDS", "300"))
    CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
    PYTORCH_CUDA_ALLOC_CONF: str = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512")
    # Dashboard
    DASHBOARD_SECRET_KEY: str = os.getenv("DASHBOARD_SECRET_KEY", "")
    DASHBOARD_PORT: int = int(os.getenv("DASHBOARD_PORT", "5000"))
    DASHBOARD_HOST: str = os.getenv("DASHBOARD_HOST", "0.0.0.0")
    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: Optional[str] = os.getenv("LOG_FILE")
    # Threading optimization
    OMP_NUM_THREADS: int = int(os.getenv("OMP_NUM_THREADS", "4"))
    MKL_NUM_THREADS: int = int(os.getenv("MKL_NUM_THREADS", "4"))
    @property
    def is_production(self) -> bool:
        """Return True whenever DEBUG is off; there is no separate production flag."""
        return not self.DEBUG
    @property
    def has_webdav_config(self) -> bool:
        """Return True when URL, user and password are all non-empty."""
        return all([self.NEXTCLOUD_URL, self.NEXTCLOUD_USER, self.NEXTCLOUD_PASSWORD])
    @property
    def has_ai_config(self) -> bool:
        """Return True when at least one AI token, API key or CLI path is set."""
        return any([
            self.ZAI_AUTH_TOKEN,
            self.GEMINI_API_KEY,
            self.CLAUDE_CLI_PATH,
            self.GEMINI_CLI_PATH
        ])
    @property
    def processed_files_path(self) -> Path:
        """Return the path of the processed-files registry.

        PROCESSED_FILES_PATH is read from the environment on every access;
        the fallback is ``processed_files.txt`` inside LOCAL_STATE_DIR.
        """
        return Path(os.getenv("PROCESSED_FILES_PATH", str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt")))
 # Module-level Settings instance, created when this module is imported
 settings = Settings()
--- a/config/validators.py
+++ b/config/validators.py
@@ -0,0 +1,64 @@
 """
 Configuration validators for CBCFacil
 """
 import logging
 from typing import List, Dict
 class ConfigurationError(Exception):
    """Signals that the environment configuration failed validation."""
    # NOTE(review): config/settings.py and core/exceptions.py each define their
    # own ConfigurationError. The three classes are unrelated, so an `except`
    # written against one of them does not catch the others.
 def validate_environment() -> List[str]:
    """
    Inspect the loaded settings and report configuration problems.

    Non-fatal findings are logged as warnings and returned to the caller.
    Fatal findings (credentials that are set but implausibly short) are
    logged and raised together as one ConfigurationError.
    """
    from .settings import settings
    notices = []
    problems = []
    # Missing optional integrations only produce warnings
    if not settings.has_webdav_config:
        notices.append("WebDAV credentials not configured - file sync will not work")
    if not settings.has_ai_config:
        notices.append("No AI providers configured - summary generation will not work")
    # Credentials that are present must at least have a plausible length
    zai_token = settings.ZAI_AUTH_TOKEN
    if zai_token and len(zai_token) < 10:
        problems.append("ZAI_AUTH_TOKEN appears to be invalid (too short)")
    gemini_key = settings.GEMINI_API_KEY
    if gemini_key and len(gemini_key) < 20:
        problems.append("GEMINI_API_KEY appears to be invalid (too short)")
    # Dashboard secret: flag both the empty value and the well-known placeholder
    dashboard_secret = settings.DASHBOARD_SECRET_KEY
    if not dashboard_secret:
        notices.append("DASHBOARD_SECRET_KEY not set - using default is not recommended for production")
    if dashboard_secret == "dashboard-secret-key-change-in-production":
        notices.append("Using default dashboard secret key - please change in production")
    # GPU acceleration is optional; report why it is unavailable
    try:
        import torch
        if not torch.cuda.is_available():
            notices.append("CUDA not available - GPU acceleration will be disabled")
    except ImportError:
        notices.append("PyTorch not installed - GPU acceleration will be disabled")
    for notice in notices:
        logging.warning(f"Configuration warning: {notice}")
    if not problems:
        return notices
    # At least one fatal problem: report them all in a single exception
    error_msg = "Configuration errors:\n" + "\n".join(f"- {e}" for e in problems)
    logging.error(error_msg)
    raise ConfigurationError(error_msg)
--- a/core/init.py
+++ b/core/init.py
@@ -0,0 +1,21 @@
 """
 Core package for CBCFacil
 """
 from .exceptions import (
    ProcessingError,
    WebDAVError,
    AIProcessingError,
    ConfigurationError,
    FileProcessingError
 )
 from .result import Result
 # Public names re-exported by the core package
 __all__ = [
    'ProcessingError',
    'WebDAVError',
    'AIProcessingError',
    'ConfigurationError',
    'FileProcessingError',
    'Result'
 ]
--- a/core/base_service.py
+++ b/core/base_service.py
@@ -0,0 +1,35 @@
 """
 Base service class for CBCFacil services
 """
 import logging
 from abc import ABC, abstractmethod
 from typing import Optional
 class BaseService(ABC):
    """Abstract parent of the services.

    Subclasses implement ``initialize`` and ``cleanup``. An instance can be
    used as a context manager, which calls those two methods on entry and
    exit respectively.
    """
    def __init__(self, name: str):
        # Each service logs under "<this module>.<service name>"
        self.logger = logging.getLogger(f"{__name__}.{name}")
        self.name = name
    @abstractmethod
    def initialize(self) -> None:
        """Prepare the service for use."""
    @abstractmethod
    def cleanup(self) -> None:
        """Release whatever the service holds."""
    def health_check(self) -> bool:
        """Report whether the service is healthy; the base implementation always says yes."""
        return True
    def __enter__(self):
        """Initialize the service and hand back the instance itself."""
        self.initialize()
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Run cleanup; exceptions raised inside the ``with`` body are not suppressed."""
        self.cleanup()
--- a/core/exceptions.py
+++ b/core/exceptions.py
@@ -0,0 +1,38 @@
 """
 Custom exceptions for CBCFacil
 """
 class ProcessingError(Exception):
    """Common base class for the exceptions defined in this module."""
 class ConfigurationError(ProcessingError):
    """Signals invalid configuration."""
 class WebDAVError(ProcessingError):
    """Signals a failed WebDAV operation."""
 class AIProcessingError(ProcessingError):
    """Signals a failure during AI processing."""
 class FileProcessingError(ProcessingError):
    """Signals a failure while processing a file."""
 class AuthenticationError(ProcessingError):
    """Signals a failed authentication attempt."""
 class ValidationError(ProcessingError):
    """Signals that input failed validation."""
--- a/core/health_check.py
+++ b/core/health_check.py
@@ -0,0 +1,355 @@
 """
 Health check endpoint for CBCFacil service monitoring
 """
 import json
 import logging
 from datetime import datetime
 from typing import Dict, Any, List, Optional
 from pathlib import Path
 logger = logging.getLogger(__name__)
 class HealthChecker:
    """Runs health checks against the service's dependencies.

    Every ``check_*`` method returns a dict with at least ``service``,
    ``status`` and ``timestamp`` (naive UTC, ISO format). Project modules are
    imported inside the methods rather than at module import time.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
    def check_webdav_connection(self) -> Dict[str, Any]:
        """Check WebDAV connectivity by listing the remote root.

        Status is ``not_configured`` when credentials are missing, ``healthy``
        when the listing succeeds and ``unhealthy`` (with ``error``) otherwise.
        """
        from config import settings
        result = {
            "service": "webdav",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            from services.webdav_service import webdav_service
            if not settings.has_webdav_config:
                result["status"] = "not_configured"
                result["message"] = "WebDAV credentials not configured"
                return result
            # Test connection with a simple list operation
            webdav_service.list(".")
            result["status"] = "healthy"
            result["message"] = "WebDAV connection successful"
            result["endpoint"] = settings.NEXTCLOUD_URL
        except Exception as e:
            result["status"] = "unhealthy"
            result["error"] = str(e)
            self.logger.error(f"WebDAV health check failed: {e}")
        return result
    def check_ai_providers(self) -> Dict[str, Any]:
        """Report which AI providers are configured.

        No provider is contacted. API providers are reported from the presence
        of their credentials; CLI providers from whether the configured path
        exists on disk. The overall status is ``healthy`` as soon as any
        provider is configured.
        """
        from config import settings
        result = {
            "service": "ai_providers",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat(),
            "providers": {}
        }
        try:
            # Check ZAI
            if settings.ZAI_AUTH_TOKEN:
                result["providers"]["zai"] = {
                    "configured": True,
                    "status": "unknown"
                }
            else:
                result["providers"]["zai"] = {
                    "configured": False,
                    "status": "not_configured"
                }
            # Check Gemini
            if settings.GEMINI_API_KEY:
                result["providers"]["gemini"] = {
                    "configured": True,
                    "status": "unknown"
                }
            else:
                result["providers"]["gemini"] = {
                    "configured": False,
                    "status": "not_configured"
                }
            # Check CLI providers
            if settings.CLAUDE_CLI_PATH:
                claude_path = Path(settings.CLAUDE_CLI_PATH)
                result["providers"]["claude_cli"] = {
                    "configured": True,
                    "path_exists": claude_path.exists(),
                    "status": "available" if claude_path.exists() else "path_invalid"
                }
            if settings.GEMINI_CLI_PATH:
                gemini_path = Path(settings.GEMINI_CLI_PATH)
                result["providers"]["gemini_cli"] = {
                    "configured": True,
                    "path_exists": gemini_path.exists(),
                    "status": "available" if gemini_path.exists() else "path_invalid"
                }
            # Overall status
            if settings.has_ai_config:
                result["status"] = "healthy"
                result["message"] = "At least one AI provider configured"
            else:
                result["status"] = "not_configured"
                result["message"] = "No AI providers configured"
        except Exception as e:
            result["status"] = "error"
            result["error"] = str(e)
            self.logger.error(f"AI providers health check failed: {e}")
        return result
    def check_vram_manager(self) -> Dict[str, Any]:
        """Report VRAM figures obtained from the VRAM manager.

        Values read from ``get_vram_info()`` are divided by 1024**3 and
        rounded to two decimals. Any failure, including a failed import of
        the manager, yields status ``unavailable``.
        """
        result = {
            "service": "vram_manager",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            from services.vram_manager import vram_manager
            vram_info = vram_manager.get_vram_info()
            result["status"] = "healthy"
            result["vram_info"] = {
                "total_gb": round(vram_info.get("total", 0) / (1024**3), 2),
                "free_gb": round(vram_info.get("free", 0) / (1024**3), 2),
                "allocated_gb": round(vram_info.get("allocated", 0) / (1024**3), 2)
            }
            result["cuda_available"] = vram_info.get("cuda_available", False)
        except Exception as e:
            result["status"] = "unavailable"
            result["error"] = str(e)
            self.logger.error(f"VRAM manager health check failed: {e}")
        return result
    def check_telegram_service(self) -> Dict[str, Any]:
        """Report whether the Telegram service considers itself configured."""
        from config import settings
        result = {
            "service": "telegram",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            from services.telegram_service import telegram_service
            if telegram_service.is_configured:
                result["status"] = "healthy"
                result["message"] = "Telegram service configured"
            else:
                result["status"] = "not_configured"
                result["message"] = "Telegram credentials not configured"
        except Exception as e:
            result["status"] = "error"
            result["error"] = str(e)
            self.logger.error(f"Telegram service health check failed: {e}")
        return result
    def check_processed_registry(self) -> Dict[str, Any]:
        """Check that the processed-files registry loads and report on its file.

        Adds ``registry_path`` and ``registry_exists``; when the file exists,
        ``registry_writable`` is added as well.
        """
        # Imported here because this module only imports `os` under
        # `__main__`; without it the os.access() call below raises NameError
        # whenever the module is imported rather than run as a script.
        import os
        result = {
            "service": "processed_registry",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            from storage.processed_registry import processed_registry
            # Try to load registry
            processed_registry.load()
            result["status"] = "healthy"
            # NOTE(review): assumes the registry object exposes `registry_path`;
            # confirm against storage/processed_registry.py
            result["registry_path"] = str(processed_registry.registry_path)
            # Check if registry file is writable
            registry_file = Path(processed_registry.registry_path)
            if registry_file.exists():
                result["registry_exists"] = True
                result["registry_writable"] = registry_file.is_file() and os.access(registry_file, os.W_OK)
            else:
                result["registry_exists"] = False
        except Exception as e:
            result["status"] = "unhealthy"
            result["error"] = str(e)
            self.logger.error(f"Processed registry health check failed: {e}")
        return result
    def check_disk_space(self) -> Dict[str, Any]:
        """Report disk usage for the filesystem holding the project directory.

        The directory measured is two levels above this file. Status is
        ``warning`` below 1 GB free, ``degraded`` below 5 GB free, and
        ``healthy`` otherwise.
        """
        result = {
            "service": "disk_space",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            import shutil
            # Check main directory
            usage = shutil.disk_usage(Path(__file__).parent.parent)
            total_gb = usage.total / (1024**3)
            free_gb = usage.free / (1024**3)
            used_percent = (usage.used / usage.total) * 100
            result["status"] = "healthy"
            result["total_gb"] = round(total_gb, 2)
            result["free_gb"] = round(free_gb, 2)
            result["used_percent"] = round(used_percent, 2)
            # Warning if low disk space
            if free_gb < 1:  # Less than 1GB
                result["status"] = "warning"
                result["message"] = "Low disk space"
            elif free_gb < 5:  # Less than 5GB
                result["status"] = "degraded"
                result["message"] = "Disk space running low"
        except Exception as e:
            result["status"] = "error"
            result["error"] = str(e)
            self.logger.error(f"Disk space health check failed: {e}")
        return result
    def check_configuration(self) -> Dict[str, Any]:
        """Summarise configuration gaps as a list of warnings.

        Status is ``healthy`` when the list is empty and ``warning`` otherwise.
        """
        from config import settings
        result = {
            "service": "configuration",
            "status": "unknown",
            "timestamp": datetime.utcnow().isoformat()
        }
        try:
            warnings = []
            # Check for warnings
            if not settings.has_webdav_config:
                warnings.append("WebDAV not configured")
            if not settings.has_ai_config:
                warnings.append("AI providers not configured")
            if not settings.telegram_configured:
                warnings.append("Telegram not configured")
            if settings.DASHBOARD_SECRET_KEY == "":
                warnings.append("Dashboard secret key not set")
            if settings.DASHBOARD_SECRET_KEY == "dashboard-secret-key-change-in-production":
                warnings.append("Using default dashboard secret")
            result["status"] = "healthy" if not warnings else "warning"
            result["warnings"] = warnings
            result["environment"] = settings.environment_type
        except Exception as e:
            result["status"] = "error"
            result["error"] = str(e)
            self.logger.error(f"Configuration health check failed: {e}")
        return result
    def run_full_health_check(self) -> Dict[str, Any]:
        """Run every check and combine the results.

        Returns:
            A dict with ``overall_status``, ``timestamp``, the per-check
            results under ``checks`` and a ``summary`` of counts. The overall
            status is ``unhealthy`` if any check is ``unhealthy``/``error`` or
            raises, ``warning`` if any is ``warning``/``degraded``, and
            ``healthy`` otherwise. The summary counts only the ``healthy``,
            ``warning`` and ``unhealthy`` statuses, so its numbers need not
            add up to ``total_checks``.
        """
        checks = [
            ("configuration", self.check_configuration),
            ("webdav", self.check_webdav_connection),
            ("ai_providers", self.check_ai_providers),
            ("vram_manager", self.check_vram_manager),
            ("telegram", self.check_telegram_service),
            ("processed_registry", self.check_processed_registry),
            ("disk_space", self.check_disk_space)
        ]
        results = {}
        overall_status = "healthy"
        for check_name, check_func in checks:
            try:
                result = check_func()
                results[check_name] = result
                # Track overall status
                if result["status"] in ["unhealthy", "error"]:
                    overall_status = "unhealthy"
                elif result["status"] in ["warning", "degraded"] and overall_status == "healthy":
                    overall_status = "warning"
            except Exception as e:
                # A check that raises (e.g. a failed project import made
                # outside its own try block) is recorded as an error result
                results[check_name] = {
                    "service": check_name,
                    "status": "error",
                    "error": str(e),
                    "timestamp": datetime.utcnow().isoformat()
                }
                overall_status = "unhealthy"
                self.logger.error(f"Health check {check_name} failed: {e}")
        return {
            "overall_status": overall_status,
            "timestamp": datetime.utcnow().isoformat(),
            "checks": results,
            "summary": {
                "total_checks": len(checks),
                "healthy": sum(1 for r in results.values() if r["status"] == "healthy"),
                "warning": sum(1 for r in results.values() if r["status"] == "warning"),
                "unhealthy": sum(1 for r in results.values() if r["status"] == "unhealthy")
            }
        }
 # Module-level shortcut used by the CLI entry point
 def get_health_status() -> Dict[str, Any]:
    """Run the full health check on a fresh HealthChecker and return its report."""
    return HealthChecker().run_full_health_check()
 if __name__ == "__main__":
    # CLI usage: python core/health_check.py
    import sys
    import os
    health = get_health_status()
    print(json.dumps(health, indent=2))
    # Exit code mirrors the overall status: 0 healthy, 1 warning, 2 anything else
    exit_codes = {"healthy": 0, "warning": 1}
    sys.exit(exit_codes.get(health["overall_status"], 2))
--- a/core/result.py
+++ b/core/result.py
@@ -0,0 +1,43 @@
 """
 Result type for handling success/error cases
 """
 from typing import TypeVar, Generic, Optional, Callable
 from dataclasses import dataclass
 T = TypeVar('T')
 E = TypeVar('E')
@dataclass
class Success(Generic[T]):
    """Result variant that carries the value of a successful operation."""

    value: T

    def is_success(self) -> bool:
        """Report that this variant is the success case (always True)."""
        return True

    def is_error(self) -> bool:
        """Report that this variant is not the error case (always False)."""
        return False

    def map(self, func: Callable[[T], 'Success']) -> 'Success[T]':
        """Call ``func`` with the wrapped value and return its result.

        Whatever ``func`` returns is handed back unchanged; it is not
        re-wrapped in ``Success`` here.
        """
        outcome = func(self.value)
        return outcome
@dataclass
class Error(Generic[E]):
    """Result variant that carries the error of a failed operation."""

    error: E

    def is_success(self) -> bool:
        """Report that this variant is not the success case (always False)."""
        return False

    def is_error(self) -> bool:
        """Report that this variant is the error case (always True)."""
        return True

    def map(self, func: Callable) -> 'Error[E]':
        """Ignore ``func`` and hand back this same error instance."""
        unchanged = self
        return unchanged
 # Union alias: a Result is either a Success[T] or an Error[E]
 Result = Success[T] | Error[E]
--- a/dashboard.py
+++ b/dashboard.py
@@ -1,417 +0,0 @@
 #!/usr/bin/env python3
 """
 Dashboard Flask para gestión de archivos de audio
 Interfaz web simple para reprocesar archivos MP3 con 1 click
 """
 import os
 import logging
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import List, Dict, Any
 from flask import Flask, render_template, request, jsonify, send_from_directory
 from flask_cors import CORS
 # Importar configuraciones del main.py sin rutas absolutas
 PROJECT_ROOT = Path(__file__).resolve().parent
 if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))
 from main import (
    AUDIO_EXTENSIONS, LOCAL_DOWNLOADS_PATH, PROCESSED_FILES_PATH,
    load_processed_files, save_processed_file, process_audio_file,
    REMOTE_AUDIOS_FOLDER, webdav_list, normalize_remote_path
 )
 # Flask application with CORS enabled through flask_cors
 app = Flask(__name__)
 CORS(app)
 # Configuration
 # NOTE(review): when DASHBOARD_SECRET_KEY is unset the hard-coded placeholder
 # below is used as the secret key - set the variable in production.
 app.config['SECRET_KEY'] = os.getenv('DASHBOARD_SECRET_KEY', 'dashboard-secret-key-change-in-production')
 app.config['DOWNLOADS_FOLDER'] = LOCAL_DOWNLOADS_PATH
 # Module-wide logging at INFO level
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class FileManager:
    """File manager backing the dashboard.

    Keeps an in-memory copy of the processed-files registry and builds the
    audio file listing (WebDAV plus local downloads) served by the dashboard.
    """

    def __init__(self):
        """Start with an empty registry and then load it from the stored record."""
        self.processed_files = set()
        self.load_processed_files()

    def load_processed_files(self):
        """Reload ``self.processed_files`` from the processed-files registry.

        Any error while loading is logged and leaves an empty set.
        """
        try:
            self.processed_files = load_processed_files()
            logger.info(f"Cargados {len(self.processed_files)} archivos procesados")
        except Exception as e:
            logger.error(f"Error cargando archivos procesados: {e}")
            self.processed_files = set()

    def get_audio_files(self) -> List[Dict[str, Any]]:
        """Return the available audio files, de-duplicated and sorted by filename.

        WebDAV and local entries are merged by filename; when both sources
        list the same filename the WebDAV entry wins.
        """
        files = self._list_webdav_files() + self._list_local_files()
        # Remove duplicates (WebDAV entries take precedence) and sort
        unique_files = {}
        for file in files:
            key = file['filename']
            if key not in unique_files or file['source'] == 'webdav':
                unique_files[key] = file
        return sorted(unique_files.values(), key=lambda x: x['filename'])

    def _list_webdav_files(self) -> List[Dict[str, Any]]:
        """Collect entries for audio files found in the remote WebDAV folder.

        Errors are logged; entries gathered before the error are still returned.
        """
        files = []
        try:
            for file_path in webdav_list(REMOTE_AUDIOS_FOLDER):
                normalized_path = normalize_remote_path(file_path)
                if not any(normalized_path.lower().endswith(ext) for ext in AUDIO_EXTENSIONS):
                    continue
                base_name = os.path.basename(normalized_path)
                available_formats = self._get_available_formats(base_name)
                # Processed if it is in the registry OR already has output files
                is_processed = (normalized_path in self.processed_files or
                                base_name in self.processed_files or
                                any(available_formats.values()))
                files.append({
                    'filename': base_name,
                    'path': normalized_path,
                    'source': 'webdav',
                    'processed': is_processed,
                    'size': 'Unknown',
                    'last_modified': 'Unknown',
                    'available_formats': available_formats
                })
        except Exception as e:
            logger.error(f"Error obteniendo archivos WebDAV: {e}")
        return files

    def _list_local_files(self) -> List[Dict[str, Any]]:
        """Collect entries for audio files found in the local downloads folder.

        Errors are logged; entries gathered before the error are still returned.
        """
        files = []
        try:
            if os.path.exists(LOCAL_DOWNLOADS_PATH):
                local_files = []
                for ext in AUDIO_EXTENSIONS:
                    local_files.extend(Path(LOCAL_DOWNLOADS_PATH).glob(f"*{ext}"))
                for file_path in local_files:
                    stat = file_path.stat()
                    available_formats = self._get_available_formats(file_path.name)
                    # Processed if it is in the registry OR already has output files
                    is_processed = (file_path.name in self.processed_files or
                                    any(available_formats.values()))
                    files.append({
                        'filename': file_path.name,
                        'path': str(file_path),
                        'source': 'local',
                        'processed': is_processed,
                        'size': self._format_size(stat.st_size),
                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                        'available_formats': available_formats
                    })
        except Exception as e:
            logger.error(f"Error obteniendo archivos locales: {e}")
        return files

    def _get_available_formats(self, audio_filename: str) -> Dict[str, bool]:
        """Check which output formats already exist for an audio file.

        Looks in the local downloads folder and ``./resumenes_docx`` for
        ``<name>.<ext>`` where ``<name>`` is the audio file's stem, optionally
        with spaces replaced by underscores and/or an ``_unificado`` suffix.

        Returns:
            Mapping of each extension (txt, md, pdf, docx) to whether a
            matching file was found.
        """
        base_name = Path(audio_filename).stem
        underscored = base_name.replace(' ', '_')
        # Built once; dict.fromkeys drops duplicates (e.g. names without
        # spaces) while preserving the lookup order.
        name_variants = list(dict.fromkeys([
            base_name,
            f"{base_name}_unificado",
            underscored,
            f"{underscored}_unificado",
        ]))
        formats = {
            'txt': False,
            'md': False,
            'pdf': False,
            'docx': False
        }
        for directory in [LOCAL_DOWNLOADS_PATH, './resumenes_docx']:
            if not os.path.exists(directory):
                continue
            for ext in formats:
                if formats[ext]:
                    continue  # already found in an earlier directory
                if any(os.path.exists(os.path.join(directory, f"{variant}.{ext}"))
                       for variant in name_variants):
                    formats[ext] = True
        return formats

    def _format_size(self, size_bytes: int) -> str:
        """Format a byte count with one decimal and a B/KB/MB/GB/TB unit."""
        size = size_bytes
        for unit in ['B', 'KB', 'MB', 'GB']:
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0
        return f"{size:.1f} TB"

    def reprocess_file(self, file_path: str, source: str) -> Dict[str, Any]:
        """Reprocess a specific file and report the outcome as a dict.

        For ``source == 'webdav'`` the file is passed to ``process_audio_file``.
        NOTE(review): for any other source this only logs and still reports
        success - no processing is triggered for local files; confirm intent.

        Returns:
            ``{'success': True, 'message', 'had_existing_files',
            'existing_formats'}`` on success, or
            ``{'success': False, 'message'}`` if an exception was raised.
        """
        try:
            # Check which outputs already exist before reprocessing
            filename = os.path.basename(file_path)
            existing_formats = self._get_available_formats(filename)
            has_existing_files = any(existing_formats.values())
            if source == 'webdav':
                logger.info(f"Iniciando reprocesamiento de WebDAV: {file_path}")
                process_audio_file(file_path)
            else:
                logger.info(f"Iniciando reprocesamiento local: {file_path}")
            return {
                'success': True,
                'message': f"Archivo {os.path.basename(file_path)} enviado a reprocesamiento",
                'had_existing_files': has_existing_files,
                'existing_formats': existing_formats
            }
        except Exception as e:
            logger.error(f"Error reprocesando {file_path}: {e}")
            return {
                'success': False,
                'message': f"Error: {str(e)}"
            }

    def mark_as_unprocessed(self, file_path: str) -> bool:
        """Remove a file from the processed registry to force reprocessing.

        Rewrites the registry without the lines that equal ``file_path``, equal
        its normalized remote path, or share its basename, then reloads the
        in-memory set.

        Returns:
            True on success, False if any step raised (the error is logged).
        """
        try:
            # NOTE(review): the result is not used; the call is kept from the
            # original flow in case the loader has side effects - confirm.
            load_processed_files()
            normalized_path = normalize_remote_path(file_path)
            base_name = os.path.basename(normalized_path)
            # Write the filtered registry to a temp file, then swap it in
            temp_path = PROCESSED_FILES_PATH + '.temp'
            # Both handles are closed deterministically by the with-statement
            with open(PROCESSED_FILES_PATH, 'r', encoding='utf-8') as registry, \
                    open(temp_path, 'w', encoding='utf-8') as f:
                for line in registry:
                    entry = line.strip()
                    if (entry != normalized_path and
                            os.path.basename(entry) != base_name and
                            entry != file_path):
                        f.write(line)
            os.replace(temp_path, PROCESSED_FILES_PATH)
            self.load_processed_files()  # Reload the in-memory registry
            return True
        except Exception as e:
            logger.error(f"Error marcando como no procesado {file_path}: {e}")
            return False
 # Global file manager instance shared by the route handlers
 file_manager = FileManager()
@app.route('/')
def index():
    """Serve the dashboard main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/api/files')
def get_files():
    """API endpoint returning the file listing with total/processed/pending counts.

    Responds with HTTP 500 and an error message if building the listing fails.
    """
    try:
        listing = file_manager.get_audio_files()
        processed_count = sum(1 for entry in listing if entry['processed'])
        pending_count = sum(1 for entry in listing if not entry['processed'])
        payload = {
            'success': True,
            'files': listing,
            'total': len(listing),
            'processed': processed_count,
            'pending': pending_count,
        }
        return jsonify(payload)
    except Exception as e:
        logger.error(f"Error obteniendo archivos: {e}")
        failure = {'success': False, 'message': f"Error: {str(e)}"}
        return jsonify(failure), 500
@app.route('/api/reprocess', methods=['POST'])
def reprocess_file():
    """API endpoint that reprocesses one file.

    Expects a JSON object with ``path`` (required) and ``source`` (defaults to
    ``'local'``). Responds with HTTP 400 when ``path`` is missing and HTTP 500
    on unexpected errors; otherwise returns the result dict from
    ``file_manager.reprocess_file``.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as None,
        # so it gets the 400 validation answer below instead of a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        file_path = data.get('path')
        source = data.get('source', 'local')
        if not file_path:
            return jsonify({
                'success': False,
                'message': "Path del archivo es requerido"
            }), 400
        result = file_manager.reprocess_file(file_path, source)
        return jsonify(result)
    except Exception as e:
        logger.error(f"Error en endpoint reprocesar: {e}")
        return jsonify({
            'success': False,
            'message': f"Error: {str(e)}"
        }), 500
@app.route('/api/mark-unprocessed', methods=['POST'])
def mark_unprocessed():
    """API endpoint that marks a file as not processed.

    Expects a JSON object with ``path`` (required). Responds with HTTP 400
    when ``path`` is missing and HTTP 500 when the registry update fails or an
    unexpected error occurs.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as None,
        # so it gets the 400 validation answer below instead of a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        file_path = data.get('path')
        if not file_path:
            return jsonify({
                'success': False,
                'message': "Path del archivo es requerido"
            }), 400
        if file_manager.mark_as_unprocessed(file_path):
            return jsonify({
                'success': True,
                'message': "Archivo marcado como no procesado"
            })
        return jsonify({
            'success': False,
            'message': "No se pudo marcar como no procesado"
        }), 500
    except Exception as e:
        logger.error(f"Error marcando como no procesado: {e}")
        return jsonify({
            'success': False,
            'message': f"Error: {str(e)}"
        }), 500
@app.route('/api/refresh')
def refresh_files():
    """API endpoint that reloads the processed registry and returns the fresh listing.

    Responds with HTTP 500 and an error message if the refresh fails.
    """
    try:
        file_manager.load_processed_files()
        refreshed = file_manager.get_audio_files()
        payload = {
            'success': True,
            'message': "Lista de archivos actualizada",
            'files': refreshed,
        }
        return jsonify(payload)
    except Exception as e:
        logger.error(f"Error refrescando archivos: {e}")
        failure = {'success': False, 'message': f"Error: {str(e)}"}
        return jsonify(failure), 500
@app.route('/downloads/find-file')
def find_and_download_file():
    """Find and serve a file, trying several variations of its name.

    Query parameters:
        filename: original file name; only its stem is used.
        ext: extension of the wanted output, without the leading dot.

    Responds with HTTP 400 when a parameter is missing and HTTP 404 when no
    candidate exists or any error occurs while searching.
    """
    try:
        filename = request.args.get('filename')
        ext = request.args.get('ext')
        if not filename or not ext:
            return jsonify({'error': 'Missing parameters'}), 400
        # Candidate names: exact stem, with the _unificado suffix, and both
        # again with spaces replaced by underscores. dict.fromkeys removes
        # duplicates (names without spaces) while keeping the lookup order.
        base_name = Path(filename).stem
        underscored = base_name.replace(' ', '_')
        possible_names = list(dict.fromkeys([
            f"{base_name}.{ext}",
            f"{base_name}_unificado.{ext}",
            f"{underscored}.{ext}",
            f"{underscored}_unificado.{ext}",
        ]))
        # Directories to search, in priority order
        directories = [LOCAL_DOWNLOADS_PATH, './resumenes_docx']
        for directory in directories:
            if not os.path.exists(directory):
                continue
            for name in possible_names:
                if os.path.exists(os.path.join(directory, name)):
                    return send_from_directory(directory, name)
        # No candidate was found
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        logger.error(f"Error buscando archivo: {e}")
        return jsonify({'error': 'File not found'}), 404
@app.route('/downloads/<path:filename>')
def download_file(filename):
    """Serve a download from the downloads folder, falling back to resumenes_docx.

    ``send_from_directory`` signals a missing file with an HTTP NotFound
    exception rather than ``FileNotFoundError``, so the fallback directory is
    chosen by checking for the file first instead of catching that error.
    Responds with HTTP 404 when the file is in neither directory or any error
    occurs while serving it.
    """
    try:
        for directory in (LOCAL_DOWNLOADS_PATH, './resumenes_docx'):
            # Relative directories are resolved against the app root path,
            # as send_from_directory does; absolute ones are kept as they are.
            candidate = os.path.join(app.root_path, directory, filename)
            if os.path.isfile(candidate):
                return send_from_directory(directory, filename)
        # Not found in either location
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        logger.error(f"Error sirviendo archivo {filename}: {e}")
        return jsonify({'error': 'File not found'}), 404
@app.route('/health')
def health_check():
    """Health check endpoint: reports status, current time and registry size."""
    now_iso = datetime.now().isoformat()
    registry_size = len(file_manager.processed_files)
    payload = {
        'status': 'healthy',
        'timestamp': now_iso,
        'processed_files_count': registry_size,
    }
    return jsonify(payload)
 if __name__ == '__main__':
    # Startup banner
    for banner_line in (
        "🚀 Iniciando Dashboard de Gestión de Audio",
        f"📁 Carpeta de descargas: {LOCAL_DOWNLOADS_PATH}",
        "📊 Servidor web en http://localhost:5000",
    ):
        logger.info(banner_line)
    server_options = {
        'host': '0.0.0.0',
        'port': 5000,
        'debug': False,
        'threaded': True,
        'use_reloader': False,  # Avoid problems with threading
    }
    try:
        app.run(**server_options)
    except KeyboardInterrupt:
        logger.info("🛑 Dashboard detenido por el usuario")
    except Exception as e:
        logger.error(f"❌ Error en dashboard: {e}")
        raise
--- a/docs/DEPLOYMENT.md
+++ b/docs/DEPLOYMENT.md
@@ -0,0 +1,418 @@
 # Guia de Despliegue - CBCFacil
 Esta guia describe las opciones y procedimientos para desplegar CBCFacil en diferentes entornos.
 ## Opciones de Despliegue
 | Metodo | Complejidad | Recomendado para |
 |--------|-------------|------------------|
 | Docker Compose | Baja | Desarrollo, Produccion ligera |
 | Docker Standalone | Media | Produccion con orquestacion |
 | Virtual Environment | Baja | Desarrollo local |
 | Kubernetes | Alta | Produccion a escala |
 ## Despliegue con Docker Compose
 ### Prerrequisitos
 - Docker 24.0+
 - Docker Compose 2.20+
 - NVIDIA Container Toolkit (para GPU)
 ### Configuracion
 1. Crear archivo de configuracion:
 ```bash
 cp .env.example .env.production
 nano .env.production
 ```
 2. Configurar variables sensibles:
 ```bash
 # .env.production
 NEXTCLOUD_URL=https://nextcloud.example.com/remote.php/webdav
 NEXTCLOUD_USER=tu_usuario
 NEXTCLOUD_PASSWORD=tu_contrasena_segura
 ANTHROPIC_AUTH_TOKEN=sk-ant-...
 GEMINI_API_KEY=AIza...
 TELEGRAM_TOKEN=bot_token
 TELEGRAM_CHAT_ID=chat_id
 CUDA_VISIBLE_DEVICES=all
 LOG_LEVEL=INFO
 ```
 3. Verificar docker-compose.yml:
 ```yaml
 # docker-compose.yml
 version: '3.8'
 services:
  cbcfacil:
    build: .
    container_name: cbcfacil
    restart: unless-stopped
    ports:
      - "5000:5000"
    volumes:
      - ./downloads:/app/downloads
      - ./resumenes_docx:/app/resumenes_docx
      - ./logs:/app/logs
      - ./data:/app/data
    environment:
      - NEXTCLOUD_URL=${NEXTCLOUD_URL}
      - NEXTCLOUD_USER=${NEXTCLOUD_USER}
      - NEXTCLOUD_PASSWORD=${NEXTCLOUD_PASSWORD}
      - ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - TELEGRAM_TOKEN=${TELEGRAM_TOKEN}
      - TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID}
      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
      - LOG_LEVEL=${LOG_LEVEL}
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
 ```
 ### Despliegue
 ```bash
 # Construir y levantar
 docker compose up -d --build
 # Ver logs
 docker compose logs -f cbcfacil
 # Ver estado
 docker compose ps
 # Reiniciar
 docker compose restart cbcfacil
 # Detener
 docker compose down
 ```
 ### Actualizacion
 ```bash
 # Hacer backup de datos
 docker cp cbcfacil:/app/data ./backup/data
 # Actualizar imagen
 docker compose pull
 docker compose up -d --build
 # Verificar
 docker compose logs -f cbcfacil
 ```
 ## Despliegue con Docker Standalone
 ### Construir Imagen
 ```bash
 # Construir imagen
 docker build -t cbcfacil:latest .
 # Verificar imagen
 docker images cbcfacil
 ```
 ### Ejecutar Contenedor
 ```bash
 # Con GPU
 docker run -d \
  --name cbcfacil \
  --gpus all \
  -p 5000:5000 \
  -v $(pwd)/downloads:/app/downloads \
  -v $(pwd)/resumenes_docx:/app/resumenes_docx \
  -v $(pwd)/logs:/app/logs \
  -e NEXTCLOUD_URL=${NEXTCLOUD_URL} \
  -e NEXTCLOUD_USER=${NEXTCLOUD_USER} \
  -e NEXTCLOUD_PASSWORD=${NEXTCLOUD_PASSWORD} \
  -e ANTHROPIC_AUTH_TOKEN=${ANTHROPIC_AUTH_TOKEN} \
  -e GEMINI_API_KEY=${GEMINI_API_KEY} \
  cbcfacil:latest
 # Ver logs
 docker logs -f cbcfacil
 # Detener
 docker stop cbcfacil && docker rm cbcfacil
 ```
 ## Despliegue Local (Virtual Environment)
 ### Prerrequisitos
 - Python 3.10+
 - NVIDIA drivers (opcional)
 ### Instalacion
 ```bash
 # Clonar y entrar al directorio
 git clone <repo_url>
 cd cbcfacil
 # Crear entorno virtual
 python3 -m venv .venv
 source .venv/bin/activate
 # Instalar dependencias
 pip install -r requirements.txt
 # Configurar variables de entorno
 cp .env.example .env.production
 nano .env.production
 # Crear directorios
 mkdir -p downloads resumenes_docx logs data
 # Ejecutar
 python main.py
 ```
 ### Como Servicio Systemd
 ```ini
 # /etc/systemd/system/cbcfacil.service
 [Unit]
 Description=CBCFacil AI Service
 After=network.target
 [Service]
 Type=simple
 User=cbcfacil
 WorkingDirectory=/opt/cbcfacil
 Environment="PATH=/opt/cbcfacil/.venv/bin"
 EnvironmentFile=/opt/cbcfacil/.env.production
 ExecStart=/opt/cbcfacil/.venv/bin/python main.py
 Restart=always
 RestartSec=10
 # Logging
 StandardOutput=journal
 StandardError=journal
 SyslogIdentifier=cbcfacil
 [Install]
 WantedBy=multi-user.target
 ```
 ```bash
 # Instalar servicio
 sudo cp cbcfacil.service /etc/systemd/system/
 sudo systemctl daemon-reload
 sudo systemctl enable cbcfacil
 sudo systemctl start cbcfacil
 # Verificar estado
 sudo systemctl status cbcfacil
 # Ver logs
 journalctl -u cbcfacil -f
 ```
 ## Configuracion de Produccion
 ### Variables de Entorno Criticas
 ```bash
 # Obligatorias
 NEXTCLOUD_URL=...
 NEXTCLOUD_USER=...
 NEXTCLOUD_PASSWORD=...
 # Recomendadas para produccion
 DEBUG=false
 LOG_LEVEL=WARNING
 POLL_INTERVAL=10
 # GPU
 CUDA_VISIBLE_DEVICES=all
 ```
 ### Optimizaciones
 ```bash
 # En .env.production
 # Reducir polling para menor carga
 POLL_INTERVAL=10
 # Optimizar memoria GPU
 PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
 # Limitar threads
 OMP_NUM_THREADS=4
 MKL_NUM_THREADS=4
 ```
 ### Seguridad
 ```bash
 # Crear usuario dedicado
 sudo useradd -r -s /bin/false cbcfacil
 # Asignar permisos
 sudo chown -R cbcfacil:cbcfacil /opt/cbcfacil
 # Proteger archivo de variables
 sudo chmod 600 /opt/cbcfacil/.env.production
 ```
 ## Monitoreo
 ### Health Check
 ```bash
 # Endpoint de salud
 curl http://localhost:5000/health
 # Respuesta esperada
 {"status": "healthy"}
 ```
 ### Logging
 ```bash
 # Ver logs en tiempo real
 docker logs -f cbcfacil
 # O con journalctl
 journalctl -u cbcfacil -f
 # Logs estructurados en JSON (produccion)
 LOG_LEVEL=WARNING
 ```
 ### Metricas
 El sistema expone metricas via API:
 ```bash
 # Estado del servicio
 curl http://localhost:5000/api/status
 ```
 ## Respaldo y Recuperacion
 ### Respaldo de Datos
 ```bash
 # Directorios a respaldar
 # - downloads/ (archivos procesados)
 # - resumenes_docx/ (documentos generados)
 # - data/ (registros y estados)
 # - logs/ (logs del sistema)
 # Script de backup
 #!/bin/bash
 DATE=$(date +%Y%m%d_%H%M%S)
 BACKUP_DIR=/backup/cbcfacil
 mkdir -p $BACKUP_DIR
 tar -czf $BACKUP_DIR/cbcfacil_$DATE.tar.gz \
  /opt/cbcfacil/downloads \
  /opt/cbcfacil/resumenes_docx \
  /opt/cbcfacil/data
 # Limpiar backups antiguos (mantener ultimos 7 dias)
 find $BACKUP_DIR -name "*.tar.gz" -mtime +7 -delete
 ```
 ### Recuperacion
 ```bash
 # Detener servicio
 docker compose down
 # Restaurar datos (el backup guarda rutas opt/cbcfacil/..., por eso se extrae en /)
 tar -xzf /backup/cbcfacil/cbcfacil_<fecha>.tar.gz -C /
 # Verificar permisos
 chown -R cbcfacil:cbcfacil /opt/cbcfacil
 # Reiniciar servicio
 docker compose up -d
 ```
 ## Troubleshooting de Produccion
 ### Contenedor no Inicia
 ```bash
 # Verificar logs
 docker logs cbcfacil
 # Verificar configuracion
 docker exec -it cbcfacil python -c "from config import settings; print(settings.has_webdav_config)"
 ```
 ### Error de Memoria GPU
 ```bash
 # Verificar GPUs disponibles
 nvidia-smi
 # Liberar memoria
 sudo nvidia-smi --gpu-reset
 # O limitar GPU
 CUDA_VISIBLE_DEVICES=0
 ```
 ### WebDAV Connection Failed
 ```bash
 # Verificar conectividad
 curl -u $NEXTCLOUD_USER:$NEXTCLOUD_PASSWORD $NEXTCLOUD_URL
 # Verificar credenciales
 echo "URL: $NEXTCLOUD_URL"
 echo "User: $NEXTCLOUD_USER"
 ```
 ### High CPU Usage
 ```bash
 # Reducir threads
 OMP_NUM_THREADS=2
 MKL_NUM_THREADS=2
 # Aumentar intervalo de polling
 POLL_INTERVAL=15
 ```
 ## Checklist de Produccion
 - [ ] Variables de entorno configuradas
 - [ ] Credenciales seguras (.env.production)
 - [ ] Usuario dedicado creado
 - [ ] Permisos correctos asignados
 - [ ] Logs configurados
 - [ ] Health check funcionando
 - [ ] Backup automatizado configurado
 - [ ] Monitoreo activo
 - [ ] SSL/TLS configurado (si aplica)
 - [ ] Firewall configurado
 ## Recursos Adicionales
 - `docs/SETUP.md` - Guia de configuracion inicial
 - `docs/TESTING.md` - Guia de testing
 - `ARCHITECTURE.md` - Documentacion arquitectonica
--- a/docs/SETUP.md
+++ b/docs/SETUP.md
@@ -0,0 +1,337 @@
 # Guia de Configuracion - CBCFacil
 Esta guia describe los pasos para configurar el entorno de desarrollo de CBCFacil.
 ## Requisitos Previos
 ### Software Requerido
 | Componente | Version Minima | Recomendada |
 |------------|----------------|-------------|
 | Python | 3.10 | 3.11+ |
 | Git | 2.0 | Latest |
 | NVIDIA Driver | 535+ | 550+ (para CUDA 12.1) |
 | Docker | 24.0 | 25.0+ (opcional) |
 | Docker Compose | 2.20 | Latest (opcional) |
 ### Hardware Recomendado
 - **CPU**: 4+ nucleos
 - **RAM**: 8GB minimo (16GB+ recomendado)
 - **GPU**: NVIDIA con 4GB+ VRAM (opcional, soporta CPU)
 - **Almacenamiento**: 10GB+ libres
 ## Instalacion Paso a Paso
 ### 1. Clonar el Repositorio
 ```bash
 git clone <repo_url>
 cd cbcfacil
 ```
 ### 2. Crear Entorno Virtual
 ```bash
 # Crear venv
 python3 -m venv .venv
 # Activar (Linux/macOS)
 source .venv/bin/activate
 # Activar (Windows)
 .venv\Scripts\activate
 ```
 ### 3. Instalar Dependencias
 ```bash
 # Actualizar pip
 pip install --upgrade pip
 # Instalar dependencias de produccion
 pip install -r requirements.txt
 # Instalar dependencias de desarrollo (opcional)
 pip install -r requirements-dev.txt
 ```
 ### 4. Configurar Variables de Entorno
 ```bash
 # Copiar template de configuracion
 cp .env.example .env.secrets
 # Editar con tus credenciales
 nano .env.secrets
 ```
 ### 5. Estructura de Archivos Necesaria
 ```bash
 # Crear directorios requeridos
 mkdir -p downloads resumenes_docx logs
 # Verificar estructura
 ls -la
 ```
 ## Configuracion de Credenciales
 ### Nextcloud/WebDAV
 ```bash
 # Obligatorio para sincronizacion de archivos
 NEXTCLOUD_URL=https://tu-nextcloud.com/remote.php/webdav
 NEXTCLOUD_USER=tu_usuario
 NEXTCLOUD_PASSWORD=tu_contrasena
 ```
 ### AI Providers (Opcional)
 ```bash
 # Claude via Z.ai
 ANTHROPIC_AUTH_TOKEN=sk-ant-...
 # Google Gemini
 GEMINI_API_KEY=AIza...
 # Gemini CLI (opcional)
 GEMINI_CLI_PATH=/usr/local/bin/gemini
 ```
 ### Telegram (Opcional)
 ```bash
 TELEGRAM_TOKEN=bot_token
 TELEGRAM_CHAT_ID=chat_id
 ```
 ### GPU Configuration (Opcional)
 ```bash
 # Usar GPU especifica
 CUDA_VISIBLE_DEVICES=0
 # Usar todas las GPUs
 CUDA_VISIBLE_DEVICES=all
 # Forzar CPU
 CUDA_VISIBLE_DEVICES=
 ```
 ## Verificacion de Instalacion
 ### 1. Verificar Python
 ```bash
 python --version
 # Debe mostrar Python 3.10+
 ```
 ### 2. Verificar Dependencias
 ```bash
 python -c "import torch; print(f'PyTorch: {torch.__version__}')"
 python -c "import flask; print(f'Flask: {flask.__version__}')"
 python -c "import whisper; print('Whisper instalado correctamente')"
 ```
 ### 3. Verificar Configuracion
 ```bash
 python -c "from config import settings; print(f'WebDAV configurado: {settings.has_webdav_config}')"
 python -c "from config import settings; print(f'AI configurado: {settings.has_ai_config}')"
 ```
 ### 4. Test Rapido
 ```bash
 # Ejecutar tests basicos
 pytest tests/test_config.py -v
 ```
 ## Configuracion de Desarrollo
 ### IDE Recomendado
 #### VS Code
 ```json
 // .vscode/settings.json
 {
    "python.defaultInterpreterPath": ".venv/bin/python",
    "python.linting.enabled": true,
    "python.linting.pylintEnabled": true,
    "editor.formatOnSave": true,
    "python.formatting.provider": "black"
 }
 ```
 #### PyCharm
 1. Open Settings > Project > Python Interpreter
 2. Add Interpreter > Existing Environment
 3. Select `.venv/bin/python`
 ### Git Hooks (Opcional)
 ```bash
 # Instalar pre-commit
 pip install pre-commit
 pre-commit install
 # Verificar hooks
 pre-commit run --all-files
 ```
 ### Formateo de Codigo
 ```bash
 # Instalar formateadores
 pip install black isort
 # Formatear codigo
 black .
 isort .
 # Verificar estilo
 black --check .
 isort --check-only .
 ```
 ## Ejecucion del Servicio
 ### Modo Desarrollo
 ```bash
 # Activar entorno virtual
 source .venv/bin/activate
 # Ejecutar servicio completo
 python main.py
 # Con logging verbose
 LOG_LEVEL=DEBUG python main.py
 ```
 ### Comandos CLI Disponibles
 ```bash
 # Transcribir audio
 python main.py whisper <archivo_audio> <directorio_output>
 # Procesar PDF
 python main.py pdf <archivo_pdf> <directorio_output>
 ```
 ### Dashboard
 El dashboard estara disponible en:
 - URL: http://localhost:5000
 - API: http://localhost:5000/api/
 ## Solucion de Problemas Comunes
 ### Error: "Module not found"
 ```bash
 # Verificar que el venv esta activado
 which python
 # Debe mostrar path hacia .venv/bin/python
 # Reinstalar dependencias
 pip install -r requirements.txt
 ```
 ### Error: "CUDA out of memory"
 ```bash
 # Reducir uso de GPU
 export CUDA_VISIBLE_DEVICES=
 # O usar solo una GPU
 export CUDA_VISIBLE_DEVICES=0
 ```
 ### Error: "WebDAV connection failed"
 ```bash
 # Verificar credenciales
 echo $NEXTCLOUD_URL
 echo $NEXTCLOUD_USER
 # Probar conexion manualmente
 curl -u $NEXTCLOUD_USER:$NEXTCLOUD_PASSWORD $NEXTCLOUD_URL
 ```
 ### Error: "Telegram token invalid"
 ```bash
 # Verificar token con BotFather
 # https://t.me/BotFather
 # Verificar variables de entorno
 echo $TELEGRAM_TOKEN
 echo $TELEGRAM_CHAT_ID
 ```
 ### Error: "Whisper model not found"
 ```bash
 # El modelo se descarga automaticamente la primera vez
 # Para forzar recarga:
 rm -rf ~/.cache/whisper
 ```
 ## Configuracion de GPU (Opcional)
 ### Verificar Instalacion de CUDA
 ```bash
 # Verificar drivers NVIDIA
 nvidia-smi
 # Verificar CUDA Toolkit
 nvcc --version
 # Verificar PyTorch CUDA
 python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
 ```
 ### Configurar Memoria GPU
 ```bash
 # En .env.secrets
 PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
 ```
 ## Variables de Entorno Completa
 | Variable | Requerido | Default | Descripcion |
 |----------|-----------|---------|-------------|
 | NEXTCLOUD_URL | Si | - | URL WebDAV de Nextcloud |
 | NEXTCLOUD_USER | Si | - | Usuario Nextcloud |
 | NEXTCLOUD_PASSWORD | Si | - | Contrasena Nextcloud |
 | ANTHROPIC_AUTH_TOKEN | No | - | Token Claude/Z.ai |
 | GEMINI_API_KEY | No | - | API Key Gemini |
 | TELEGRAM_TOKEN | No | - | Token Bot Telegram |
 | TELEGRAM_CHAT_ID | No | - | Chat ID Telegram |
 | CUDA_VISIBLE_DEVICES | No | "all" | GPUs a usar |
 | POLL_INTERVAL | No | 5 | Segundos entre polls |
 | LOG_LEVEL | No | "INFO" | Nivel de logging |
 | DEBUG | No | false | Modo debug |
 | DASHBOARD_PORT | No | 5000 | Puerto del dashboard |
 | DASHBOARD_HOST | No | "0.0.0.0" | Host del dashboard |
 ## Siguientes Pasos
 1. Verificar instalacion con tests
 2. Configurar integracion con Nextcloud
 3. Probar procesamiento de archivos
 4. Configurar notificaciones Telegram (opcional)
 Ver tambien:
 - `docs/TESTING.md` - Guia de testing
 - `docs/DEPLOYMENT.md` - Guia de despliegue
 - `ARCHITECTURE.md` - Documentacion arquitectonica
--- a/docs/TESTING.md
+++ b/docs/TESTING.md
@@ -0,0 +1,482 @@
 # Guia de Testing - CBCFacil
 Esta guia describe como ejecutar y escribir tests para CBCFacil.
 ## Estructura de Tests
 ```
 tests/
 ├── conftest.py                    # Fixtures compartidos
 ├── test_config.py                 # Tests de configuracion
 ├── test_storage.py                # Tests de almacenamiento
 ├── test_webdav.py                 # Tests de WebDAV
 ├── test_processors.py             # Tests de procesadores
 ├── test_ai_providers.py           # Tests de AI providers
 ├── test_vram_manager.py           # Tests de VRAM manager
 └── test_main_integration.py       # Tests de integracion
 ```
 ## Instalacion de Dependencias de Test
 ```bash
 # Activar entorno virtual
 source .venv/bin/activate
 # Instalar dependencias de desarrollo
 pip install -r requirements-dev.txt
 # Verificar instalacion
 pytest --version
 ```
 ## Ejecutar Tests
 ### Todos los Tests
 ```bash
 # Ejecutar todos los tests
 pytest tests/
 # Con output detallado
 pytest tests/ -v
 ```
 ### Tests Especificos
 ```bash
 # Tests de configuracion
 pytest tests/test_config.py -v
 # Tests de almacenamiento
 pytest tests/test_storage.py -v
 # Tests de WebDAV
 pytest tests/test_webdav.py -v
 # Tests de procesadores
 pytest tests/test_processors.py -v
 # Tests de AI providers
 pytest tests/test_ai_providers.py -v
 # Tests de VRAM manager
 pytest tests/test_vram_manager.py -v
 # Tests de integracion
 pytest tests/test_main_integration.py -v
 ```
 ### Tests con Coverage
 ```bash
 # Coverage basico
 pytest tests/ --cov=cbcfacil
 # Coverage con reporte HTML
 pytest tests/ --cov=cbcfacil --cov-report=html
 # Coverage con reporte term-missing
 pytest tests/ --cov=cbcfacil --cov-report=term-missing
 # Coverage por modulo
 pytest tests/ --cov=cbcfacil --cov-report=term-missing --cov-report=annotate
 ```
 ### Tests en Modo Watch
 ```bash
 # Recargar automaticamente al detectar cambios
 pytest-watch tests/
 ```
 ### Tests en Paralelo
 ```bash
 # Ejecutar tests en paralelo
 pytest tests/ -n auto
 # Con numero fijo de workers
 pytest tests/ -n 4
 ```
 ## Escribir Nuevos Tests
 ### Estructura Basica
 ```python
 # tests/test_ejemplo.py
 import pytest
 from pathlib import Path
 class TestEjemplo:
    """Clase de tests para un modulo"""
    def setup_method(self):
        """Setup antes de cada test"""
        pass
    def teardown_method(self):
        """Cleanup despues de cada test"""
        pass
    def test_funcion_basica(self):
        """Test de una funcion basica"""
        # Arrange
        input_value = "test"
        # Act
        result = mi_funcion(input_value)
        # Assert
        assert result is not None
        assert result == "expected"
 ```
 ### Usar Fixtures
 ```python
 # tests/conftest.py
 import pytest
 from pathlib import Path
@pytest.fixture
 def temp_directory(tmp_path):
    """Fixture para directorio temporal"""
    dir_path = tmp_path / "test_files"
    dir_path.mkdir()
    return dir_path
@pytest.fixture
 def mock_settings():
    """Fixture con settings de prueba"""
    class MockSettings:
        NEXTCLOUD_URL = "https://test.example.com"
        NEXTCLOUD_USER = "test_user"
        NEXTCLOUD_PASSWORD = "test_pass"
    return MockSettings()
 # En tu test
 def test_con_fixture(temp_directory, mock_settings):
    """Test usando fixtures"""
    assert temp_directory.exists()
    assert mock_settings.NEXTCLOUD_URL == "https://test.example.com"
 ```
 ### Tests de Configuracion
 ```python
 # tests/test_config.py
 import pytest
 from pathlib import Path
 from config import settings
 class TestSettings:
    """Tests para configuracion"""
    def test_has_webdav_config_true(self):
        """Test con WebDAV configurado"""
        # Verificar que las properties funcionan
        assert hasattr(settings, 'has_webdav_config')
        assert hasattr(settings, 'has_ai_config')
    def test_processed_files_path(self):
        """Test del path de archivos procesados"""
        path = settings.processed_files_path
        assert isinstance(path, Path)
        assert path.suffix == ".txt"
 ```
 ### Tests de WebDAV
 ```python
 # tests/test_webdav.py
 import pytest
 from unittest.mock import Mock, patch
 class TestWebDAVService:
    """Tests para WebDAV Service"""
    @pytest.fixture
    def webdav_service(self):
        """Crear instancia del servicio"""
        from services.webdav_service import webdav_service
        return webdav_service
    def test_list_remote_path(self, webdav_service):
        """Test de listado de archivos remotos"""
        # Mock del cliente WebDAV
        with patch('services.webdav_service.WebDAVClient') as mock_client:
            mock_instance = Mock()
            mock_instance.list.return_value = ['file1.pdf', 'file2.mp3']
            mock_client.return_value = mock_instance
            # Inicializar servicio
            webdav_service.initialize()
            # Test
            files = webdav_service.list("TestFolder")
            assert len(files) == 2
            assert "file1.pdf" in files
 ```
 ### Tests de Procesadores
 ```python
 # tests/test_processors.py
 import pytest
 from unittest.mock import Mock, patch
 class TestAudioProcessor:
    """Tests para Audio Processor"""
    @pytest.fixture
    def processor(self):
        """Crear procesador"""
        from processors.audio_processor import AudioProcessor
        return AudioProcessor()
    def test_process_audio_file(self, processor, tmp_path):
        """Test de procesamiento de audio"""
        # Crear archivo de prueba
        audio_file = tmp_path / "test.mp3"
        audio_file.write_bytes(b"fake audio content")
        # Mock de Whisper
        with patch('processors.audio_processor.whisper') as mock_whisper:
            mock_whisper.load_model.return_value.transcribe.return_value = {
                "text": "Texto transcrito de prueba"
            }
            # Ejecutar
            result = processor.process(str(audio_file))
            # Verificar
            assert result is not None
 ```
 ### Tests de AI Providers
 ```python
 # tests/test_ai_providers.py
 import pytest
 from unittest.mock import Mock, patch
 class TestClaudeProvider:
    """Tests para Claude Provider"""
    def test_summarize_text(self):
        """Test de resumen con Claude"""
        from services.ai.claude_provider import ClaudeProvider
        provider = ClaudeProvider()
        test_text = "Texto largo para resumir..."
        # Mock de la llamada API
        with patch.object(provider, '_call_api') as mock_call:
            mock_call.return_value = "Texto resumido"
            result = provider.summarize(test_text)
            assert result == "Texto resumido"
            mock_call.assert_called_once()
 ```
 ### Tests de Integracion
 ```python
 # tests/test_main_integration.py
 import pytest
 from unittest.mock import patch
 class TestMainIntegration:
    """Tests de integracion del main"""
    def test_main_loop_no_files(self):
        """Test del loop principal sin archivos nuevos"""
        with patch('main.webdav_service') as mock_webdav:
            with patch('main.processed_registry') as mock_registry:
                mock_webdav.list.return_value = []
                mock_registry.is_processed.return_value = True
                # El loop no debe procesar nada
                # Verificar que no se llama a procesadores
 ```
 ## Configuracion de pytest
 ```toml
 # pyproject.toml (en pytest.ini la seccion es [pytest] y los valores van sin comillas ni corchetes)
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
 addopts = [
    "-v",
    "--tb=short",
    "--strict-markers",
 ]
 filterwarnings = [
    "ignore::DeprecationWarning",
 ]
 ```
 ## Mejores Practicas
 ### 1. Nombrado de Tests
 ```python
 # BIEN
 def test_webdav_service_list_returns_files():
    ...
 def test_processed_registry_is_processed_true_for_processed_file():
    ...
 # MAL
 def test_list():
    ...
 def test_check():
    ...
 ```
 ### 2. Estructura AAA
 ```python
 def test_ejemplo_aaa():
    # Arrange
    input_data = {"key": "value"}
    expected = "result"
    # Act
    actual = function_under_test(input_data)
    # Assert
    assert actual == expected
 ```
 ### 3. Tests Aislados
 ```python
 # Cada test debe ser independiente
 def test_independent():
    # No depender de estado de otros tests
    # Usar fixtures para setup/cleanup
    pass
 ```
 ### 4. Probar Comportamiento, No Implementacion
 ```python
 # BIEN - Test del comportamiento, no la implementacion
 def test_registry_returns_true_for_processed_file():
    registry = ProcessedRegistry()
    registry.save("file.txt")
    assert registry.is_processed("file.txt") is True
 # MAL - Test de implementacion
 def test_registry_uses_set_internally():
    # No testear detalles de implementacion
    registry = ProcessedRegistry()
    assert hasattr(registry, '_processed_files')
 ```
 ### 5. Mocks Apropiados
 ```python
 # Usar mocks para dependencias externas
 from unittest.mock import Mock, patch, MagicMock
 def test_with_mocked_api():
    with patch('requests.get') as mock_get:
        mock_response = Mock()
        mock_response.json.return_value = {"key": "value"}
        mock_get.return_value = mock_response
        result = my_api_function()
        assert result == {"key": "value"}
 ```
 ## Coverage Objetivo
 | Componente | Coverage Minimo |
 |------------|-----------------|
 | config/ | 90% |
 | core/ | 90% |
 | services/ | 70% |
 | processors/ | 60% |
 | storage/ | 90% |
 | api/ | 80% |
 ## Integracion con CI/CD
 ```yaml
 # .github/workflows/tests.yml
 name: Tests
 on: [push, pull_request]
 jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
      - name: Run tests
        run: |
          pytest tests/ --cov=cbcfacil --cov-report=xml
      - name: Upload coverage
        uses: codecov/codecov-action@v3
 ```
 ## Troubleshooting
 ### Tests Fallan por Imports
 ```bash
 # Verificar que el venv esta activado
 source .venv/bin/activate
 # Reinstalar el paquete en modo desarrollo
 pip install -e .
 ```
 ### Tests Muy Lentos
 ```bash
 # Ejecutar en paralelo
 pytest tests/ -n auto
 # O ejecutar solo tests rapidos
 pytest tests/ -m "not slow"
 ```
 ### Memory Errors
 ```bash
 # Reducir workers
 pytest tests/ -n 2
 # O ejecutar secuencial
 pytest tests/ -n 0
 ```
 ## Recursos Adicionales
 - [Documentacion de pytest](https://docs.pytest.org/)
 - [Documentacion de unittest.mock](https://docs.python.org/3/library/unittest.mock.html)
 - [pytest-cov](https://pytest-cov.readthedocs.io/)
--- a/document/__init__.py
+++ b/document/__init__.py
@@ -0,0 +1,7 @@
 """
 Document generation package for CBCFacil
 """
 from .generators import DocumentGenerator
 __all__ = ['DocumentGenerator']
--- a/document/generators.py
+++ b/document/generators.py
@@ -0,0 +1,164 @@
 """
 Document generation utilities
 """
 import logging
 import re
 from pathlib import Path
 from typing import Dict, Any, List, Tuple
 from ..core import FileProcessingError
 from ..config import settings
 from ..services.ai import ai_provider_factory
 class DocumentGenerator:
    """Generate summary documents (Markdown, DOCX, PDF) from processed text.
    The AI provider is obtained once, at construction time, from
    ``ai_provider_factory.get_best_provider()``.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.ai_provider = ai_provider_factory.get_best_provider()
    def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
        """Summarize ``text`` and write the result as Markdown, DOCX and PDF.
        Args:
            text: Source text handed to the AI provider's ``summarize``.
            base_name: Stem used for the output file names and document titles.
        Returns:
            ``(True, summary, metadata)`` on success; ``metadata`` carries the
            three output paths, the DOCX file name, the summary and a suggested
            filename. ``(False, "", {})`` when any step raises (the error is
            logged, never propagated).
        """
        self.logger.info(f"Generating summary for {base_name}")
        try:
            summary = self.ai_provider.summarize(text)
            # base_name is forwarded so the filename fallback has something to use
            filename = self._generate_filename(text, summary, base_name)
            markdown_path = self._create_markdown(summary, base_name)
            docx_path = self._create_docx(summary, base_name)
            pdf_path = self._create_pdf(summary, base_name)
            metadata = {
                'markdown_path': str(markdown_path),
                'docx_path': str(docx_path),
                'pdf_path': str(pdf_path),
                'docx_name': Path(docx_path).name,
                'summary': summary,
                'filename': filename
            }
            return True, summary, metadata
        except Exception as e:
            self.logger.error(f"Summary generation failed: {e}")
            return False, "", {}
    def _generate_filename(self, text: str, summary: str, base_name: str = "documento") -> str:
        """Build a short filename from capitalized words found in the summary.
        Args:
            text: Original text (currently unused; kept for interface stability).
            summary: Summary the topic words are extracted from.
            base_name: Fallback stem used when extraction raises.
        Returns:
            Up to three topic words joined with ``_`` and truncated to
            ``settings.MAX_FILENAME_LENGTH``; ``'documento'`` when no word
            matches; a truncated ``base_name`` when extraction fails.
        """
        try:
            # Extract from the summary itself, not from a prompt wrapped around it,
            # so instruction words can never leak into the filename.
            if hasattr(self.ai_provider, 'sanitize_input'):
                # presumably returns a cleaned copy of the text - confirm against the provider
                topics_text = self.ai_provider.sanitize_input(summary)
            else:
                topics_text = summary[:100]
            # Capitalized words (Spanish accents included) act as topic candidates
            topics = re.findall(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b', topics_text)[:3]
            if not topics:
                topics = ['documento']
            topics = [t[:settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
            return '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]
        except Exception as e:
            self.logger.error(f"Filename generation failed: {e}")
            return base_name[:settings.MAX_FILENAME_BASE_LENGTH]
    def _create_markdown(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.md`` and return its path.
        The file is created in ``settings.LOCAL_DOWNLOADS_PATH``, which is
        created first if missing.
        """
        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.md"
        content = f"""# {base_name.replace('_', ' ').title()}
 ## Resumen
 {summary}
 ---
 *Generado por CBCFacil*
 """
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)
        return output_path
    def _create_docx(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.docx`` and return its path.
        Raises:
            FileProcessingError: if python-docx is not installed.
        """
        try:
            from docx import Document
        except ImportError as e:
            raise FileProcessingError("python-docx not installed") from e
        output_dir = settings.LOCAL_DOCX
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.docx"
        doc = Document()
        doc.add_heading(base_name.replace('_', ' ').title(), 0)
        doc.add_heading('Resumen', level=1)
        doc.add_paragraph(summary)
        doc.add_page_break()
        doc.add_paragraph("*Generado por CBCFacil*")
        doc.save(str(output_path))
        return output_path
    def _create_pdf(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.pdf`` and return its path.
        Each summary line is wrapped to the printable width and a new page is
        started whenever the cursor gets within 100 points of the bottom.
        Raises:
            FileProcessingError: if reportlab is not installed.
        """
        try:
            from reportlab.lib.pagesizes import letter
            from reportlab.lib.utils import simpleSplit
            from reportlab.pdfgen import canvas
        except ImportError as e:
            raise FileProcessingError("reportlab not installed") from e
        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.pdf"
        c = canvas.Canvas(str(output_path), pagesize=letter)
        width, height = letter
        margin = 100
        body_font, body_size = "Helvetica", 12
        # Same margin on both sides; text wider than this is wrapped
        max_text_width = width - 2 * margin
        c.setFont("Helvetica-Bold", 16)
        title = base_name.replace('_', ' ').title()
        c.drawString(margin, height - 100, title)
        c.setFont(body_font, body_size)
        y_position = height - 140
        for raw_line in summary.split('\n'):
            # A blank line may split to nothing; keep it as vertical spacing
            wrapped = simpleSplit(raw_line, body_font, body_size, max_text_width) or ['']
            for line in wrapped:
                if y_position < 100:
                    c.showPage()
                    y_position = height - 100
                    # showPage resets the font state, so select it again
                    c.setFont(body_font, body_size)
                c.drawString(margin, y_position, line)
                y_position -= 20
        c.showPage()
        c.save()
        return output_path
--- a/main.py
+++ b/main.py
--- a/main.py.backup
+++ b/main.py.backup
@@ -0,0 +1,148 @@
 #!/usr/bin/env python3
 """
 CBCFacil - Main Service Entry Point
 Unified AI service for document processing (audio, PDF, text)
 """
 import logging
 import sys
 import time
 import fcntl
 import os
 from pathlib import Path
 # Process-wide logging: INFO and above, timestamp and level on every record
 logging.basicConfig(format="%(asctime)s [%(levelname)s] - %(message)s", level=logging.INFO)
 # Module logger shared by every function in this file
 logger = logging.getLogger(__name__)
 def acquire_lock():
    """Acquire the single-instance lock and return the open lock file.
    The lock is an exclusive, non-blocking ``flock`` on ``.main_service.lock``
    inside ``$LOCAL_STATE_DIR`` (default: the directory containing this file).
    The current PID is written into the file once the lock is held.
    Returns:
        The open text file object that holds the lock. Keep it open for the
        lifetime of the process and pass it to ``release_lock`` on shutdown.
    Raises:
        OSError: ``BlockingIOError`` when another process holds the lock, or
            any other error from opening or locking the file.
    """
    lock_file = Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent))) / ".main_service.lock"
    lock_file.parent.mkdir(parents=True, exist_ok=True)
    # Append mode creates the file without truncating it: a second instance must
    # not erase the running instance's PID before it even tries to lock
    lock_fd = open(lock_file, 'a+')
    try:
        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Lock is ours now, so replacing the stale PID is safe
        lock_fd.seek(0)
        lock_fd.truncate(0)
        lock_fd.write(str(os.getpid()))
        lock_fd.flush()
    except Exception:
        # Do not leak the handle when locking (or writing the PID) fails
        lock_fd.close()
        raise
    logger.info(f"Lock acquired. PID: {os.getpid()}")
    return lock_fd
 def release_lock(lock_fd) -> None:
    """Unlock and close the lock file returned by ``acquire_lock``.
    Best effort: failures are logged as warnings and never raised. The file is
    closed even when the explicit unlock fails.
    Args:
        lock_fd: Open file object holding the ``flock``.
    """
    try:
        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
    except Exception as e:
        logger.warning(f"Could not release lock: {e}")
    finally:
        # Close regardless of the unlock outcome so the handle is not leaked
        try:
            lock_fd.close()
        except Exception as e:
            logger.warning(f"Could not release lock: {e}")
 def initialize_services() -> None:
    """Bring up the services the main loop relies on.
    Order: Telegram (only when both token and chat id are set; also sends the
    start notification), WebDAV (only when configured), the VRAM manager and
    the processed-files registry. Project modules are imported at call time.
    """
    from config import settings
    from services.webdav_service import webdav_service
    from services.vram_manager import vram_manager
    from services.telegram_service import telegram_service
    from storage.processed_registry import processed_registry
    token = settings.TELEGRAM_TOKEN
    chat_id = token and settings.TELEGRAM_CHAT_ID
    # Telegram is optional: skip it unless both credentials are present
    if token and chat_id:
        telegram_service.configure(token, chat_id)
        telegram_service.send_start_notification()
    # WebDAV is optional as well
    if settings.has_webdav_config:
        webdav_service.initialize()
    # These two are always started, VRAM manager first
    for service in (vram_manager, processed_registry):
        service.initialize()
    logger.info("All services initialized")
 def _process_new_files(remote_paths, extensions, processor, registry) -> None:
    """Process every unregistered path that ends with one of ``extensions``.
    A file whose processing raises is logged and skipped, so it cannot block
    the files after it; it is not registered and is therefore retried on the
    next cycle. Successfully processed files are saved to the registry.
    """
    suffixes = tuple(extensions)
    for file_path in remote_paths:
        if not file_path.lower().endswith(suffixes):
            continue
        if registry.is_processed(file_path):
            continue
        try:
            processor.process(file_path)
        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")
            continue
        registry.save(file_path)
 def run_main_loop() -> None:
    """Poll the remote folders forever and process files not seen before.
    Each cycle reloads the processed-files registry and, when WebDAV is
    configured, handles the PDF, audio and text folders in that order. Errors
    outside individual files (listing, registry) are logged and end the
    current cycle; the loop then sleeps ``settings.POLL_INTERVAL`` seconds and
    starts over. The function only returns by exception (e.g. Ctrl+C).
    """
    from config import settings
    from services.webdav_service import webdav_service
    from storage.processed_registry import processed_registry
    from processors.audio_processor import AudioProcessor
    from processors.pdf_processor import PDFProcessor
    from processors.text_processor import TextProcessor
    audio_processor = AudioProcessor()
    pdf_processor = PDFProcessor()
    text_processor = TextProcessor()
    while True:
        try:
            logger.info("--- Polling for new files ---")
            processed_registry.load()
            if settings.has_webdav_config:
                # Only the PDF folder is created on demand, as before
                webdav_service.mkdir(settings.REMOTE_PDF_FOLDER)
                # (remote folder, accepted extensions, processor) in processing order
                work = (
                    (settings.REMOTE_PDF_FOLDER, ('.pdf',), pdf_processor),
                    (settings.REMOTE_AUDIOS_FOLDER, settings.AUDIO_EXTENSIONS, audio_processor),
                    (settings.REMOTE_TXT_FOLDER, settings.TXT_EXTENSIONS, text_processor),
                )
                for folder, extensions, processor in work:
                    _process_new_files(webdav_service.list(folder), extensions, processor, processed_registry)
        except Exception as e:
            logger.error(f"Error in main loop: {e}")
        logger.info(f"Cycle completed. Waiting {settings.POLL_INTERVAL} seconds...")
        time.sleep(settings.POLL_INTERVAL)
 def main():
    """Run the service: take the single-instance lock, initialize, then poll.
    Exits with status 1 when the lock is already held by another process.
    Ctrl+C is treated as a normal shutdown, and the lock is released on every
    exit path once it has been acquired.
    """
    try:
        lock_fd = acquire_lock()
    except BlockingIOError:
        # A non-blocking flock reports "already locked" as BlockingIOError
        logger.error("Another CBCFacil instance is already running; exiting")
        sys.exit(1)
    try:
        logger.info("=== CBCFacil Service Started ===")
        initialize_services()
        run_main_loop()
    except KeyboardInterrupt:
        logger.info("Shutdown requested")
    finally:
        release_lock(lock_fd)
 if __name__ == "__main__":
    # CLI mode: "python main.py whisper|pdf <file>" processes one file and exits.
    # Without arguments the polling service is started.
    if len(sys.argv) > 1:
        command = sys.argv[1]
        # Only argv[2] is used; an extra fourth argument is still accepted (and
        # ignored) so existing four-argument invocations keep working
        has_file_arg = len(sys.argv) in (3, 4)
        if command == "whisper" and has_file_arg:
            from processors.audio_processor import AudioProcessor
            AudioProcessor().process(sys.argv[2])
        elif command == "pdf" and has_file_arg:
            from processors.pdf_processor import PDFProcessor
            PDFProcessor().process(sys.argv[2])
        else:
            print("Usage: python main.py [whisper|pdf] <file>")
            sys.exit(1)
    else:
        main()
--- a/processors/__init__.py
+++ b/processors/__init__.py
@@ -0,0 +1,15 @@
 """
 Processors package for CBCFacil
 """
 from .base_processor import FileProcessor
 from .audio_processor import AudioProcessor
 from .pdf_processor import PDFProcessor
 from .text_processor import TextProcessor
 __all__ = [
    'FileProcessor',
    'AudioProcessor',
    'PDFProcessor',
    'TextProcessor'
 ]
--- a/processors/audio_processor.py
+++ b/processors/audio_processor.py
@@ -0,0 +1,93 @@
 """
 Audio file processor using Whisper
 """
 import logging
 from pathlib import Path
 from typing import Dict, Any
 from ..core import FileProcessingError
 from ..config import settings
 from ..services import vram_manager
 from ..services.gpu_detector import gpu_detector
 from .base_processor import FileProcessor
 try:
    import whisper
    import torch
    WHISPER_AVAILABLE = True
 except ImportError:
    WHISPER_AVAILABLE = False
 class AudioProcessor(FileProcessor):
    """Processor that transcribes audio files to text with Whisper.
    The model is loaded on first use and kept until ``cleanup`` is called.
    """
    def __init__(self):
        super().__init__("AudioProcessor")
        self.logger = logging.getLogger(__name__)
        self._model = None
        self._model_name = "medium"  # Optimized for Spanish
    def can_process(self, file_path: str) -> bool:
        """Return True when the file extension is listed in ``settings.AUDIO_EXTENSIONS``."""
        ext = self.get_file_extension(file_path)
        return ext in settings.AUDIO_EXTENSIONS
    def _load_model(self):
        """Load the Whisper model on first call; later calls are no-ops.
        Raises:
            FileProcessingError: if whisper/torch could not be imported.
        """
        if not WHISPER_AVAILABLE:
            raise FileProcessingError("Whisper not installed")
        if self._model is None:
            device = gpu_detector.get_device()
            self.logger.info(f"Loading Whisper model: {self._model_name} on {device}")
            self._model = whisper.load_model(self._model_name, device=device)
            vram_manager.update_usage()
    def process(self, file_path: str) -> Dict[str, Any]:
        """Transcribe an audio file (Spanish) and save the text next to the downloads.
        Args:
            file_path: Path of the audio file to transcribe.
        Returns:
            Dict with ``success``, ``transcription_path`` (the ``.txt`` written
            to ``settings.LOCAL_DOWNLOADS_PATH``), ``text`` and ``model_used``.
        Raises:
            FileProcessingError: if the file is missing or any step of loading,
                transcribing or saving fails; the original error is chained.
        """
        self.validate_file(file_path)
        audio_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / f"{audio_path.stem}.txt"
        self.logger.info(f"Processing audio file: {audio_path}")
        try:
            self._load_model()
            vram_manager.update_usage()
            # inference_mode turns off autograd bookkeeping while transcribing
            with torch.inference_mode():
                result = self._model.transcribe(
                    str(audio_path),
                    language="es",
                    fp16=True,
                    verbose=False
                )
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(result["text"])
            self.logger.info(f"Transcription completed: {output_path}")
            return {
                "success": True,
                "transcription_path": str(output_path),
                "text": result["text"],
                "model_used": self._model_name
            }
        except Exception as e:
            self.logger.error(f"Audio processing failed: {e}")
            # Chain the cause so the original traceback is not lost
            raise FileProcessingError(f"Audio processing failed: {e}") from e
    def cleanup(self) -> None:
        """Drop the loaded model (if any) and ask the VRAM manager to clean up."""
        if self._model is not None:
            del self._model
            self._model = None
            vram_manager.cleanup()
--- a/processors/base_processor.py
+++ b/processors/base_processor.py
@@ -0,0 +1,40 @@
 """
 Base File Processor (Strategy Pattern)
 """
 from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Dict, Any, Optional
 from ..core import FileProcessingError
 class FileProcessor(ABC):
    """Common interface and path helpers shared by all file processors."""
    def __init__(self, name: str):
        self.name = name
    @abstractmethod
    def can_process(self, file_path: str) -> bool:
        """Tell whether this processor handles the given file type."""
    @abstractmethod
    def process(self, file_path: str) -> Dict[str, Any]:
        """Process the file and return a result dictionary."""
    def get_file_extension(self, file_path: str) -> str:
        """Return the lower-cased extension of the path, dot included."""
        suffix = Path(file_path).suffix
        return suffix.lower()
    def get_base_name(self, file_path: str) -> str:
        """Return the final path component without its last extension."""
        candidate = Path(file_path)
        return candidate.stem
    def validate_file(self, file_path: str) -> None:
        """Ensure the path points to an existing regular file.
        Raises:
            FileProcessingError: if the path does not exist or is not a file.
        """
        target = Path(file_path)
        if target.is_file():
            return
        # Distinguish "exists but is something else" from "missing"
        if target.exists():
            raise FileProcessingError(f"Path is not a file: {file_path}")
        raise FileProcessingError(f"File not found: {file_path}")
--- a/processors/pdf_processor.py
+++ b/processors/pdf_processor.py
@@ -0,0 +1,159 @@
 """
 PDF file processor with OCR
 """
 import logging
 from pathlib import Path
 from typing import Dict, Any
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from ..core import FileProcessingError
 from ..config import settings
 from ..services import vram_manager
 from ..services.gpu_detector import gpu_detector
 from .base_processor import FileProcessor
 try:
    import torch
    import pytesseract
    import easyocr
    import cv2
    import numpy as np
    from pdf2image import convert_from_path
    from PIL import Image
    PDF_OCR_AVAILABLE = True
 except ImportError:
    PDF_OCR_AVAILABLE = False
 class PDFProcessor(FileProcessor):
    """Processor that extracts text from PDF files through OCR.
    Pages are rendered to images, preprocessed and read by EasyOCR and
    Tesseract in parallel; EasyOCR text is preferred, Tesseract is the fallback.
    """
    def __init__(self):
        super().__init__("PDFProcessor")
        self.logger = logging.getLogger(__name__)
        self._easyocr_reader = None
    def can_process(self, file_path: str) -> bool:
        """Return True when the file extension is ``.pdf``."""
        return self.get_file_extension(file_path) == ".pdf"
    def _load_easyocr(self):
        """Create the Spanish EasyOCR reader on first call; later calls are no-ops."""
        if self._easyocr_reader is None:
            use_gpu = gpu_detector.is_available()
            self.logger.info(f"Loading EasyOCR reader (GPU: {use_gpu})")
            self._easyocr_reader = easyocr.Reader(['es'], gpu=use_gpu)
            vram_manager.update_usage()
    # Annotations are strings so the class can still be defined when PIL failed to
    # import (PDF_OCR_AVAILABLE is False in that case)
    def _preprocess_image(self, image: "Image.Image") -> "Image.Image":
        """Return a grayscale copy of the page upscaled 2x with LANCZOS resampling."""
        if image.mode != 'L':
            image = image.convert('L')
        image = image.resize((image.width * 2, image.height * 2), Image.Resampling.LANCZOS)
        return image
    def _easyocr_texts(self, pil_images) -> list:
        """Run EasyOCR over the batch and return one text string per image."""
        # EasyOCR documents paths, bytes and numpy arrays as inputs, so the PIL
        # pages are converted to arrays first
        arrays = [np.array(img) for img in pil_images]
        per_image = self._easyocr_reader.readtext_batched(arrays, detail=0)
        # detail=0 gives a list of text fragments per image; merge them so every
        # image maps to a single string, like the Tesseract results
        return ["\n".join(fragments) for fragments in per_image]
    def _tesseract_texts(self, pil_images) -> list:
        """Run Tesseract (Spanish) over the batch and return one string per image."""
        return [pytesseract.image_to_string(img, lang='spa') for img in pil_images]
    def _run_ocr_parallel(self, pil_images) -> Dict[str, list]:
        """Run EasyOCR (when loaded) and Tesseract concurrently on a batch.
        Returns:
            ``{'easyocr': [...], 'tesseract': [...]}`` with one text string per
            image. An engine that fails or is not loaded contributes empty
            strings; its error is logged.
        """
        page_count = len(pil_images)
        results = {
            'easyocr': [''] * page_count,
            'tesseract': [''] * page_count
        }
        with ThreadPoolExecutor(max_workers=2) as executor:
            futures = {}
            if self._easyocr_reader:
                futures['easyocr'] = executor.submit(self._easyocr_texts, pil_images)
            futures['tesseract'] = executor.submit(self._tesseract_texts, pil_images)
            for name, future in futures.items():
                try:
                    results[name] = future.result()
                except Exception as e:
                    self.logger.error(f"OCR engine {name} failed: {e}")
                    results[name] = [''] * page_count
        return results
    def process(self, file_path: str) -> Dict[str, Any]:
        """OCR a PDF and save the extracted text to the downloads directory.
        Args:
            file_path: Path of the PDF to process.
        Returns:
            Dict with ``success``, ``text_path`` (the ``.txt`` written to
            ``settings.LOCAL_DOWNLOADS_PATH``), ``text`` and ``pages_processed``.
        Raises:
            FileProcessingError: if the file is missing, the OCR dependencies
                are not installed, or any processing step fails (cause chained).
        """
        self.validate_file(file_path)
        pdf_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / f"{pdf_path.stem}.txt"
        if not PDF_OCR_AVAILABLE:
            raise FileProcessingError("PDF OCR dependencies not installed")
        self.logger.info(f"Processing PDF file: {pdf_path}")
        try:
            self._load_easyocr()
            vram_manager.update_usage()
            self.logger.debug("Converting PDF to images")
            pil_images = convert_from_path(
                str(pdf_path),
                dpi=settings.PDF_DPI,
                fmt='png',
                thread_count=settings.PDF_RENDER_THREAD_COUNT
            )
            all_text = []
            batch_size = settings.PDF_BATCH_SIZE
            for i in range(0, len(pil_images), batch_size):
                batch = pil_images[i:i + batch_size]
                self.logger.debug(f"Processing batch {i//batch_size + 1}/{(len(pil_images) + batch_size - 1)//batch_size}")
                preprocessed_batch = [self._preprocess_image(img) for img in batch]
                ocr_results = self._run_ocr_parallel(preprocessed_batch)
                for easy_text, tess_text in zip(ocr_results['easyocr'], ocr_results['tesseract']):
                    # Prefer EasyOCR; whitespace-only output counts as empty
                    text = easy_text.strip() or tess_text.strip()
                    if text:
                        all_text.append(text)
            combined_text = "\n\n".join(all_text)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(combined_text)
            self.logger.info(f"PDF processing completed: {output_path}")
            return {
                "success": True,
                "text_path": str(output_path),
                "text": combined_text,
                "pages_processed": len(pil_images)
            }
        except Exception as e:
            self.logger.error(f"PDF processing failed: {e}")
            # Chain the cause so the original traceback is not lost
            raise FileProcessingError(f"PDF processing failed: {e}") from e
    def cleanup(self) -> None:
        """Drop the EasyOCR reader and ask the VRAM manager to clean up."""
        self._easyocr_reader = None
        vram_manager.cleanup()
--- a/processors/text_processor.py
+++ b/processors/text_processor.py
@@ -0,0 +1,55 @@
 """
 Text file processor
 """
 import logging
 from pathlib import Path
 from typing import Dict, Any
 from ..core import FileProcessingError
 from ..config import settings
 from .base_processor import FileProcessor
 class TextProcessor(FileProcessor):
    """Processor for plain-text files.

    Copies the source file into ``settings.LOCAL_DOWNLOADS_PATH`` and returns
    its content.
    """

    def __init__(self):
        super().__init__("TextProcessor")
        self.logger = logging.getLogger(__name__)

    def can_process(self, file_path: str) -> bool:
        """Return True when the file extension is in ``settings.TXT_EXTENSIONS``."""
        ext = self.get_file_extension(file_path)
        return ext in settings.TXT_EXTENSIONS

    def process(self, file_path: str) -> Dict[str, Any]:
        """Copy a text file to the downloads directory and return its content.

        Args:
            file_path: Path of the text file to process.

        Returns:
            A dict with ``success`` (True), ``text_path`` (destination path as
            a string) and ``text`` (the file content).

        Raises:
            FileProcessingError: If reading or writing the file fails.
        """
        self.validate_file(file_path)
        text_path = Path(file_path)
        output_path = settings.LOCAL_DOWNLOADS_PATH / text_path.name
        self.logger.info(f"Processing text file: {text_path}")
        try:
            # Read the source completely before touching the destination.
            content = self._read_file(text_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            # When the source already is the destination file, opening it for
            # writing would truncate it; there is nothing to copy in that case.
            already_in_place = output_path.exists() and output_path.samefile(text_path)
            if not already_in_place:
                with open(output_path, 'w', encoding='utf-8') as dst:
                    dst.write(content)
            self.logger.info(f"Text file processing completed: {output_path}")
            return {
                "success": True,
                "text_path": str(output_path),
                "text": content
            }
        except Exception as e:
            self.logger.error(f"Text processing failed: {e}")
            raise FileProcessingError(f"Text processing failed: {e}") from e

    def _read_file(self, file_path: Path) -> str:
        """Return the UTF-8 decoded content of ``file_path``."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -0,0 +1,23 @@
 # Development dependencies
 pytest>=7.4.0
 pytest-cov>=4.1.0
 pytest-mock>=3.11.0
 pytest-asyncio>=0.21.0
 coverage>=7.3.0
 # Code quality
 black>=23.0.0
 flake8>=6.0.0
 mypy>=1.5.0
 isort>=5.12.0
 # Security
 bandit>=1.7.5
 safety>=2.3.0
 # Performance testing
 pytest-benchmark>=4.0.0
 # Documentation
 mkdocs>=1.5.0
 mkdocs-material>=9.0.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,17 +1,31 @@
-Flask
+# Core web framework
-Flask-CORS
+Flask>=3.0.0
-Pillow
+Flask-CORS>=4.0.0
-easyocr
+
-numpy
+# AI/ML dependencies
-openai-whisper
+torch>=2.0.0
-opencv-python-headless
+torchvision>=0.15.0
-pdf2image
+openai-whisper>=20231117
-pypdf
+transformers>=4.30.0
-python-docx
+easyocr>=1.7.0
-python-dotenv
+
-pytesseract
+# Image processing
-reportlab
+Pillow>=10.0.0
-requests
+opencv-python-headless>=4.8.0
-torch
+
-transformers
+# Document processing
-webdavclient3
+pdf2image>=1.17.0
 pypdf>=3.17.0
 python-docx>=0.8.11
 reportlab>=4.0.0
 pytesseract>=0.3.10
 # Utilities
 numpy>=1.24.0
 requests>=2.31.0
 python-dotenv>=1.0.0
 webdavclient3>=0.9.8
 # Optional: for enhanced functionality
 # unidecode>=1.3.7  # For filename normalization
 # python-magic>=0.4.27  # For file type detection
--- a/services/__init__.py
+++ b/services/__init__.py
@@ -0,0 +1,17 @@
 """
 Services package for CBCFacil
 """
 from .webdav_service import WebDAVService, webdav_service
 from .vram_manager import VRAMManager, vram_manager
 from .telegram_service import TelegramService, telegram_service
 from .gpu_detector import GPUDetector, GPUType, gpu_detector
 from .ai import ai_service
 __all__ = [
    'WebDAVService', 'webdav_service',
    'VRAMManager', 'vram_manager',
    'TelegramService', 'telegram_service',
    'GPUDetector', 'GPUType', 'gpu_detector',
    'ai_service'
 ]
--- a/services/ai/__init__.py
+++ b/services/ai/__init__.py
@@ -0,0 +1,15 @@
 """
 AI Providers package for CBCFacil
 """
 from .base_provider import AIProvider
 from .claude_provider import ClaudeProvider
 from .gemini_provider import GeminiProvider
 from .provider_factory import AIProviderFactory
 # Public names re-exported by the AI providers package
 __all__ = [
    'AIProvider',
    'ClaudeProvider',
    'GeminiProvider',
    'AIProviderFactory'
 ]
--- a/services/ai/base_provider.py
+++ b/services/ai/base_provider.py
@@ -0,0 +1,35 @@
 """
 Base AI Provider interface (Strategy pattern)
 """
 from abc import ABC, abstractmethod
 from typing import Optional, Dict, Any
 class AIProvider(ABC):
    """Interface every AI backend must implement.

    Concrete providers supply three text operations (summarize, correct,
    classify), an availability check and a human-readable ``name``.
    """

    @abstractmethod
    def summarize(self, text: str, **kwargs) -> str:
        """Return a summary of ``text``."""

    @abstractmethod
    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` with grammar and spelling corrected."""

    @abstractmethod
    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Return a dict describing the category ``text`` belongs to."""

    @abstractmethod
    def is_available(self) -> bool:
        """Return True when the provider is configured and usable."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the provider."""
--- a/services/ai/claude_provider.py
+++ b/services/ai/claude_provider.py
@@ -0,0 +1,108 @@
 """
 Claude AI Provider implementation
 """
 import logging
 import subprocess
 import shutil
 from typing import Dict, Any, Optional
 from ..config import settings
 from ..core import AIProcessingError
 from .base_provider import AIProvider
 class ClaudeProvider(AIProvider):
    """Claude AI provider that drives the ``claude`` command-line tool.

    The prompt is sent on the CLI's standard input and the reply is read from
    its standard output. Credentials are handed over through environment
    variables.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Explicit path from settings wins; otherwise look the binary up on PATH.
        self._cli_path = settings.CLAUDE_CLI_PATH or shutil.which("claude")
        self._token = settings.ZAI_AUTH_TOKEN
        self._base_url = settings.ZAI_BASE_URL

    @property
    def name(self) -> str:
        """Provider name."""
        return "Claude"

    def is_available(self) -> bool:
        """Return True when both a CLI path and an auth token are configured."""
        return bool(self._cli_path and self._token)

    def _get_env(self) -> Dict[str, str]:
        """Build the environment for the CLI subprocess.

        Starts from the current process environment so the child keeps PATH,
        HOME and friends, then layers the Claude-specific variables on top.
        Unset values are skipped because ``subprocess`` rejects ``None``.
        """
        import os

        env = dict(os.environ)
        if self._token:
            env['ANTHROPIC_AUTH_TOKEN'] = self._token
        if self._base_url:
            env['ANTHROPIC_BASE_URL'] = self._base_url
        env['PYTHONUNBUFFERED'] = '1'
        return env

    def _run_cli(self, prompt: str, timeout: int = 300) -> str:
        """Run the Claude CLI with ``prompt`` on stdin and return its stdout.

        Args:
            prompt: Text sent to the CLI.
            timeout: Seconds to wait before giving up.

        Returns:
            The CLI's standard output with surrounding whitespace removed.

        Raises:
            AIProcessingError: If the provider is not configured, the process
                cannot be run, times out, or exits with a non-zero status.
        """
        if not self.is_available():
            raise AIProcessingError("Claude CLI not available or not configured")
        try:
            process = subprocess.run(
                [self._cli_path],
                input=prompt,
                env=self._get_env(),
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Claude CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Claude CLI error: {e}") from e
        # Checked outside the try block so this error is not wrapped a second time.
        if process.returncode != 0:
            error_msg = process.stderr or "Unknown error"
            raise AIProcessingError(f"Claude CLI failed: {error_msg}")
        return process.stdout.strip()

    def summarize(self, text: str, **kwargs) -> str:
        """Generate a Spanish summary of ``text`` using Claude."""
        prompt = f"""Summarize the following text:
 {text}
 Provide a clear, concise summary in Spanish."""
        return self._run_cli(prompt)

    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` corrected for grammar, spelling and clarity."""
        prompt = f"""Correct the following text for grammar, spelling, and clarity:
 {text}
 Return only the corrected text, nothing else."""
        return self._run_cli(prompt)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the known categories.

        Returns:
            A dict with ``category``, ``confidence`` (a fixed 0.9, not a value
            reported by the model) and ``provider``. Any reply that is not one
            of the known category names maps to ``"otras_clases"``.
        """
        categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]
        prompt = f"""Classify the following text into one of these categories:
 - historia
 - analisis_contable
 - instituciones_gobierno
 - otras_clases
 Text: {text}
 Return only the category name, nothing else."""
        result = self._run_cli(prompt).lower()
        # Validate result
        if result not in categories:
            result = "otras_clases"
        return {
            "category": result,
            "confidence": 0.9,
            "provider": self.name
        }
--- a/services/ai/gemini_provider.py
+++ b/services/ai/gemini_provider.py
@@ -0,0 +1,297 @@
 """
 Gemini AI Provider - Optimized version with rate limiting and retry
 """
 import logging
 import subprocess
 import shutil
 import requests
 import time
 from typing import Dict, Any, Optional
 from datetime import datetime, timedelta
 from ..config import settings
 from ..core import AIProcessingError
 from .base_provider import AIProvider
 class TokenBucket:
    """Token bucket rate limiter.

    Tokens refill continuously at ``rate`` per second up to ``capacity``.
    ``acquire`` never blocks: it returns how long the caller should sleep
    before proceeding.
    """

    def __init__(self, rate: float = 10, capacity: int = 20):
        import threading

        self.rate = rate  # tokens per second
        self.capacity = capacity
        self.tokens = capacity
        self.last_update = time.time()
        # Created eagerly: creating the lock lazily on first use is itself a
        # race between threads.
        self._lock = threading.Lock()

    def _get_lock(self):
        """Return the lock guarding the bucket state."""
        if self._lock is None:
            import threading
            self._lock = threading.Lock()
        return self._lock

    def acquire(self, tokens: int = 1) -> float:
        """Reserve ``tokens`` and return the seconds to wait before using them.

        Args:
            tokens: Number of tokens to take.

        Returns:
            ``0.0`` when enough tokens were available, otherwise the time in
            seconds after which the reserved tokens will have been refilled.
        """
        with self._get_lock():
            now = time.time()
            elapsed = max(0.0, now - self.last_update)
            self.last_update = now
            self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
            # Always debit the full request. A negative balance records the
            # tokens owed to callers that are currently waiting, so the refill
            # that happens during their wait is not handed out a second time.
            self.tokens -= tokens
            if self.tokens >= 0:
                return 0.0
            return -self.tokens / self.rate
 class CircuitBreaker:
    """Circuit breaker for API calls.

    States: ``closed`` (calls pass through), ``open`` (calls are rejected)
    and ``half-open`` (calls are let through after ``recovery_timeout``
    seconds to probe whether the backend recovered).
    The breaker opens after ``failure_threshold`` consecutive failures.
    """

    def __init__(self, failure_threshold: int = 5, recovery_timeout: int = 60):
        import threading

        self.failure_threshold = failure_threshold
        self.recovery_timeout = recovery_timeout
        self.failures = 0
        self.last_failure: Optional[datetime] = None
        self.state = "closed"  # closed, open, half-open
        self._lock = threading.Lock()

    def _get_lock(self):
        """Return the lock guarding the breaker state."""
        if self._lock is None:
            import threading
            self._lock = threading.Lock()
        return self._lock

    def call(self, func, *args, **kwargs):
        """Invoke ``func(*args, **kwargs)`` under circuit-breaker protection.

        Returns:
            Whatever ``func`` returns.

        Raises:
            AIProcessingError: If the breaker is open and the recovery timeout
                has not elapsed yet.
            Exception: Any exception raised by ``func`` is recorded as a
                failure and re-raised unchanged.
        """
        with self._get_lock():
            if self.state == "open":
                if self.last_failure and (datetime.utcnow() - self.last_failure).total_seconds() > self.recovery_timeout:
                    self.state = "half-open"
                else:
                    raise AIProcessingError("Circuit breaker is open")
        # The lock is released while func runs so that one slow call does not
        # serialise every other caller behind it.
        try:
            result = func(*args, **kwargs)
        except Exception:
            with self._get_lock():
                self.failures += 1
                self.last_failure = datetime.utcnow()
                # A failed probe re-opens the breaker immediately.
                if self.state == "half-open" or self.failures >= self.failure_threshold:
                    self.state = "open"
            raise
        with self._get_lock():
            if self.state != "open":
                # Any success ends the streak, so only consecutive failures
                # count towards the threshold.
                self.failures = 0
                self.state = "closed"
        return result
 class GeminiProvider(AIProvider):
    """Gemini AI provider with rate limiting, retry and CLI -> API fallback.

    Requests go to the ``gemini`` CLI when one is found and fall back to the
    HTTP API when the CLI fails or is missing and an API key is configured.
    """

    def __init__(self):
        super().__init__()
        # The abstract base does not provide a logger; the retry and fallback
        # paths below log through this one.
        self.logger = logging.getLogger(__name__)
        self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
        self._api_key = settings.GEMINI_API_KEY
        self._flash_model = settings.GEMINI_FLASH_MODEL
        self._pro_model = settings.GEMINI_PRO_MODEL
        self._session = None
        self._rate_limiter = TokenBucket(rate=15, capacity=30)
        self._circuit_breaker = CircuitBreaker(failure_threshold=5, recovery_timeout=60)
        self._retry_config = {
            "max_attempts": 3,
            "base_delay": 1.0,
            "max_delay": 30.0,
            "exponential_base": 2
        }

    @property
    def name(self) -> str:
        """Provider name."""
        return "Gemini"

    def is_available(self) -> bool:
        """Return True when either the CLI or an API key is configured."""
        return bool(self._cli_path or self._api_key)

    def _init_session(self) -> None:
        """Create the pooled HTTP session on first use."""
        if self._session is None:
            self._session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=10,
                pool_maxsize=20,
                max_retries=0  # We handle retries manually
            )
            self._session.mount('https://', adapter)

    def _run_with_retry(self, func, *args, **kwargs):
        """Execute ``func`` through the circuit breaker with exponential backoff.

        Only ``requests`` exceptions are retried; anything else (including the
        breaker's own "open" error) propagates immediately.

        Raises:
            AIProcessingError: When every attempt failed.
        """
        max_attempts = self._retry_config["max_attempts"]
        base_delay = self._retry_config["base_delay"]
        max_delay = self._retry_config["max_delay"]
        growth = self._retry_config["exponential_base"]
        last_exception = None
        for attempt in range(max_attempts):
            try:
                return self._circuit_breaker.call(func, *args, **kwargs)
            except requests.exceptions.RequestException as e:
                last_exception = e
                if attempt < max_attempts - 1:
                    delay = min(base_delay * (growth ** attempt), max_delay)
                    # Add jitter
                    delay += delay * 0.1 * (time.time() % 1)
                    self.logger.warning(f"Attempt {attempt + 1} failed: {e}, retrying in {delay:.2f}s")
                    time.sleep(delay)
        raise AIProcessingError(f"Max retries exceeded: {last_exception}") from last_exception

    def _run_cli(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the Gemini CLI with ``prompt`` and return its stdout.

        Args:
            prompt: Text passed to the CLI as an argument.
            use_flash: Use the flash model when True, the pro model otherwise.
            timeout: Seconds to wait before giving up.

        Raises:
            AIProcessingError: If the CLI is missing, cannot be run, times out
                or exits with a non-zero status.
        """
        if not self._cli_path:
            raise AIProcessingError("Gemini CLI not available")
        model = self._flash_model if use_flash else self._pro_model
        cmd = [self._cli_path, model, prompt]
        try:
            # Apply rate limiting
            wait_time = self._rate_limiter.acquire()
            if wait_time > 0:
                time.sleep(wait_time)
            process = subprocess.run(
                cmd,
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Gemini CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Gemini CLI error: {e}") from e
        # Checked outside the try block so this error is not wrapped a second time.
        if process.returncode != 0:
            error_msg = process.stderr or "Unknown error"
            raise AIProcessingError(f"Gemini CLI failed: {error_msg}")
        return process.stdout.strip()

    def _call_api(self, prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
        """Call the Gemini HTTP API with rate limiting and retry.

        Args:
            prompt: Text to send.
            use_flash: Use the flash model when True, the pro model otherwise.
            timeout: Per-request timeout in seconds.

        Returns:
            The text of the first part of the first candidate, stripped.

        Raises:
            AIProcessingError: If no API key is configured, all retries fail,
                or the response does not have the expected structure.
        """
        if not self._api_key:
            raise AIProcessingError("Gemini API key not configured")
        self._init_session()
        model = self._flash_model if use_flash else self._pro_model
        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
        payload = {
            "contents": [{
                "parts": [{"text": prompt}]
            }]
        }
        # The key travels in a header rather than the query string, because
        # request errors include the URL and are written to the log.
        headers = {"x-goog-api-key": self._api_key}

        def api_call():
            # Apply rate limiting
            wait_time = self._rate_limiter.acquire()
            if wait_time > 0:
                time.sleep(wait_time)
            response = self._session.post(
                url,
                json=payload,
                headers=headers,
                timeout=timeout
            )
            response.raise_for_status()
            return response

        response = self._run_with_retry(api_call)
        data = response.json()
        if "candidates" not in data or not data["candidates"]:
            raise AIProcessingError("Empty response from Gemini API")
        candidate = data["candidates"][0]
        if "content" not in candidate or "parts" not in candidate["content"]:
            raise AIProcessingError("Invalid response format from Gemini API")
        parts = candidate["content"]["parts"]
        if not parts or "text" not in parts[0]:
            raise AIProcessingError("Invalid response format from Gemini API")
        return parts[0]["text"].strip()

    def _run(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run Gemini, trying the CLI first and the API second.

        Raises:
            AIProcessingError: If neither route is usable or the API call fails.
        """
        # Try CLI first if available
        if self._cli_path:
            try:
                return self._run_cli(prompt, use_flash, timeout)
            except Exception as e:
                self.logger.warning(f"Gemini CLI failed, trying API: {e}")
        # Fallback to API
        if self._api_key:
            api_timeout = min(timeout, 180)
            return self._call_api(prompt, use_flash, api_timeout)
        raise AIProcessingError("No Gemini provider available (CLI or API)")

    def summarize(self, text: str, **kwargs) -> str:
        """Generate a Spanish summary of ``text`` using Gemini."""
        prompt = f"""Summarize the following text:
 {text}
 Provide a clear, concise summary in Spanish."""
        return self._run(prompt, use_flash=True)

    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` corrected for grammar, spelling and clarity."""
        prompt = f"""Correct the following text for grammar, spelling, and clarity:
 {text}
 Return only the corrected text, nothing else."""
        return self._run(prompt, use_flash=True)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the known categories.

        Returns:
            A dict with ``category``, ``confidence`` (a fixed 0.9, not a value
            reported by the model) and ``provider``. Any reply that is not one
            of the known category names maps to ``"otras_clases"``.
        """
        categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]
        prompt = f"""Classify the following text into one of these categories:
 - historia
 - analisis_contable
 - instituciones_gobierno
 - otras_clases
 Text: {text}
 Return only the category name, nothing else."""
        result = self._run(prompt, use_flash=True).lower()
        # Validate result
        if result not in categories:
            result = "otras_clases"
        return {
            "category": result,
            "confidence": 0.9,
            "provider": self.name
        }

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of rate-limiter, circuit-breaker and route state."""
        return {
            "rate_limiter": {
                "tokens": round(self._rate_limiter.tokens, 2),
                "capacity": self._rate_limiter.capacity,
                "rate": self._rate_limiter.rate
            },
            "circuit_breaker": {
                "state": self._circuit_breaker.state,
                "failures": self._circuit_breaker.failures,
                "failure_threshold": self._circuit_breaker.failure_threshold
            },
            "cli_available": bool(self._cli_path),
            "api_available": bool(self._api_key)
        }
 # Global instance is created in __init__.py
--- a/services/ai/provider_factory.py
+++ b/services/ai/provider_factory.py
@@ -0,0 +1,54 @@
 """
 AI Provider Factory (Factory Pattern)
 """
 import logging
 from typing import Dict, Type
 from ..core import AIProcessingError
 from .base_provider import AIProvider
 from .claude_provider import ClaudeProvider
 from .gemini_provider import GeminiProvider
 class AIProviderFactory:
    """Holds one instance of each AI provider and picks a usable one."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._providers: Dict[str, AIProvider] = {
            'claude': ClaudeProvider(),
            'gemini': GeminiProvider()
        }

    def get_provider(self, preferred: str = 'gemini') -> AIProvider:
        """Return the preferred provider, or the first available alternative.

        Raises:
            AIProcessingError: When no registered provider is available.
        """
        # Preferred provider wins when it is registered and usable.
        if preferred in self._providers and self._providers[preferred].is_available():
            self.logger.info(f"Using {preferred} provider")
            return self._providers[preferred]
        # Otherwise walk the registry in insertion order.
        for key in self._providers:
            candidate = self._providers[key]
            if not candidate.is_available():
                continue
            self.logger.info(f"Falling back to {key} provider")
            return candidate
        raise AIProcessingError("No AI providers available")

    def get_all_available(self) -> Dict[str, AIProvider]:
        """Return a name -> provider mapping of every available provider."""
        usable = {}
        for key, candidate in self._providers.items():
            if candidate.is_available():
                usable[key] = candidate
        return usable

    def get_best_provider(self) -> AIProvider:
        """Return the best available provider (Gemini > Claude)."""
        return self.get_provider('gemini')
 # Module-level factory instance, created at import time (this constructs
 # one ClaudeProvider and one GeminiProvider).
 ai_provider_factory = AIProviderFactory()
--- a/services/ai_service.py
+++ b/services/ai_service.py
@@ -0,0 +1,256 @@
 """
 AI Service - Unified interface for AI providers with caching
 """
 import logging
 import hashlib
 import time
 from typing import Optional, Dict, Any
 from threading import Lock
 from ..config import settings
 from ..core import AIProcessingError
 from .ai.provider_factory import AIProviderFactory, ai_provider_factory
 class LRUCache:
    """Thread-safe LRU cache whose entries also expire after ``ttl`` seconds.

    Entries live in an ``OrderedDict`` ordered from least to most recently
    used, so lookups, refreshes and evictions are all O(1).
    """

    def __init__(self, max_size: int = 100, ttl: int = 3600):
        from collections import OrderedDict

        self.max_size = max_size
        self.ttl = ttl
        # key -> (value, insertion timestamp); oldest entry first
        self._cache = OrderedDict()
        self._lock = Lock()

    def _is_expired(self, timestamp: float) -> bool:
        """Return True when an entry stored at ``timestamp`` is past its TTL."""
        return (time.time() - timestamp) > self.ttl

    def get(self, key: str) -> Optional[str]:
        """Return the cached value for ``key``, or None if missing or expired.

        A successful lookup marks the entry as most recently used; an expired
        entry is removed.
        """
        with self._lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            value, timestamp = entry
            if self._is_expired(timestamp):
                del self._cache[key]
                return None
            # Move to end (most recently used)
            self._cache.move_to_end(key)
            return value

    def set(self, key: str, value: str) -> None:
        """Store ``value`` under ``key``, evicting the least recently used entry if full.

        With a non-positive ``max_size`` nothing is stored.
        """
        with self._lock:
            if self.max_size <= 0:
                return
            if key in self._cache:
                # Re-inserting below refreshes both timestamp and recency.
                del self._cache[key]
            else:
                while len(self._cache) >= self.max_size:
                    # Remove least recently used
                    self._cache.popitem(last=False)
            self._cache[key] = (value, time.time())

    def stats(self) -> Dict[str, int]:
        """Return cache counters.

        ``hits`` is the number of stored entries that have not expired yet; it
        is not a count of successful lookups.
        """
        with self._lock:
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": sum(1 for _, t in self._cache.values() if not self._is_expired(t))
            }
 class RateLimiter:
    """Token bucket rate limiter.

    Tokens refill continuously at ``rate`` per second up to ``capacity``.
    ``acquire`` never blocks: it returns how long the caller should sleep
    before proceeding.
    """

    def __init__(self, rate: float = 10, capacity: int = 20):
        self.rate = rate  # tokens per second
        self.capacity = capacity
        self.tokens = capacity
        self.last_update = time.time()
        self._lock = Lock()

    def acquire(self, tokens: int = 1) -> float:
        """Reserve ``tokens`` and return the seconds to wait before using them.

        Args:
            tokens: Number of tokens to take.

        Returns:
            ``0.0`` when enough tokens were available, otherwise the time in
            seconds after which the reserved tokens will have been refilled.
        """
        with self._lock:
            now = time.time()
            elapsed = max(0.0, now - self.last_update)
            self.last_update = now
            self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
            # Always debit the full request. A negative balance records the
            # tokens owed to callers that are currently waiting, so the refill
            # that happens during their wait is not handed out a second time.
            self.tokens -= tokens
            if self.tokens >= 0:
                return 0.0
            return -self.tokens / self.rate
 class AIService:
    """Unified service for AI operations with caching and rate limiting.

    Every public operation checks the prompt cache first, then waits on the
    rate limiter, then delegates to a provider obtained from the factory.
    Provider failures (``AIProcessingError``) are logged and turned into a
    fallback return value instead of being raised.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._factory: Optional["AIProviderFactory"] = None
        self._prompt_cache = LRUCache(max_size=100, ttl=3600)  # 1 hour TTL
        self._rate_limiter = RateLimiter(rate=15, capacity=30)
        self._stats = {
            "total_requests": 0,
            "cache_hits": 0,
            "api_calls": 0
        }

    @property
    def factory(self) -> "AIProviderFactory":
        """Lazy initialization of provider factory"""
        if self._factory is None:
            self._factory = ai_provider_factory
        return self._factory

    def _get_cache_key(self, prompt: str, operation: str) -> str:
        """Generate a cache key from the operation and the complete prompt.

        The whole prompt is hashed: keying on a prefix would make two
        different texts that start the same way share one cache entry.
        """
        content = f"{operation}:{prompt}"
        return hashlib.sha256(content.encode()).hexdigest()

    def _throttle(self) -> None:
        """Sleep for as long as the rate limiter asks before an API call."""
        wait_time = self._rate_limiter.acquire()
        if wait_time > 0:
            time.sleep(wait_time)

    def generate_text(
        self,
        prompt: str,
        provider: Optional[str] = None,
        max_tokens: int = 4096
    ) -> str:
        """Generate text using an AI provider, with caching.

        Args:
            prompt: Prompt to send.
            provider: Preferred provider name; defaults to ``'gemini'``.
            max_tokens: Passed through to the provider's ``generate`` method.

        Returns:
            The generated text, or a string starting with ``"Error: "`` when
            the provider fails or does not offer a ``generate`` method.
        """
        self._stats["total_requests"] += 1
        cache_key = self._get_cache_key(prompt, f"generate:{provider or 'default'}")
        # Check cache
        cached_result = self._prompt_cache.get(cache_key)
        if cached_result is not None:
            self._stats["cache_hits"] += 1
            self.logger.debug(f"Cache hit for generate_text ({len(cached_result)} chars)")
            return cached_result
        # Apply rate limiting
        self._throttle()
        try:
            self._stats["api_calls"] += 1
            ai_provider = self.factory.get_provider(provider or 'gemini')
            # ``generate`` is not part of the AIProvider interface, so report
            # a provider without it the same way as any other AI failure.
            generate = getattr(ai_provider, "generate", None)
            if generate is None:
                raise AIProcessingError(
                    f"{ai_provider.name} provider does not support text generation"
                )
            result = generate(prompt, max_tokens=max_tokens)
            # Cache result
            self._prompt_cache.set(cache_key, result)
            return result
        except AIProcessingError as e:
            self.logger.error(f"AI generation failed: {e}")
            return f"Error: {str(e)}"

    def summarize(self, text: str, **kwargs) -> str:
        """Generate a summary of ``text``, with caching.

        Returns:
            The summary, or a string starting with ``"Error: "`` on failure.
        """
        self._stats["total_requests"] += 1
        cache_key = self._get_cache_key(text, "summarize")
        cached_result = self._prompt_cache.get(cache_key)
        if cached_result is not None:
            self._stats["cache_hits"] += 1
            self.logger.debug(f"Cache hit for summarize ({len(cached_result)} chars)")
            return cached_result
        self._throttle()
        try:
            self._stats["api_calls"] += 1
            provider = self.factory.get_best_provider()
            result = provider.summarize(text, **kwargs)
            self._prompt_cache.set(cache_key, result)
            return result
        except AIProcessingError as e:
            self.logger.error(f"Summarization failed: {e}")
            return f"Error: {str(e)}"

    def correct_text(self, text: str, **kwargs) -> str:
        """Correct grammar and spelling, with caching.

        Returns:
            The corrected text, or the unchanged input on failure.
        """
        self._stats["total_requests"] += 1
        cache_key = self._get_cache_key(text, "correct")
        cached_result = self._prompt_cache.get(cache_key)
        if cached_result is not None:
            self._stats["cache_hits"] += 1
            return cached_result
        self._throttle()
        try:
            self._stats["api_calls"] += 1
            provider = self.factory.get_best_provider()
            result = provider.correct_text(text, **kwargs)
            self._prompt_cache.set(cache_key, result)
            return result
        except AIProcessingError as e:
            self.logger.error(f"Text correction failed: {e}")
            return text

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify content into categories, with caching.

        Returns:
            The provider's classification dict, or
            ``{"category": "otras_clases", "confidence": 0.0}`` on failure.
        """
        import json

        self._stats["total_requests"] += 1
        # For classification, use a shorter text for cache key
        # NOTE(review): texts sharing their first 200 characters therefore
        # share one cached classification -- confirm this is intended.
        short_text = text[:200]
        cache_key = self._get_cache_key(short_text, "classify")
        cached_result = self._prompt_cache.get(cache_key)
        if cached_result is not None:
            self._stats["cache_hits"] += 1
            return json.loads(cached_result)
        self._throttle()
        try:
            self._stats["api_calls"] += 1
            provider = self.factory.get_best_provider()
            result = provider.classify_content(text, **kwargs)
            # The cache stores strings, so the dict is serialised as JSON.
            self._prompt_cache.set(cache_key, json.dumps(result))
            return result
        except AIProcessingError as e:
            self.logger.error(f"Classification failed: {e}")
            return {"category": "otras_clases", "confidence": 0.0}

    def get_stats(self) -> Dict[str, Any]:
        """Return request counters plus cache and rate-limiter state."""
        cache_stats = self._prompt_cache.stats()
        hit_rate = (self._stats["cache_hits"] / self._stats["total_requests"] * 100) if self._stats["total_requests"] > 0 else 0
        return {
            **self._stats,
            "cache_size": cache_stats["size"],
            "cache_max_size": cache_stats["max_size"],
            "cache_hit_rate": round(hit_rate, 2),
            "rate_limiter": {
                "tokens": self._rate_limiter.tokens,
                "capacity": self._rate_limiter.capacity
            }
        }

    def clear_cache(self) -> None:
        """Replace the prompt cache with a fresh, empty one."""
        self._prompt_cache = LRUCache(max_size=100, ttl=3600)
        self.logger.info("AI service cache cleared")
 # Module-level AIService instance, created when this module is imported.
 ai_service = AIService()
--- a/services/gpu_detector.py
+++ b/services/gpu_detector.py
@@ -0,0 +1,247 @@
 """
 GPU Detection and Management Service
 Provides unified interface for detecting and using NVIDIA (CUDA), AMD (ROCm), or CPU.
 Fallback order: NVIDIA -> AMD -> CPU
 """
 import logging
 import os
 import subprocess
 import shutil
 from enum import Enum
 from typing import Dict, Any, Optional
 logger = logging.getLogger(__name__)
 # Try to import torch
 try:
    import torch
    TORCH_AVAILABLE = True
 except ImportError:
    TORCH_AVAILABLE = False
 class GPUType(Enum):
    """Supported GPU types"""
    # NVIDIA GPU; GPUDetector maps this to the PyTorch device string "cuda".
    NVIDIA = "nvidia"
    # AMD GPU; GPUDetector also maps this to the device string "cuda".
    AMD = "amd"
    # No GPU in use; GPUDetector maps this to the device string "cpu".
    CPU = "cpu"
 class GPUDetector:
    """
    Service for detecting and managing GPU resources.

    Detection order is NVIDIA -> AMD -> CPU. Both NVIDIA and AMD map to the
    PyTorch device string "cuda"; CPU maps to "cpu". Detection runs lazily the
    first time any accessor is used and the result is kept afterwards.
    """
    def __init__(self):
        self._gpu_type: Optional["GPUType"] = None
        self._device: Optional[str] = None
        self._initialized: bool = False
    def initialize(self) -> None:
        """Run GPU detection once; later calls return immediately."""
        if self._initialized:
            return
        self._gpu_type = self._detect_gpu_type()
        self._device = self._get_device_string()
        self._setup_environment()
        self._initialized = True
        logger.info(f"GPU Detector initialized: {self._gpu_type.value} -> {self._device}")
    def _detect_gpu_type(self) -> "GPUType":
        """
        Detect the available GPU type.

        The ``GPU_PREFERENCE`` environment variable ("auto", "nvidia", "amd"
        or "cpu"; default "auto") selects which vendor probes run. When no
        probe succeeds, ``torch.cuda`` is consulted as a last resort.

        Returns:
            The detected GPUType; GPUType.CPU when PyTorch is missing, CPU is
            requested, or nothing is found.
        """
        preference = os.getenv("GPU_PREFERENCE", "auto").strip().lower()
        if preference == "cpu":
            logger.info("GPU preference set to CPU, skipping GPU detection")
            return GPUType.CPU
        if not TORCH_AVAILABLE:
            logger.warning("PyTorch not available, using CPU")
            return GPUType.CPU
        # NOTE(review): the vendor tools only prove a GPU/driver exists; they
        # do not prove the installed PyTorch build can use it — confirm that a
        # CPU-only torch alongside nvidia-smi cannot occur in deployment.
        if preference in ("auto", "nvidia") and self._check_nvidia():
            logger.info("NVIDIA GPU detected via nvidia-smi")
            return GPUType.NVIDIA
        if preference in ("auto", "amd") and self._check_amd():
            logger.info("AMD GPU detected via ROCm")
            return GPUType.AMD
        # Fallback to torch.cuda, which is the entry point for CUDA and ROCm builds alike.
        if torch.cuda.is_available():
            # ROCm builds report a HIP version; this is more reliable than
            # guessing the vendor from the marketing name below.
            if getattr(torch.version, "hip", None):
                logger.info("AMD GPU detected via PyTorch ROCm build")
                return GPUType.AMD
            device_name = torch.cuda.get_device_name(0).lower()
            if any(tag in device_name for tag in ("nvidia", "geforce", "rtx", "gtx")):
                return GPUType.NVIDIA
            if any(tag in device_name for tag in ("amd", "radeon", "rx")):
                return GPUType.AMD
            # Unknown GPU vendor but CUDA works
            logger.warning(f"Unknown GPU vendor: {device_name}, treating as NVIDIA-compatible")
            return GPUType.NVIDIA
        logger.info("No GPU detected, using CPU")
        return GPUType.CPU
    def _check_nvidia(self) -> bool:
        """Return True when ``nvidia-smi`` is on PATH, exits 0 and lists a GPU name."""
        nvidia_smi = shutil.which("nvidia-smi")
        if not nvidia_smi:
            return False
        try:
            result = subprocess.run(
                [nvidia_smi, "--query-gpu=name", "--format=csv,noheader"],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            logger.debug(f"nvidia-smi check failed: {e}")
            return False
        # bool() keeps the declared return type; the bare `and` would leak the output string.
        return result.returncode == 0 and bool(result.stdout.strip())
    def _check_amd(self) -> bool:
        """Return True when ``rocm-smi`` is on PATH, exits 0 and mentions a GPU."""
        rocm_smi = shutil.which("rocm-smi")
        if not rocm_smi:
            return False
        try:
            result = subprocess.run(
                [rocm_smi, "--showproductname"],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            logger.debug(f"rocm-smi check failed: {e}")
            return False
        return result.returncode == 0 and "GPU" in result.stdout
    def _setup_environment(self) -> None:
        """Set up environment variables for the detected GPU (AMD only)."""
        if self._gpu_type != GPUType.AMD:
            return
        # HSA override for AMD RX 6000 series (gfx1030); a value already
        # present in the environment is left untouched.
        existing = os.environ.get("HSA_OVERRIDE_GFX_VERSION")
        if existing is None:
            os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"
            logger.info("Set HSA_OVERRIDE_GFX_VERSION=10.3.0")
        else:
            logger.info(f"Keeping existing HSA_OVERRIDE_GFX_VERSION={existing}")
    def _get_device_string(self) -> str:
        """Get PyTorch device string ("cuda" for NVIDIA and AMD, otherwise "cpu")."""
        if self._gpu_type in (GPUType.NVIDIA, GPUType.AMD):
            return "cuda"
        return "cpu"
    @property
    def gpu_type(self) -> "GPUType":
        """Get detected GPU type, running detection on first access."""
        self.initialize()
        return self._gpu_type
    @property
    def device(self) -> str:
        """Get device string for PyTorch, running detection on first access."""
        self.initialize()
        return self._device
    def get_device(self) -> "torch.device":
        """Get PyTorch device object.

        Raises:
            RuntimeError: If PyTorch could not be imported.
        """
        if not TORCH_AVAILABLE:
            raise RuntimeError("PyTorch not available")
        self.initialize()
        return torch.device(self._device)
    def is_available(self) -> bool:
        """Check if a GPU (NVIDIA or AMD) was detected."""
        self.initialize()
        return self._gpu_type in (GPUType.NVIDIA, GPUType.AMD)
    def is_nvidia(self) -> bool:
        """Check if NVIDIA GPU is being used"""
        self.initialize()
        return self._gpu_type == GPUType.NVIDIA
    def is_amd(self) -> bool:
        """Check if AMD GPU is being used"""
        self.initialize()
        return self._gpu_type == GPUType.AMD
    def is_cpu(self) -> bool:
        """Check if CPU is being used"""
        self.initialize()
        return self._gpu_type == GPUType.CPU
    def get_device_name(self) -> str:
        """Get GPU device name: "CPU", the torch name of device 0, or "Unknown"."""
        self.initialize()
        if self._gpu_type == GPUType.CPU:
            return "CPU"
        if TORCH_AVAILABLE and torch.cuda.is_available():
            return torch.cuda.get_device_name(0)
        return "Unknown"
    def get_memory_info(self) -> Dict[str, Any]:
        """Get memory information for device 0.

        Returns:
            A dict with ``type`` and, on success, ``device_name``, ``total_gb``,
            ``allocated_gb``, ``reserved_gb``, ``free_gb`` (total minus
            allocated) and ``usage_percent``. On failure the dict carries an
            ``error`` string instead of the figures.
        """
        self.initialize()
        if self._gpu_type == GPUType.CPU:
            return {"type": "cpu", "error": "No GPU available"}
        if not TORCH_AVAILABLE or not torch.cuda.is_available():
            return {"type": self._gpu_type.value, "error": "CUDA not available"}
        try:
            props = torch.cuda.get_device_properties(0)
            total = props.total_memory / 1024**3
            allocated = torch.cuda.memory_allocated(0) / 1024**3
            reserved = torch.cuda.memory_reserved(0) / 1024**3
            return {
                "type": self._gpu_type.value,
                "device_name": props.name,
                "total_gb": round(total, 2),
                "allocated_gb": round(allocated, 2),
                "reserved_gb": round(reserved, 2),
                "free_gb": round(total - allocated, 2),
                "usage_percent": round((allocated / total) * 100, 1)
            }
        except Exception as e:
            return {"type": self._gpu_type.value, "error": str(e)}
    def empty_cache(self) -> None:
        """Clear GPU memory cache when a torch-visible GPU exists."""
        self.initialize()
        if TORCH_AVAILABLE and torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.debug("GPU cache cleared")
 # Module-level shared instance; detection itself is deferred until
 # initialize() or one of the accessors is first called.
 gpu_detector = GPUDetector()
--- a/services/metrics_collector.py
+++ b/services/metrics_collector.py
@@ -0,0 +1,137 @@
 """
 Performance metrics collector for CBCFacil
 """
 import time
 import threading
 import psutil
 import logging
 from typing import Dict, Any, Optional
 from datetime import datetime, timedelta
 from contextlib import contextmanager
 class MetricsCollector:
    """Collect and aggregate performance metrics.

    Request counters and the latency window are guarded by ``self._lock``.
    Latencies are handled as seconds (they are multiplied by 1000 to produce
    the ``*_ms`` fields of the summary).
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._start_time = time.time()
        self._request_count = 0
        self._error_count = 0
        self._total_latency = 0.0
        self._latencies = []
        self._lock = threading.Lock()
        self._process = psutil.Process()
    def record_request(self, latency: float, success: bool = True) -> None:
        """Record one request.

        Args:
            latency: Duration of the request.
            success: False to count the request as an error.
        """
        with self._lock:
            self._request_count += 1
            self._total_latency += latency
            self._latencies.append(latency)
            # Keep only last 1000 latencies for memory efficiency
            if len(self._latencies) > 1000:
                del self._latencies[:-1000]
            if not success:
                self._error_count += 1
    def _percentiles_unlocked(self) -> Dict[str, float]:
        """Compute p50/p95/p99 over the latency window; caller must hold ``self._lock``."""
        if not self._latencies:
            return {"p50": 0, "p95": 0, "p99": 0}
        ordered = sorted(self._latencies)
        n = len(ordered)
        return {
            "p50": ordered[int(n * 0.50)],
            "p95": ordered[int(n * 0.95)],
            "p99": ordered[int(n * 0.99)]
        }
    def get_latency_percentiles(self) -> Dict[str, float]:
        """Calculate latency percentiles (all zero when nothing was recorded)."""
        with self._lock:
            return self._percentiles_unlocked()
    def get_system_metrics(self) -> Dict[str, Any]:
        """Get resource metrics of the current process.

        Returns:
            CPU percentage, RSS/VMS memory in MB, thread count and open files;
            an empty dict when any of the probes raises.
        """
        try:
            memory = self._process.memory_info()
            cpu_percent = self._process.cpu_percent(interval=0.1)
            return {
                "cpu_percent": cpu_percent,
                "memory_rss_mb": memory.rss / 1024 / 1024,
                "memory_vms_mb": memory.vms / 1024 / 1024,
                "thread_count": self._process.num_threads(),
                # NOTE(review): this is the raw open_files() result, not a
                # count — confirm whether consumers expect a number here.
                "open_files": self._process.open_files(),
            }
        except Exception as e:
            self.logger.warning(f"Error getting system metrics: {e}")
            return {}
    def get_summary(self) -> Dict[str, Any]:
        """Get metrics summary (uptime, counts, rates and latency figures in ms)."""
        with self._lock:
            uptime = time.time() - self._start_time
            # The lock is not reentrant, so the percentiles are computed via
            # the unlocked helper rather than get_latency_percentiles().
            latency_pcts = self._percentiles_unlocked()
            requests = self._request_count
            errors = self._error_count
            return {
                "uptime_seconds": round(uptime, 2),
                "total_requests": requests,
                "error_count": errors,
                "error_rate": round(errors / max(1, requests) * 100, 2),
                "requests_per_second": round(requests / max(1, uptime), 2),
                "average_latency_ms": round(self._total_latency / max(1, requests) * 1000, 2),
                "latency_p50_ms": round(latency_pcts["p50"] * 1000, 2),
                "latency_p95_ms": round(latency_pcts["p95"] * 1000, 2),
                "latency_p99_ms": round(latency_pcts["p99"] * 1000, 2),
            }
    def reset(self) -> None:
        """Reset all counters, the latency window and the uptime origin."""
        with self._lock:
            self._request_count = 0
            self._error_count = 0
            self._total_latency = 0.0
            self._latencies = []
            self._start_time = time.time()
 class LatencyTracker:
    """Context manager that times a ``with`` block and reports it to a collector.

    On exit it calls ``collector.record_request(latency, success)`` where
    ``success`` is True when the block finished without raising. Exceptions
    are never suppressed.
    """
    def __init__(self, collector: "MetricsCollector", operation: str):
        self.collector = collector
        self.operation = operation
        # perf_counter() reading taken on entry; None until the block is entered.
        self.start_time: Optional[float] = None
        self.success = True
    def __enter__(self):
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments.
        self.start_time = time.perf_counter()
        return self
    def __exit__(self, exc_type, exc_val, exc_tb):
        started = self.start_time
        # Exiting without a matching __enter__ is reported as a zero-length request.
        latency = 0.0 if started is None else time.perf_counter() - started
        self.success = exc_type is None
        self.collector.record_request(latency, self.success)
        return False  # Don't suppress exceptions
 # Module-level collector used by track_latency() and get_performance_report().
 metrics_collector = MetricsCollector()
@contextmanager
 def track_latency(operation: str = "unknown"):
    """Time the enclosed block and record it on the module-level collector.

    Args:
        operation: Label handed to the underlying LatencyTracker.
    """
    tracker = LatencyTracker(metrics_collector, operation)
    with tracker:
        yield
 def get_performance_report() -> Dict[str, Any]:
    """Build a report combining request metrics, process metrics and a timestamp.

    Returns:
        A dict with ``metrics`` (collector summary), ``system`` (process
        resource metrics) and ``timestamp`` (naive UTC time in ISO format).
    """
    summary = metrics_collector.get_summary()
    system = metrics_collector.get_system_metrics()
    generated_at = datetime.utcnow().isoformat()
    return {"metrics": summary, "system": system, "timestamp": generated_at}
--- a/services/telegram_service.py
+++ b/services/telegram_service.py
@@ -0,0 +1,91 @@
 """
 Telegram notification service
 """
 import logging
 import time
 from typing import Optional
 from datetime import datetime
 from ..config import settings
 try:
    import requests
    REQUESTS_AVAILABLE = True
 except ImportError:
    REQUESTS_AVAILABLE = False
 class TelegramService:
    """Service for sending Telegram notifications.

    Credentials are supplied through ``configure``; until then every send is
    skipped and reported as False.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._token: Optional[str] = None
        self._chat_id: Optional[str] = None
        # error_key -> (last message, time.monotonic() reading when it was recorded)
        self._last_error_cache: dict = {}
    def configure(self, token: str, chat_id: str) -> None:
        """Configure Telegram credentials"""
        self._token = token
        self._chat_id = chat_id
        self.logger.info("Telegram service configured")
    @property
    def is_configured(self) -> bool:
        """Check if Telegram is configured (both token and chat id are set)."""
        return bool(self._token and self._chat_id)
    def _redact(self, text) -> str:
        """Return ``str(text)`` with the bot token replaced by a placeholder."""
        rendered = str(text)
        if self._token:
            rendered = rendered.replace(self._token, "<redacted>")
        return rendered
    def _send_request(self, endpoint: str, data: dict, retries: int = 3, delay: int = 2) -> bool:
        """Make API request to Telegram.

        Args:
            endpoint: Bot API method name appended to the bot URL.
            data: Form fields posted with the request.
            retries: Total number of attempts.
            delay: Seconds to wait between two attempts.

        Returns:
            True on the first HTTP 200 response, False when every attempt
            failed or the ``requests`` library is missing.
        """
        if not REQUESTS_AVAILABLE:
            self.logger.warning("requests library not available")
            return False
        url = f"https://api.telegram.org/bot{self._token}/{endpoint}"
        for attempt in range(retries):
            try:
                resp = requests.post(url, data=data, timeout=10)
            except Exception as e:
                # The exception text may quote the request URL, which embeds
                # the bot token, so it is redacted before being logged.
                self.logger.error(
                    f"Telegram request failed (attempt {attempt+1}/{retries}): {self._redact(e)}"
                )
            else:
                if resp.status_code == 200:
                    return True
                self.logger.error(f"Telegram API error: {resp.status_code}")
            # Wait only when another attempt follows.
            if attempt < retries - 1:
                time.sleep(delay)
        return False
    def send_message(self, message: str) -> bool:
        """Send a text message to Telegram; returns False when not configured."""
        if not self.is_configured:
            self.logger.warning("Telegram not configured, skipping notification")
            return False
        data = {"chat_id": self._chat_id, "text": message}
        return self._send_request("sendMessage", data)
    def send_start_notification(self) -> bool:
        """Send service start notification"""
        message = "CBCFacil Service Started - AI document processing active"
        return self.send_message(message)
    def send_error_notification(self, error_key: str, error_message: str) -> bool:
        """Send error notification with throttling.

        A notification is skipped (returning False) when the same message was
        already recorded for ``error_key`` no more than
        ``settings.ERROR_THROTTLE_SECONDS`` ago. A different message for the
        same key is always sent.
        """
        # Monotonic time keeps the throttle window immune to wall-clock changes.
        now = time.monotonic()
        prev = self._last_error_cache.get(error_key)
        if prev is not None:
            prev_msg, prev_time = prev
            if error_message == prev_msg and (now - prev_time) <= settings.ERROR_THROTTLE_SECONDS:
                return False
        self._last_error_cache[error_key] = (error_message, now)
        return self.send_message(f"Error: {error_message}")
 # Module-level TelegramService instance; it has no credentials until
 # configure() is called on it.
 telegram_service = TelegramService()
 def send_telegram_message(message: str, retries: int = 3, delay: int = 2) -> bool:
    """Legacy entry point that sends ``message`` through the module-level service.

    ``retries`` and ``delay`` are accepted for backward compatibility only;
    they are not forwarded to the service.
    """
    delivered = telegram_service.send_message(message)
    return delivered
--- a/services/vram_manager.py
+++ b/services/vram_manager.py
@@ -0,0 +1,172 @@
 """
 VRAM/GPU memory management service
 """
 import gc
 import logging
 import os
 import time
 from datetime import datetime, timedelta
 from typing import Optional, Dict, Any
 from ..core import BaseService
 from ..config import settings
 try:
    import torch
    TORCH_AVAILABLE = True
 except ImportError:
    TORCH_AVAILABLE = False
 # Import gpu_detector after torch check
 from .gpu_detector import gpu_detector, GPUType
 class VRAMManager(BaseService):
    """Service for managing GPU VRAM usage.

    Holds references to the loaded models (Whisper, OCR, TrOCR) and releases
    them, together with PyTorch's cached GPU memory, on demand or when usage
    crosses ``_cleanup_threshold``.
    """
    def __init__(self):
        super().__init__("VRAMManager")
        self._whisper_model = None
        self._ocr_models = None
        self._trocr_models = None
        self._models_last_used: Optional[datetime] = None
        # Fraction of total device memory above which lazy_cleanup() frees models.
        self._cleanup_threshold = 0.7
        # Minimum number of seconds between two lazy cleanups.
        self._cleanup_interval = 300
        self._last_cleanup: Optional[datetime] = None
    def initialize(self) -> None:
        """Initialize VRAM manager and export allocator settings for NVIDIA GPUs."""
        # Initialize GPU detector first
        gpu_detector.initialize()
        if not TORCH_AVAILABLE:
            self.logger.warning("PyTorch not available - VRAM management disabled")
            return
        if not gpu_detector.is_available():
            self.logger.warning("No GPU available - GPU acceleration disabled")
            return
        gpu_type = gpu_detector.gpu_type
        if gpu_type == GPUType.NVIDIA:
            # These variables are read when CUDA starts up, so they are
            # exported before get_device_name() below touches the device.
            # NOTE(review): if detection already went through torch.cuda,
            # CUDA may be initialized and these may come too late — confirm.
            os.environ['CUDA_VISIBLE_DEVICES'] = str(settings.CUDA_VISIBLE_DEVICES)
            if settings.PYTORCH_CUDA_ALLOC_CONF:
                # The allocator is configured through this environment
                # variable; the whole option string is passed on as-is.
                os.environ['PYTORCH_CUDA_ALLOC_CONF'] = str(settings.PYTORCH_CUDA_ALLOC_CONF)
        device_name = gpu_detector.get_device_name()
        if gpu_type == GPUType.AMD:
            self.logger.info(f"VRAM Manager initialized with AMD ROCm: {device_name}")
        elif gpu_type == GPUType.NVIDIA:
            self.logger.info(f"VRAM Manager initialized with NVIDIA CUDA: {device_name}")
    def cleanup(self) -> None:
        """Drop every model reference and release cached GPU memory.

        Does nothing when PyTorch or a torch-visible GPU is unavailable.
        """
        if not TORCH_AVAILABLE or not torch.cuda.is_available():
            return
        models_freed = []
        if self._whisper_model is not None:
            self._whisper_model = None
            models_freed.append("Whisper")
        if self._ocr_models is not None:
            self._ocr_models = None
            models_freed.append("OCR")
        if self._trocr_models is not None:
            try:
                if isinstance(self._trocr_models, dict):
                    model = self._trocr_models.get('model')
                    if model is not None:
                        # Move the weights off the GPU before the reference is dropped.
                        model.to('cpu')
                        models_freed.append("TrOCR")
                torch.cuda.empty_cache()
            except Exception as e:
                self.logger.error(f"Error freeing TrOCR VRAM: {e}")
            self._trocr_models = None
        self._models_last_used = None
        if models_freed:
            self.logger.info(f"Freed VRAM for models: {', '.join(models_freed)}")
        self._force_aggressive_cleanup()
    def update_usage(self) -> None:
        """Update usage timestamp"""
        self._models_last_used = datetime.utcnow()
        self.logger.debug("VRAM usage timestamp updated")
    def should_cleanup(self) -> bool:
        """Check if cleanup should be performed.

        Returns:
            True when allocated memory on device 0 exceeds
            ``_cleanup_threshold`` of its total and at least
            ``_cleanup_interval`` seconds passed since the last lazy cleanup
            (the interval is not applied before the first cleanup).
        """
        if not TORCH_AVAILABLE or not torch.cuda.is_available():
            return False
        if self._last_cleanup is not None:
            elapsed = (datetime.utcnow() - self._last_cleanup).total_seconds()
            if elapsed < self._cleanup_interval:
                return False
        total = torch.cuda.get_device_properties(0).total_memory
        if not total:
            return False
        allocated = torch.cuda.memory_allocated(0)
        return allocated / total > self._cleanup_threshold
    def lazy_cleanup(self) -> None:
        """Perform cleanup if needed"""
        if self.should_cleanup():
            self.cleanup()
            self._last_cleanup = datetime.utcnow()
    def _force_aggressive_cleanup(self) -> None:
        """Run a full garbage collection, then release PyTorch's cached GPU memory."""
        if not TORCH_AVAILABLE or not torch.cuda.is_available():
            return
        try:
            before_allocated = torch.cuda.memory_allocated(0) / 1024**3
            before_reserved = torch.cuda.memory_reserved(0) / 1024**3
            self.logger.debug(f"Before cleanup - Allocated: {before_allocated:.2f}GB, Reserved: {before_reserved:.2f}GB")
            # Collect every generation: long-lived model objects are not in
            # the youngest generation, so a generation-0 pass would miss them.
            gc.collect()
            torch.cuda.empty_cache()
            after_allocated = torch.cuda.memory_allocated(0) / 1024**3
            after_reserved = torch.cuda.memory_reserved(0) / 1024**3
            self.logger.debug(f"After cleanup - Allocated: {after_allocated:.2f}GB, Reserved: {after_reserved:.2f}GB")
            if after_reserved < before_reserved:
                self.logger.info(f"VRAM freed: {(before_reserved - after_reserved):.2f}GB")
        except Exception as e:
            self.logger.error(f"Error in aggressive VRAM cleanup: {e}")
    def get_usage(self) -> Dict[str, Any]:
        """Get VRAM usage information for device 0.

        Returns:
            Memory figures in GB, which models are currently loaded, the last
            usage timestamp and the configured model timeout; or a dict with
            only an ``error`` key when PyTorch/CUDA is unavailable.
        """
        if not TORCH_AVAILABLE:
            return {'error': 'PyTorch not available'}
        if not torch.cuda.is_available():
            return {'error': 'CUDA not available'}
        total = torch.cuda.get_device_properties(0).total_memory / 1024**3
        allocated = torch.cuda.memory_allocated(0) / 1024**3
        cached = torch.cuda.memory_reserved(0) / 1024**3
        free = total - allocated
        return {
            'total_gb': round(total, 2),
            'allocated_gb': round(allocated, 2),
            'cached_gb': round(cached, 2),
            'free_gb': round(free, 2),
            'whisper_loaded': self._whisper_model is not None,
            'ocr_models_loaded': self._ocr_models is not None,
            'trocr_models_loaded': self._trocr_models is not None,
            'last_used': self._models_last_used.isoformat() if self._models_last_used else None,
            'timeout_seconds': settings.MODEL_TIMEOUT_SECONDS
        }
    def force_free(self) -> str:
        """Force immediate VRAM free"""
        self.cleanup()
        return "VRAM freed successfully"
 # Module-level VRAMManager instance, constructed when this module is imported.
 vram_manager = VRAMManager()
--- a/services/webdav_service.py
+++ b/services/webdav_service.py
@@ -0,0 +1,200 @@
 """
 WebDAV service for Nextcloud integration
 """
 import logging
 import os
 import time
 import unicodedata
 import re
 from pathlib import Path
 from typing import Optional, List, Dict
 from contextlib import contextmanager
 import requests
 from requests.auth import HTTPBasicAuth
 from requests.adapters import HTTPAdapter
 from ..config import settings
 from ..core import WebDAVError
 class WebDAVService:
    """Service for WebDAV operations with Nextcloud.

    All remote paths are normalized (NFC, forward slashes, no leading slash)
    and resolved against ``settings.WEBDAV_ENDPOINT``. Transport failures are
    retried with exponential backoff; HTTP error responses are raised as
    ``WebDAVError`` without retrying.
    """
    def __init__(self):
        self.session: Optional[requests.Session] = None
        self.logger = logging.getLogger(__name__)
        # Base delay in seconds; doubled after every failed attempt.
        self._retry_delay = 1
        self._max_retries = settings.WEBDAV_MAX_RETRIES
    def initialize(self) -> None:
        """Initialize WebDAV session and verify the server is reachable.

        Raises:
            WebDAVError: If credentials are missing or the test request fails.
        """
        if not settings.has_webdav_config:
            raise WebDAVError("WebDAV credentials not configured")
        self.session = requests.Session()
        self.session.auth = HTTPBasicAuth(settings.NEXTCLOUD_USER, settings.NEXTCLOUD_PASSWORD)
        # Adapter-level retries stay off because _request() retries itself.
        adapter = HTTPAdapter(
            max_retries=0,
            pool_connections=10,
            pool_maxsize=20
        )
        self.session.mount('https://', adapter)
        self.session.mount('http://', adapter)
        # Test connection
        try:
            self._request('GET', '', timeout=5)
            self.logger.info("WebDAV connection established")
        except Exception as e:
            raise WebDAVError(f"Failed to connect to WebDAV: {e}") from e
    def cleanup(self) -> None:
        """Cleanup WebDAV session"""
        if self.session:
            self.session.close()
            self.session = None
    @staticmethod
    def normalize_path(path: str) -> str:
        """Normalize remote paths to a consistent representation.

        Applies Unicode NFC, trims surrounding whitespace, converts
        backslashes to slashes, collapses repeated slashes and removes leading
        slashes. Empty or whitespace-only input yields "".
        """
        if not path:
            return ""
        normalized = unicodedata.normalize("NFC", str(path)).strip()
        if not normalized:
            return ""
        normalized = normalized.replace("\\", "/")
        normalized = re.sub(r"/+", "/", normalized)
        return normalized.lstrip("/")
    @staticmethod
    def _relative_href(href: str, base_url: str) -> str:
        """Strip the WebDAV base from a PROPFIND href.

        Handles hrefs given as full URLs as well as path-only hrefs, and only
        strips the base when it ends on a path-segment boundary.
        """
        from urllib.parse import urlparse
        base_url = base_url.rstrip('/')
        base_path = urlparse(base_url).path.rstrip('/')
        for prefix in (base_url, base_path):
            if prefix and href.startswith(prefix):
                rest = href[len(prefix):]
                if rest == "" or rest.startswith('/'):
                    href = rest
                    break
        return href.lstrip('/')
    def _build_url(self, remote_path: str) -> str:
        """Build WebDAV URL"""
        path = self.normalize_path(remote_path)
        base_url = settings.WEBDAV_ENDPOINT.rstrip('/')
        return f"{base_url}/{path}"
    def _request(self, method: str, remote_path: str, **kwargs) -> "requests.Response":
        """Make HTTP request to WebDAV with retries.

        Args:
            method: HTTP/WebDAV verb.
            remote_path: Path relative to the WebDAV endpoint.
            **kwargs: Passed to ``Session.request``; ``timeout`` defaults to
                ``settings.HTTP_TIMEOUT``.

        Returns:
            The response when its status code is below 400.

        Raises:
            WebDAVError: On a status code of 400 or above, when the session is
                not initialized, or when every attempt failed at transport level.
        """
        if not self.session:
            raise WebDAVError("WebDAV session not initialized")
        url = self._build_url(remote_path)
        timeout = kwargs.pop('timeout', settings.HTTP_TIMEOUT)
        # A file-like body is consumed by the first attempt; remember where it
        # started so that a retry re-sends the same bytes.
        body = kwargs.get('data')
        rewind_to = None
        if body is not None and hasattr(body, 'seek') and hasattr(body, 'tell'):
            try:
                rewind_to = body.tell()
            except (OSError, ValueError):
                rewind_to = None
        attempts = max(1, self._max_retries)
        for attempt in range(attempts):
            if attempt and rewind_to is not None:
                body.seek(rewind_to)
            try:
                response = self.session.request(method, url, timeout=timeout, **kwargs)
            except requests.RequestException as e:
                if attempt == attempts - 1:
                    raise WebDAVError(f"Request failed after {attempts} retries: {e}") from e
                delay = self._retry_delay * (2 ** attempt)
                self.logger.warning(f"Request failed (attempt {attempt + 1}/{attempts}), retrying in {delay}s...")
                time.sleep(delay)
                continue
            if response.status_code < 400:
                return response
            if response.status_code == 404:
                raise WebDAVError(f"Resource not found: {remote_path}")
            raise WebDAVError(f"HTTP {response.status_code}: {response.text}")
        raise WebDAVError("Max retries exceeded")
    def list(self, remote_path: str = "") -> List[str]:
        """List files in remote directory"""
        self.logger.debug(f"Listing remote directory: {remote_path}")
        response = self._request('PROPFIND', remote_path, headers={'Depth': '1'})
        return self._parse_propfind_response(response.text)
    def _parse_propfind_response(self, xml_response: str) -> List[str]:
        """Parse PROPFIND XML response into hrefs relative to the base URL.

        Parsing is best-effort: on any error the entries collected so far are
        returned and the error is logged.
        """
        files = []
        try:
            import xml.etree.ElementTree as ET
            root = ET.fromstring(xml_response)
            base_url = settings.NEXTCLOUD_URL
            # NOTE(review): hrefs are returned as sent by the server (no
            # percent-decoding is applied) — confirm callers expect that.
            for href in root.findall('.//{DAV:}href'):
                files.append(self._relative_href(href.text or "", base_url))
        except Exception as e:
            self.logger.error(f"Error parsing PROPFIND response: {e}")
        return files
    def download(self, remote_path: str, local_path: Path) -> None:
        """Download file from WebDAV, creating the local parent directory if needed."""
        self.logger.info(f"Downloading {remote_path} to {local_path}")
        # Ensure local directory exists
        local_path.parent.mkdir(parents=True, exist_ok=True)
        response = self._request('GET', remote_path, stream=True)
        try:
            # Use larger buffer size for better performance
            with open(local_path, 'wb', buffering=65536) as f:
                for chunk in response.iter_content(chunk_size=settings.DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response keeps its connection until it is closed.
            response.close()
        self.logger.debug(f"Download completed: {local_path}")
    def upload(self, local_path: Path, remote_path: str) -> None:
        """Upload file to WebDAV, creating the remote parent directories first."""
        self.logger.info(f"Uploading {local_path} to {remote_path}")
        # Ensure remote directory exists
        remote_file = self.normalize_path(remote_path)
        if '/' in remote_file:
            self.makedirs(remote_file.rpartition('/')[0])
        with open(local_path, 'rb') as f:
            self._request('PUT', remote_path, data=f)
        self.logger.debug(f"Upload completed: {remote_path}")
    def mkdir(self, remote_path: str) -> None:
        """Create directory on WebDAV"""
        self.makedirs(remote_path)
    def makedirs(self, remote_path: str) -> None:
        """Create directory and parent directories on WebDAV.

        Raises:
            WebDAVError: When a MKCOL fails for a reason other than the
                tolerated 405/409 responses.
        """
        path = self.normalize_path(remote_path)
        if not path:
            return
        current = ""
        for part in path.split('/'):
            current = f"{current}/{part}" if current else part
            try:
                self._request('MKCOL', current)
                self.logger.debug(f"Created directory: {current}")
            except WebDAVError as e:
                # MKCOL on an existing collection answers 405 Method Not
                # Allowed; 409 Conflict stays tolerated as before.
                message = str(e)
                if "HTTP 405:" not in message and "HTTP 409:" not in message:
                    raise
    def delete(self, remote_path: str) -> None:
        """Delete file or directory from WebDAV"""
        self.logger.info(f"Deleting remote path: {remote_path}")
        self._request('DELETE', remote_path)
    def exists(self, remote_path: str) -> bool:
        """Check if remote path exists (any WebDAVError counts as "does not exist")."""
        try:
            self._request('HEAD', remote_path)
            return True
        except WebDAVError:
            return False
 # Module-level WebDAVService instance; it has no session until
 # initialize() is called on it.
 webdav_service = WebDAVService()
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,165 @@
 #!/usr/bin/env python3
 """
 Setup script for CBCFacil
 """
 import os
 import sys
 import subprocess
 import platform
 from pathlib import Path
 def check_python_version():
    """Check if Python version is 3.10 or higher"""
    if sys.version_info < (3, 10):
        print("❌ Error: Python 3.10 or higher is required")
        print(f"   Current version: {sys.version}")
        sys.exit(1)
    print(f"✓ Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
 def check_system_dependencies():
    """Report which optional system tools are available.

    Nothing is installed here; the function only prints a status line per
    tool.  On Linux it looks for a CUDA directory and probes ``tesseract``
    and ``ffmpeg``; on macOS it asks Homebrew whether ``tesseract`` is
    installed.  Other platforms get no checks.
    """
    def command_succeeds(command):
        """Return True when the command can be started and exits with 0."""
        try:
            subprocess.run(command, check=True, capture_output=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            return False
        return True
    system = platform.system().lower()
    print("\n📦 Checking system dependencies...")
    if system == "linux":
        # CUDA is optional: its absence only produces a warning
        if not os.path.exists("/usr/local/cuda"):
            print("⚠ CUDA not found - GPU acceleration will be disabled")
        else:
            print("✓ CUDA found")
        # Tesseract probe
        if command_succeeds(["tesseract", "--version"]):
            print("✓ Tesseract OCR installed")
        else:
            print("⚠ Tesseract OCR not found - PDF processing may not work")
        # FFmpeg probe
        if command_succeeds(["ffmpeg", "-version"]):
            print("✓ FFmpeg installed")
        else:
            print("⚠ FFmpeg not found - audio processing may not work")
    elif system == "darwin":  # macOS
        # Tesseract is looked up through Homebrew's package list
        if command_succeeds(["brew", "list", "tesseract"]):
            print("✓ Tesseract OCR installed")
        else:
            print("⚠ Tesseract not found. Install with: brew install tesseract")
    print()
 def create_virtual_environment():
    """Make sure a ``venv`` directory exists and return its path.

    When the directory is missing, it is created by running the current
    interpreter's ``venv`` module; an existing directory is left untouched.

    Returns:
        Path: the relative path ``venv``.
    """
    venv_path = Path("venv")
    if not venv_path.exists():
        print("📦 Creating virtual environment...")
        subprocess.run([sys.executable, "-m", "venv", "venv"], check=True)
        print("✓ Virtual environment created")
    else:
        print("✓ Virtual environment already exists")
    return venv_path
 def install_requirements(venv_path):
    """Upgrade pip and install ``requirements.txt`` inside the virtual environment.

    pip is invoked as ``<venv python> -m pip`` rather than through the ``pip``
    executable: on Windows the executable cannot replace itself, so
    ``pip install --upgrade pip`` fails there, while the module form works on
    every platform.

    Args:
        venv_path: Path of the virtual environment directory.

    Raises:
        subprocess.CalledProcessError: When either pip command exits non-zero.
    """
    on_windows = platform.system() == "Windows"
    python_path = (
        venv_path
        / ("Scripts" if on_windows else "bin")
        / ("python.exe" if on_windows else "python")
    )
    pip_command = [str(python_path), "-m", "pip"]
    print("📦 Installing Python requirements...")
    subprocess.run(pip_command + ["install", "--upgrade", "pip"], check=True)
    subprocess.run(pip_command + ["install", "-r", "requirements.txt"], check=True)
    print("✓ Python requirements installed")
 def create_directories():
    """Create the working directories in the current directory.

    Existing directories are accepted silently; one confirmation line is
    printed per directory.
    """
    print("\n📁 Creating directories...")
    for directory in ("downloads", "resumenes_docx", "logs", "processed"):
        Path(directory).mkdir(exist_ok=True)
        print(f"  ✓ {directory}")
    print()
 def create_env_file():
    """Seed ``.env`` with the contents of ``.env.example``.

    Does nothing beyond printing a status line when ``.env`` already exists
    or when the template is missing.
    """
    env_path = Path(".env")
    example_path = Path(".env.example")
    if env_path.exists():
        print("✓ .env file already exists")
    elif not example_path.exists():
        print("⚠ .env.example not found")
    else:
        print("\n📝 Creating .env file from template...")
        print("   Please edit .env file and add your API keys")
        # Read the whole template before the target is opened for writing
        template_text = example_path.read_text()
        env_path.write_text(template_text)
        print("✓ .env file created from .env.example")
        print("   ⚠ Please edit .env and add your API keys!")
 def main():
    """Run every setup step in order and print the follow-up instructions."""
    banner = "=" * 60
    print(banner)
    print("CBCFacil Setup Script")
    print(banner)
    print()
    # Interpreter and system tool checks
    check_python_version()
    check_system_dependencies()
    # Virtual environment and Python packages
    venv_path = create_virtual_environment()
    install_requirements(venv_path)
    # Working directories and the .env file
    create_directories()
    create_env_file()
    print("\n" + banner)
    print("✓ Setup complete!")
    print(banner)
    closing_lines = (
        "\nNext steps:",
        "  1. Edit .env file and add your API keys",
        "  2. Run: source venv/bin/activate  (Linux/macOS)",
        "           or venv\\Scripts\\activate     (Windows)",
        "  3. Run: python main_refactored.py",
        "\nFor dashboard only:",
        "  python -c \"from api.routes import create_app; app = create_app(); app.run(port=5000)\"",
    )
    for text in closing_lines:
        print(text)
    print()
 # Run the setup steps only when this file is executed as a script
 if __name__ == "__main__":
    main()
--- a/storage/init.py
+++ b/storage/init.py
@@ -0,0 +1,7 @@
 """
 Storage package for CBCFacil
 """
 from .processed_registry import ProcessedRegistry
 __all__ = ['ProcessedRegistry']
--- a/storage/processed_registry.py
+++ b/storage/processed_registry.py
@@ -0,0 +1,235 @@
 """
 Processed files registry - Optimized version with bloom filter and better caching
 """
 import fcntl
 import logging
 import time
 from pathlib import Path
 from typing import Set, Optional
 from datetime import datetime, timedelta
 from ..config import settings
 class BloomFilter:
    """Simple Bloom filter for fast, approximate membership testing.

    ``might_contain`` never returns False for an item that was added, but it
    may return True for an item that was not (a false positive).

    Args:
        size: Number of slots in the bit array; must be positive.
        hash_count: Number of positions set per item; must be positive.

    Raises:
        ValueError: If ``size`` or ``hash_count`` is not positive.
    """
    def __init__(self, size: int = 10000, hash_count: int = 3):
        # A zero size would divide by zero when hashing, and a zero hash count
        # would make every membership test succeed vacuously.
        if size <= 0:
            raise ValueError("size must be a positive integer")
        if hash_count <= 0:
            raise ValueError("hash_count must be a positive integer")
        self.size = size
        self.hash_count = hash_count
        self.bit_array = [0] * size
    def _hashes(self, item: str) -> list[int]:
        """Return exactly ``hash_count`` bit positions for ``item``.

        Each MD5 digest yields four 32-bit words.  The first digest is taken
        over the bare item, so filters with up to four hashes keep the same
        positions as before; further words come from digests salted with a
        round counter.
        """
        import hashlib
        data = item.encode()
        positions: list[int] = []
        round_number = 0
        while len(positions) < self.hash_count:
            salt = round_number.to_bytes(4, 'big') if round_number else b""
            # MD5 only spreads bits here; it is not used for security
            digest = hashlib.md5(salt + data, usedforsecurity=False).digest()
            for offset in range(0, len(digest), 4):
                if len(positions) == self.hash_count:
                    break
                word = int.from_bytes(digest[offset:offset + 4], 'big')
                positions.append(word % self.size)
            round_number += 1
        return positions
    def add(self, item: str) -> None:
        """Mark every position derived from ``item`` as set."""
        for pos in self._hashes(item):
            self.bit_array[pos] = 1
    def might_contain(self, item: str) -> bool:
        """Return False if ``item`` was never added; True if it may have been."""
        return all(self.bit_array[pos] for pos in self._hashes(item))
 class ProcessedRegistry:
    """Track which files have already been processed.

    Entries are persisted one per line in ``settings.processed_files_path``
    and mirrored in memory.  ``_cache`` holds every entry together with its
    basename, so lookups work by full path or by file name, and
    ``_bloom_filter`` is a cheap pre-check in front of that set.  Because
    basenames are stored next to full entries, ``count()`` and ``get_all()``
    include both forms.  Every method that modifies the registry file holds
    an exclusive ``flock`` on it while doing so.
    """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cache: Set[str] = set()
        self._cache_time: Optional[float] = None
        self._cache_ttl = 300  # seconds: 5 minutes (previously 60 s)
        self._initialized = False
        self._bloom_filter = self._new_bloom_filter()
        self._write_lock = False  # Write batching flag; not read by any method of this class
    @staticmethod
    def _new_bloom_filter() -> "BloomFilter":
        """Return an empty bloom filter with the registry's fixed dimensions."""
        return BloomFilter(size=10000, hash_count=3)
    @staticmethod
    def _lookup_keys(entry: str) -> tuple:
        """Return the keys an entry is indexed under: itself and its basename."""
        return (entry, Path(entry).name)
    def _remember(self, entry: str) -> None:
        """Index an entry in the cache and bloom filter under all its keys."""
        for key in self._lookup_keys(entry):
            self._cache.add(key)
            self._bloom_filter.add(key)
    def _ensure_initialized(self) -> None:
        """Load the registry from disk the first time it is needed."""
        if not self._initialized:
            self.initialize()
    def initialize(self) -> None:
        """Load the registry from disk and mark it as initialized."""
        self.load()
        self._initialized = True
        self.logger.info(f"Processed registry initialized ({self.count()} files)")
    def load(self) -> Set[str]:
        """Load processed files from disk, reusing the cache while it is fresh.

        A non-empty cache younger than ``_cache_ttl`` seconds is returned as
        is.  Otherwise the file is re-read and both the cache and the bloom
        filter are rebuilt from scratch, so bits of removed entries do not
        linger.  Read errors are logged and whatever was read so far is kept.

        Returns:
            The live cache set (a reference, not a copy); callers must treat
            it as read-only.
        """
        now = time.time()
        # Return cached data if still valid
        if self._cache and self._cache_time:
            if now - self._cache_time < self._cache_ttl:
                return self._cache
        processed: Set[str] = set()
        bloom = self._new_bloom_filter()
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            if registry_path.exists():
                with open(registry_path, 'r', encoding='utf-8') as f:
                    for raw_line in f:
                        line = raw_line.strip()
                        # Blank lines and '#' comment lines are not entries
                        if line and not line.startswith('#'):
                            for key in self._lookup_keys(line):
                                processed.add(key)
                                bloom.add(key)
        except Exception as e:
            self.logger.error(f"Error reading processed files registry: {e}")
        self._cache = processed
        self._bloom_filter = bloom
        self._cache_time = now
        return processed
    def save(self, file_path: str) -> None:
        """Append a file to the registry unless it is already known.

        The registry is loaded first if necessary, so a save issued before
        any lookup cannot leave a partial cache marked as fresh.  The line is
        flushed while the exclusive lock is still held, and the in-memory
        index is updated only after the write succeeded.

        Raises:
            Exception: Any error raised while writing is logged and re-raised.
        """
        if not file_path:
            return
        self._ensure_initialized()
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            # Check cache first
            if file_path in self._cache:
                return
            with open(registry_path, 'a', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    f.write(file_path + "\n")
                    # Push the data out before unlocking; otherwise it would
                    # only reach the file on close, after the lock is gone.
                    f.flush()
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            self._remember(file_path)
            self._cache_time = time.time()
            self.logger.debug(f"Added {file_path} to processed registry")
        except Exception as e:
            self.logger.error(f"Error saving to processed files registry: {e}")
            raise
    def is_processed(self, file_path: str) -> bool:
        """Return True when the path or its basename is in the registry.

        Each candidate key goes through the bloom filter before the set
        lookup.  The basename is tested on its own, so a registry entry
        stored under a different directory, or as a bare file name, still
        matches.
        """
        self._ensure_initialized()
        for key in self._lookup_keys(file_path):
            if self._bloom_filter.might_contain(key) and key in self._cache:
                return True
        return False
    def save_batch(self, file_paths: list[str]) -> int:
        """Append several files to the registry in a single locked write.

        Empty values, entries already known and duplicates within the batch
        are skipped.  Errors are logged rather than raised.

        Returns:
            Number of entries written; 0 when nothing was new or the write
            failed.
        """
        try:
            self._ensure_initialized()
            registry_path = settings.processed_files_path
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            pending = []
            seen: Set[str] = set()
            for file_path in file_paths:
                if not file_path or file_path in self._cache or file_path in seen:
                    continue
                pending.append(file_path)
                seen.update(self._lookup_keys(file_path))
            with open(registry_path, 'a', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    if pending:
                        f.writelines(entry + "\n" for entry in pending)
                        # Flush while the lock is still held
                        f.flush()
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            # Update the in-memory index only once the data is on disk
            for entry in pending:
                self._remember(entry)
            saved_count = len(pending)
            if pending:
                self._cache_time = time.time()
                self.logger.debug(f"Added {saved_count} files to processed registry")
            return saved_count
        except Exception as e:
            self.logger.error(f"Error saving batch to processed files registry: {e}")
            return 0
    def remove(self, file_path: str) -> bool:
        """Remove a file from the registry, matching by path or by basename.

        The file is read and rewritten through a single handle while the
        exclusive lock is held, so its content is never truncated before the
        lock is acquired.  Every removed line is also dropped from the cache,
        and the bloom filter is rebuilt from what remains.

        Returns:
            True when the registry file was rewritten (even if no line
            matched); False when the file does not exist or an error occurred.
        """
        registry_path = settings.processed_files_path
        try:
            if not registry_path.exists():
                return False
            target_name = Path(file_path).name
            removed = []
            with open(registry_path, 'r+', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    lines_to_keep = []
                    for line in f:
                        stripped = line.strip()
                        if stripped != file_path and Path(stripped).name != target_name:
                            lines_to_keep.append(line)
                        else:
                            removed.append(stripped)
                    # Rewrite in place and cut off whatever is left of the old content
                    f.seek(0)
                    f.writelines(lines_to_keep)
                    f.truncate()
                    f.flush()
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            for entry in (file_path, *removed):
                for key in self._lookup_keys(entry):
                    self._cache.discard(key)
            # A bloom filter cannot forget items, so rebuild it
            bloom = self._new_bloom_filter()
            for item in self._cache:
                bloom.add(item)
            self._bloom_filter = bloom
            return True
        except Exception as e:
            self.logger.error(f"Error removing from processed files registry: {e}")
            return False
    def clear(self) -> None:
        """Delete the registry file and reset the in-memory state.

        Raises:
            Exception: Any error raised while clearing is logged and re-raised.
        """
        registry_path = settings.processed_files_path
        try:
            if registry_path.exists():
                registry_path.unlink()
            self._cache.clear()
            self._cache_time = None
            self._bloom_filter = self._new_bloom_filter()
            self.logger.info("Processed files registry cleared")
        except Exception as e:
            self.logger.error(f"Error clearing processed files registry: {e}")
            raise
    def get_all(self) -> Set[str]:
        """Return a copy of every cached key (full entries and basenames)."""
        self._ensure_initialized()
        return self._cache.copy()
    def count(self) -> int:
        """Return the number of cached keys (full entries and basenames)."""
        self._ensure_initialized()
        return len(self._cache)
    def get_stats(self) -> dict:
        """Return a snapshot of cache statistics without touching the disk."""
        return {
            "total_files": len(self._cache),
            "cache_age_seconds": time.time() - self._cache_time if self._cache_time else 0,
            "cache_ttl_seconds": self._cache_ttl,
            "bloom_filter_size": self._bloom_filter.size,
            "initialized": self._initialized
        }
 # Global instance: constructed once, when this module is imported
 processed_registry = ProcessedRegistry()
--- a/test_audio/imperio.mp3
+++ b/test_audio/imperio.mp3
--- a/tests/init.py
+++ b/tests/init.py
@@ -0,0 +1,3 @@
 """
 Test package for CBCFacil
 """
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,20 @@
 """
 Pytest configuration for CBCFacil tests
 Ensures proper Python path for module imports
 """
 import sys
 import os
 from pathlib import Path
 # Add project root (the parent of this tests directory) to the front of sys.path
 PROJECT_ROOT = Path(__file__).parent.parent
 sys.path.insert(0, str(PROJECT_ROOT))
 # Set environment variables for testing; setdefault keeps any value that is
 # already present in the environment
 os.environ.setdefault('LOCAL_STATE_DIR', str(PROJECT_ROOT / 'state'))
 os.environ.setdefault('LOCAL_DOWNLOADS_PATH', str(PROJECT_ROOT / 'downloads'))
 # Default the external service settings to empty strings during tests
 # (values already exported in the environment are left unchanged)
 os.environ.setdefault('NEXTCLOUD_URL', '')
 os.environ.setdefault('ANTHROPIC_AUTH_TOKEN', '')
 os.environ.setdefault('GEMINI_API_KEY', '')
--- a/verify_improvements.py
+++ b/verify_improvements.py
@@ -0,0 +1,112 @@
 #!/usr/bin/env python3
 """
 Script de verificación para mejoras de la Fase 3
 Verifica que todas las mejoras están correctamente implementadas
 """
 import sys
 import os
 from pathlib import Path
 def check_file_exists(filepath, description):
    """Report whether a path exists and return the result.

    Prints one status line labelled with ``description``.

    Returns:
        bool: True when the path exists, False otherwise.
    """
    found = Path(filepath).exists()
    if not found:
        print(f"❌ {description}: {filepath} NO ENCONTRADO")
    else:
        print(f"✅ {description}: {filepath}")
    return found
 def check_function_in_file(filepath, function_name, description):
    """Check that a text snippet occurs somewhere in a file.

    This is a plain substring search, not a parse of the source: the snippet
    (for example ``"def setup_logging"``) only has to appear in the text.
    The file is decoded as UTF-8 explicitly so that sources containing
    accented characters or emoji are read the same way on every platform
    instead of depending on the locale encoding.

    Args:
        filepath: File to read.
        function_name: Text to look for.
        description: Label used in the printed status line.

    Returns:
        bool: True when the snippet is found; False when it is missing or
        the file cannot be read or decoded.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError covers decoding errors
        print(f"❌ Error leyendo {filepath}: {e}")
        return False
    if function_name in content:
        print(f"✅ {description}: Encontrado '{function_name}'")
        return True
    print(f"❌ {description}: NO encontrado '{function_name}'")
    return False
 def check_class_in_file(filepath, class_name, description):
    """Check that a class exists in a file.

    Delegates to ``check_function_in_file``, searching for the text
    ``class <class_name>``.
    """
    return check_function_in_file(filepath, f"class {class_name}", description)
 def main():
    """Run every Phase 3 verification and print a summary.

    Returns:
        int: 0 when all checks pass, 1 otherwise (used as the exit status).
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70
    print(heavy_rule)
    print("🔍 VERIFICACIÓN DE MEJORAS FASE 3 - CBCFacil")
    print(heavy_rule)
    print()
    results = []
    # 1. Files that must have been modified or created
    print("📁 ARCHIVOS:")
    print(light_rule)
    expected_files = (
        ("main.py", "main.py modificado"),
        ("config/settings.py", "config/settings.py modificado"),
        ("core/health_check.py", "core/health_check.py creado"),
        ("IMPROVEMENTS_LOG.md", "IMPROVEMENTS_LOG.md creado"),
    )
    for filepath, description in expected_files:
        results.append(check_file_exists(filepath, description))
    print()
    # 2-4. Text that must appear in each file: (heading, file, (snippet, label)...)
    content_sections = (
        ("🔧 MEJORAS EN main.py:", "main.py", (
            ("logger.exception", "logger.exception() implementado"),
            ("class JSONFormatter", "JSONFormatter implementado"),
            ("def validate_configuration", "validate_configuration() implementado"),
            ("def check_service_health", "check_service_health() implementado"),
            ("def send_error_notification", "send_error_notification() implementado"),
            ("def setup_logging", "setup_logging() implementado"),
        )),
        ("⚙️  MEJORAS EN config/settings.py:", "config/settings.py", (
            ("class ConfigurationError", "ConfigurationError definido"),
            ("def nextcloud_url", "Propiedad nextcloud_url con validación"),
            ("def valid_webdav_config", "Propiedad valid_webdav_config"),
            ("def telegram_configured", "Propiedad telegram_configured"),
            ("def has_gpu_support", "Propiedad has_gpu_support"),
            ("def config_summary", "Propiedad config_summary"),
        )),
        ("❤️  HEALTH CHECKS:", "core/health_check.py", (
            ("class HealthChecker", "Clase HealthChecker"),
            ("def check_webdav_connection", "check_webdav_connection()"),
            ("def check_ai_providers", "check_ai_providers()"),
            ("def check_vram_manager", "check_vram_manager()"),
            ("def check_disk_space", "check_disk_space()"),
            ("def run_full_health_check", "run_full_health_check()"),
        )),
    )
    for heading, filepath, snippets in content_sections:
        print(heading)
        print(light_rule)
        for snippet, description in snippets:
            results.append(check_function_in_file(filepath, snippet, description))
        print()
    # Summary
    print(heavy_rule)
    print("📊 RESUMEN:")
    print(heavy_rule)
    passed = sum(results)
    total = len(results)
    percentage = (passed / total) * 100
    print(f"Verificaciones pasadas: {passed}/{total} ({percentage:.1f}%)")
    print()
    if percentage != 100:
        print("⚠️  Algunas verificaciones fallaron")
        print()
        return 1
    print("🎉 ¡TODAS LAS MEJORAS ESTÁN CORRECTAMENTE IMPLEMENTADAS!")
    print()
    print("Puedes probar:")
    print("  python main.py health")
    print()
    return 0
 # When executed as a script, use main()'s return value as the exit status
 if __name__ == "__main__":
    sys.exit(main())