Compare commits

...

2 Commits

Author SHA1 Message Date
renato97
207b7856b5 Actualización: cambios varios y archivos nuevos
- Modificaciones en main.py
- Agregado kubectl
- Agregado plus.md y todo.md
2026-01-10 19:25:37 +00:00
renato97
75ef0afcb1 feat(dashboard): agregar panel de versiones y corregir carga de transcripciones
- Corregir endpoints /api/transcription y /api/summary para manejar filenames con extensión
- Agregar endpoint /api/versions para listar archivos generados
- Agregar tab 'Versiones' en panel lateral con lista de archivos
- Mejorar modal de progreso con barra animada y estados
- Cambiar archivos para que se abran en pestaña en lugar de descargarse
- Agregar botón 'Regenerar' en lista de archivos procesados
2026-01-10 19:18:14 +00:00
6 changed files with 3652 additions and 47 deletions

View File

@@ -2,6 +2,7 @@
Flask API routes for CBCFacil dashboard Flask API routes for CBCFacil dashboard
""" """
import os import os
import time
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List from typing import Dict, Any, List
@@ -9,14 +10,20 @@ from flask import Flask, render_template, request, jsonify, send_from_directory
from flask_cors import CORS from flask_cors import CORS
from config import settings from config import settings
from storage import processed_registry from storage.processed_registry import processed_registry
from services.webdav_service import webdav_service from services.webdav_service import webdav_service
from services import vram_manager from services import vram_manager
from document.generators import DocumentGenerator
def create_app() -> Flask: def create_app() -> Flask:
"""Create and configure Flask application""" """Create and configure Flask application"""
app = Flask(__name__) # Get the project root directory (parent of api/)
current_dir = Path(__file__).parent
project_root = current_dir.parent
template_dir = project_root / 'templates'
app = Flask(__name__, template_folder=str(template_dir))
CORS(app) CORS(app)
# Configure app # Configure app
@@ -157,6 +164,60 @@ def create_app() -> Flask:
app.logger.error(f"Error downloading file: {e}") app.logger.error(f"Error downloading file: {e}")
return jsonify({'error': 'File not found'}), 404 return jsonify({'error': 'File not found'}), 404
@app.route('/downloads/find-file')
def find_and_download_file():
    """Locate a generated file by probing several name variants and serve it.

    Query params:
        filename: base file name (extension-less); '..' and absolute paths
            are rejected to prevent path traversal.
        ext: file extension to look for (default 'txt').

    Serves the first matching variant inline (as_attachment=False) so the
    browser opens it instead of downloading; returns JSON 404 otherwise.
    """
    try:
        filename = request.args.get('filename', '')
        ext = request.args.get('ext', 'txt')

        if not filename:
            return jsonify({'error': 'Filename required'}), 400

        # Validate to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({'error': 'Invalid filename'}), 400

        # Try various name variants: with/without unified suffixes, and
        # with spaces replaced by underscores.
        base_name = filename.replace('_unificado', '').replace('_unified', '')
        name_variants = [
            f"{base_name}.{ext}",
            f"{base_name}_unificado.{ext}",
            f"{base_name}_unified.{ext}",
            f"{base_name.replace(' ', '_')}.{ext}",
            f"{base_name.replace(' ', '_')}_unificado.{ext}",
        ]

        # Directories to search, in priority order.
        directories = [
            settings.LOCAL_DOWNLOADS_PATH,
            settings.LOCAL_DOCX,
        ]

        # Extension -> mimetype so the browser renders the file inline
        # (table replaces the previous if/elif chain; unknown ext -> None,
        # letting Flask guess, same as before).
        inline_mimetypes = {
            'pdf': 'application/pdf',
            'md': 'text/markdown',
            'txt': 'text/plain',
        }

        for directory in directories:
            if not directory.exists():
                continue
            for variant in name_variants:
                if (directory / variant).exists():
                    # as_attachment=False -> open in browser, not download
                    return send_from_directory(
                        str(directory),
                        variant,
                        as_attachment=False,
                        mimetype=inline_mimetypes.get(ext),
                    )

        # BUGFIX: the 404 message previously contained a literal placeholder
        # instead of the requested name; interpolate it so clients can see
        # exactly what was missing.
        return jsonify({'error': f'File not found: {filename}.{ext}'}), 404
    except Exception as e:
        app.logger.error(f"Error finding file: {e}")
        return jsonify({'error': 'File not found'}), 404
@app.route('/health') @app.route('/health')
def health_check(): def health_check():
"""Health check endpoint""" """Health check endpoint"""
@@ -174,11 +235,316 @@ def create_app() -> Flask:
} }
}) })
@app.route('/api/transcription/<filename>')
def get_transcription(filename: str):
    """Return the stored transcription text plus word/char statistics."""
    try:
        # Reject anything that could escape the downloads directory.
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Strip whatever extension came in (.mp3, .wav, .txt, etc.).
        stem = Path(filename).stem

        # The transcription is stored alongside the audio as <stem>.txt.
        txt_file = settings.LOCAL_DOWNLOADS_PATH / f"{stem}.txt"
        if not txt_file.exists():
            return jsonify({
                'success': False,
                'message': f'Transcription file not found: {stem}.txt'
            }), 404

        with open(txt_file, 'r', encoding='utf-8') as fh:
            content = fh.read()

        return jsonify({
            'success': True,
            'filename': filename,
            'transcription': content,
            'file_path': str(txt_file),
            'word_count': len(content.split()),
            'char_count': len(content)
        })
    except Exception as e:
        app.logger.error(f"Error reading transcription: {e}")
        return jsonify({
            'success': False,
            'message': f"Error reading transcription: {str(e)}"
        }), 500
@app.route('/api/summary/<filename>')
def get_summary(filename: str):
    """Return the markdown summary for *filename*.

    Resolves the summary by stripping the extension and any
    _unificado/_unified suffix, then probing the known naming variants
    under LOCAL_DOWNLOADS_PATH. Also reports which output formats exist.
    """
    try:
        # Validate filename to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Extract base name without extension (handle .mp3, .wav, etc.)
        base_name = Path(filename).stem
        # Also remove _unificado/_unified suffixes if present
        base_name = base_name.replace('_unificado', '').replace('_unified', '')

        # Try the known file name variants, most specific first.
        possible_paths = [
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
        ]
        file_path = next((p for p in possible_paths if p.exists()), None)

        if not file_path:
            # BUGFIX: the 404 message previously contained a literal
            # placeholder instead of the requested name.
            return jsonify({
                'success': False,
                'message': f'Summary file not found for: {filename}'
            }), 404

        with open(file_path, 'r', encoding='utf-8') as f:
            summary_text = f.read()

        # Which output formats (md/docx/pdf/...) exist for this base name.
        formats_available = get_available_formats(base_name)

        return jsonify({
            'success': True,
            'filename': base_name,
            'summary': summary_text,
            'file_path': str(file_path),
            'formats_available': formats_available
        })
    except Exception as e:
        app.logger.error(f"Error reading summary: {e}")
        return jsonify({
            'success': False,
            'message': f"Error reading summary: {str(e)}"
        }), 500
@app.route('/api/versions/<filename>')
def get_versions(filename: str):
    """List every generated artifact (transcription + summary formats) for a file."""
    try:
        # Path-traversal guard.
        if '..' in filename or filename.startswith('/'):
            return jsonify({'success': False, 'message': 'Invalid filename'}), 400

        # Normalize to the bare base name.
        base_name = Path(filename).stem
        base_name = base_name.replace('_unificado', '').replace('_unified', '')

        downloads_path = settings.LOCAL_DOWNLOADS_PATH
        docx_path = settings.LOCAL_DOCX
        versions = []

        # Original transcription (.txt), if present.
        txt_path = downloads_path / f"{base_name}.txt"
        if txt_path.exists():
            meta = txt_path.stat()
            versions.append({
                'type': 'transcription',
                'label': '📝 Transcripción Original',
                'filename': txt_path.name,
                'path': f"/downloads/find-file?filename={base_name}&ext=txt",
                'date': datetime.fromtimestamp(meta.st_mtime).strftime('%Y-%m-%d %H:%M'),
                'size': f"{meta.st_size / 1024:.1f} KB"
            })

        # Summary artifacts (md / docx / pdf): downloads dir takes priority,
        # falling back to the DOCX directory.
        for pattern, label in (
            (f"{base_name}_unificado.md", "📋 Resumen MD"),
            (f"{base_name}_unificado.docx", "📄 Documento DOCX"),
            (f"{base_name}_unificado.pdf", "📑 PDF"),
        ):
            candidate = downloads_path / pattern
            if not candidate.exists():
                candidate = docx_path / pattern
            if candidate.exists():
                meta = candidate.stat()
                versions.append({
                    'type': 'summary',
                    'label': label,
                    'filename': pattern,
                    'path': f"/downloads/find-file?filename={base_name}&ext={candidate.suffix[1:]}",
                    'date': datetime.fromtimestamp(meta.st_mtime).strftime('%Y-%m-%d %H:%M'),
                    'size': f"{meta.st_size / 1024:.1f} KB"
                })

        # Newest first (ISO-like date strings sort lexicographically).
        versions.sort(key=lambda v: v['date'], reverse=True)

        return jsonify({
            'success': True,
            'base_name': base_name,
            'versions': versions,
            'count': len(versions)
        })
    except Exception as e:
        app.logger.error(f"Error getting versions: {e}")
        return jsonify({'success': False, 'message': str(e)}), 500
@app.route('/api/regenerate-summary', methods=['POST'])
def regenerate_summary():
    """Regenerate the summary from an existing transcription.

    JSON body:
        filename: audio/transcription name (extension optional).
        custom_prompt: accepted but currently NOT forwarded to the
            generator — NOTE(review): confirm whether generate_summary
            is meant to receive it.

    Re-runs DocumentGenerator on the stored .txt transcription and, when
    WebDAV is configured, best-effort uploads the regenerated md/docx/pdf.
    """
    start_time = time.time()
    try:
        # ROBUSTNESS: get_json() returns None for a missing or non-JSON
        # body, which previously crashed on .get(); normalize to a dict.
        data = request.get_json(silent=True) or {}
        filename = data.get('filename')
        custom_prompt = data.get('custom_prompt')  # unused — see docstring

        if not filename:
            return jsonify({
                'success': False,
                'message': 'Filename is required'
            }), 400

        # Validate filename to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Get base name (remove extension if present)
        base_name = Path(filename).stem

        # Read transcription from .txt file; fall back to the raw name in
        # case the caller already included the .txt extension.
        transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
        if not transcription_path.exists():
            transcription_path = settings.LOCAL_DOWNLOADS_PATH / filename
            if not transcription_path.exists():
                # BUGFIX: the 404 message previously contained a literal
                # placeholder instead of the requested name.
                return jsonify({
                    'success': False,
                    'message': f'Transcription file not found for: {filename}'
                }), 404

        with open(transcription_path, 'r', encoding='utf-8') as f:
            transcription_text = f.read()

        # Generate new summary using DocumentGenerator
        doc_generator = DocumentGenerator()
        success, new_summary, metadata = doc_generator.generate_summary(
            transcription_text,
            base_name
        )

        if not success:
            return jsonify({
                'success': False,
                'message': 'Failed to generate summary'
            }), 500

        # Best-effort upload of regenerated artifacts to WebDAV; regeneration
        # already succeeded, so upload failures only log a warning.
        files_updated = []
        if settings.has_webdav_config:
            try:
                # (metadata key, remote folder) per artifact type.
                upload_plan = [
                    ('markdown_path', settings.REMOTE_TXT_FOLDER),
                    ('docx_path', settings.DOCX_FOLDER),
                    ('pdf_path', settings.REMOTE_PDF_FOLDER),
                ]
                for meta_key, remote_folder in upload_plan:
                    if meta_key in metadata:
                        local_path = Path(metadata[meta_key])
                        if local_path.exists():
                            remote_path = f"{remote_folder}/{local_path.name}"
                            webdav_service.upload(str(local_path), remote_path)
                            files_updated.append(remote_path)
            except Exception as e:
                app.logger.warning(f"WebDAV upload failed: {e}")
                # Continue even if upload fails

        processing_time = time.time() - start_time
        return jsonify({
            'success': True,
            'message': 'Summary regenerated successfully',
            'new_summary': new_summary,
            'files_updated': files_updated,
            'processing_time': f"{processing_time:.2f}s",
            'metadata': metadata
        })
    except Exception as e:
        app.logger.error(f"Error regenerating summary: {e}")
        return jsonify({
            'success': False,
            'message': f"Error regenerating summary: {str(e)}"
        }), 500
@app.route('/api/files-detailed')
def get_files_detailed():
    """Return the detailed file listing plus aggregate transcription/summary counts."""
    try:
        detailed = get_audio_files_detailed()
        return jsonify({
            'success': True,
            'files': detailed,
            'total': len(detailed),
            'with_transcription': sum(1 for item in detailed if item['has_transcription']),
            'with_summary': sum(1 for item in detailed if item['has_summary'])
        })
    except Exception as e:
        app.logger.error(f"Error getting detailed files: {e}")
        return jsonify({
            'success': False,
            'message': f"Error: {str(e)}"
        }), 500
return app return app
def get_audio_files() -> List[Dict[str, Any]]: def get_audio_files() -> List[Dict[str, Any]]:
"""Get list of audio files from WebDAV and local""" """Get list of audio files from WebDAV and local"""
import logging
logger = logging.getLogger(__name__)
files = [] files = []
# Get files from WebDAV # Get files from WebDAV
@@ -202,7 +568,7 @@ def get_audio_files() -> List[Dict[str, Any]]:
'available_formats': get_available_formats(base_name) 'available_formats': get_available_formats(base_name)
}) })
except Exception as e: except Exception as e:
app.logger.error(f"Error getting WebDAV files: {e}") logger.warning(f"Error getting WebDAV files: {e}")
# Get local files # Get local files
try: try:
@@ -222,13 +588,113 @@ def get_audio_files() -> List[Dict[str, Any]]:
'available_formats': get_available_formats(file_path.name) 'available_formats': get_available_formats(file_path.name)
}) })
except Exception as e: except Exception as e:
app.logger.error(f"Error getting local files: {e}") logger.error(f"Error getting local files: {e}")
# Remove duplicates (WebDAV takes precedence) # Remove duplicates (keep both local and webdav - distinguish by source)
unique_files = {} unique_files = {}
for file in files: for file in files:
key = file['filename'] # Use (filename, source) as key to keep both local and webdav files
if key not in unique_files or file['source'] == 'webdav': key = (file['filename'], file['source'])
unique_files[key] = file
return sorted(unique_files.values(), key=lambda x: (x['source'], x['filename']))
def get_audio_files_detailed() -> List[Dict[str, Any]]:
    """Get detailed list of audio files with transcription and summary information.

    Scans LOCAL_DOWNLOADS_PATH for audio files (full detail: word counts,
    summary paths, mtimes), then appends WebDAV audio files not already
    present locally (limited detail). Results are de-duplicated by base
    name (first occurrence wins) and sorted by filename.
    """
    files = []

    # Local audio files: full detail is available from disk.
    try:
        if settings.LOCAL_DOWNLOADS_PATH.exists():
            for ext in settings.AUDIO_EXTENSIONS:
                for file_path in settings.LOCAL_DOWNLOADS_PATH.glob(f"*{ext}"):
                    stat = file_path.stat()
                    filename = file_path.name
                    base_name = file_path.stem

                    # Transcription (<base>.txt) and its word count.
                    transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
                    has_transcription = transcription_path.exists()
                    transcription_words = 0
                    if has_transcription:
                        try:
                            with open(transcription_path, 'r', encoding='utf-8') as f:
                                transcription_words = len(f.read().split())
                        except Exception:
                            # Unreadable transcription: keep the entry, count stays 0.
                            pass

                    # Check for summary and formats
                    formats = get_available_formats(filename)
                    has_summary = formats.get('md', False)

                    # Locate the summary markdown among the known variants.
                    summary_path = None
                    if has_summary:
                        for variant in (
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
                        ):
                            if variant.exists():
                                summary_path = str(variant)
                                break

                    files.append({
                        'filename': filename,
                        'base_name': base_name,
                        'audio_path': str(file_path),
                        'has_transcription': has_transcription,
                        'transcription_path': str(transcription_path) if has_transcription else None,
                        'transcription_words': transcription_words,
                        'has_summary': has_summary,
                        'summary_path': summary_path,
                        'formats': formats,
                        'processed': processed_registry.is_processed(filename),
                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                        'size': format_size(stat.st_size)
                    })
    except Exception:
        # Deliberate best-effort: the calling endpoint logs failures.
        pass

    # WebDAV audio files not already seen locally (limited detail).
    if settings.has_webdav_config:
        try:
            # PERF: snapshot local base names once instead of scanning the
            # growing list for every remote file (was O(n^2)); the final
            # de-dup below still drops any remote duplicates.
            local_bases = {f['base_name'] for f in files}
            for file_path in webdav_service.list(settings.REMOTE_AUDIOS_FOLDER):
                normalized_path = webdav_service.normalize_path(file_path)
                base_name = Path(normalized_path).stem
                if not any(normalized_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
                    continue
                if base_name in local_bases:
                    continue
                formats = get_available_formats(base_name)
                files.append({
                    'filename': Path(normalized_path).name,
                    'base_name': base_name,
                    'audio_path': normalized_path,
                    'has_transcription': formats.get('txt', False),
                    'transcription_path': None,
                    'transcription_words': 0,
                    'has_summary': formats.get('md', False),
                    'summary_path': None,
                    'formats': formats,
                    'processed': processed_registry.is_processed(normalized_path),
                    'last_modified': 'Unknown',
                    'size': 'Unknown'
                })
        except Exception:
            # Deliberate best-effort: the calling endpoint logs failures.
            pass

    # De-duplicate by base name (first occurrence wins) and sort.
    unique_files = {}
    for file in files:
        unique_files.setdefault(file['base_name'], file)
    return sorted(unique_files.values(), key=lambda x: x['filename'])

BIN
kubectl Normal file

Binary file not shown.

48
main.py
View File

@@ -9,10 +9,15 @@ import time
import fcntl import fcntl
import os import os
import json import json
import threading
from pathlib import Path from pathlib import Path
from datetime import datetime from datetime import datetime
from typing import Optional from typing import Optional
# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()
# Configure logging with JSON formatter for production # Configure logging with JSON formatter for production
class JSONFormatter(logging.Formatter): class JSONFormatter(logging.Formatter):
"""JSON formatter for structured logging in production""" """JSON formatter for structured logging in production"""
@@ -234,6 +239,41 @@ def send_error_notification(error_type: str, error_message: str) -> None:
logger.warning(f"Failed to send error notification: {e}") logger.warning(f"Failed to send error notification: {e}")
def run_dashboard_thread() -> None:
    """Run Flask dashboard in a separate thread.

    Binds on all interfaces with the Flask dev server in threaded mode.
    Any startup failure is logged; the exception is not re-raised because
    this runs in a daemon thread with no caller to handle it.
    """
    try:
        from api.routes import create_app
        app = create_app()
        # BUGFIX: honor DASHBOARD_PORT. start_dashboard() reads and logs
        # this variable, but the server previously always bound to a
        # hard-coded 5000 regardless of the configured port.
        port = int(os.getenv('DASHBOARD_PORT', '5000'))
        app.run(
            host='0.0.0.0',
            port=port,
            debug=False,
            threaded=True,
            use_reloader=False  # Important: disable reloader in thread
        )
    except Exception as e:
        logger.error(f"Dashboard thread error: {e}")
        logger.exception("Dashboard thread exception details")
def start_dashboard() -> threading.Thread:
    """Spawn the Flask dashboard in a background daemon thread and return it."""
    dashboard_port = int(os.getenv('DASHBOARD_PORT', '5000'))
    logger.info(f"Starting dashboard on port {dashboard_port}...")

    # Daemon thread: it must never keep the process alive at shutdown.
    worker = threading.Thread(
        target=run_dashboard_thread,
        name="DashboardThread",
        daemon=True,
    )
    worker.start()
    logger.info(f"Dashboard thread started (Thread-ID: {worker.ident})")
    return worker
def run_main_loop() -> None: def run_main_loop() -> None:
"""Main processing loop with improved error handling""" """Main processing loop with improved error handling"""
from config import settings from config import settings
@@ -418,13 +458,19 @@ def run_main_loop() -> None:
def main(): def main():
"""Main entry point""" """Main entry point"""
lock_fd = None lock_fd = None
dashboard_thread = None
try: try:
logger.info("=== CBCFacil Service Started ===") logger.info("=== CBCFacil Service Started ===")
logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}") logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
logger.info(f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}") logger.info(f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}")
lock_fd = acquire_lock() lock_fd = acquire_lock()
initialize_services() initialize_services()
# Start dashboard in background thread
dashboard_thread = start_dashboard()
# Run main processing loop
run_main_loop() run_main_loop()
except KeyboardInterrupt: except KeyboardInterrupt:

799
plus.md Normal file
View File

@@ -0,0 +1,799 @@
# 🚀 CBCFacil - Mejoras y Extensiones Recomendadas
Documento con recomendaciones para hacer el proyecto más complejo, robusto y profesional.
---
## 📋 Resumen Ejecutivo
Después de analizar todo el proyecto, identifiqué las siguientes áreas principales de mejora:
| Área | Prioridad | Complejidad | Estado Actual |
|------|-----------|-------------|---------------|
| Testing | 🔴 Alta | Media | Solo `conftest.py` existe |
| Frontend Dashboard | 🔴 Alta | Alta | Template básico sin JS |
| Sistema de Colas | 🟡 Media | Alta | Loop síncrono simple |
| Autenticación API | 🔴 Alta | Media | Sin autenticación |
| Base de Datos | 🟡 Media | Media | Solo archivo TXT |
| Métricas/Observabilidad | 🟡 Media | Media | Básico |
| Video Processor | 🟢 Baja | Alta | No existe |
| WebSockets | 🟢 Baja | Media | No existe |
| Internacionalización | 🟢 Baja | Baja | Solo español |
---
## 🧪 1. Testing Completo (CRÍTICO)
### Estado Actual
- Solo existe `tests/conftest.py` y `tests/__init__.py`
- No hay tests unitarios ni de integración implementados
- Arquitectura mencionada en `ARCHITECTURE.md` indica ~60% cobertura (falso)
### Recomendaciones
#### 1.1 Tests Unitarios
```
tests/
├── unit/
│ ├── test_settings.py # Validar configuración
│ ├── test_validators.py # Validar validators.py
│ ├── test_result.py # Patrón Result
│ ├── test_exceptions.py # Excepciones personalizadas
│ ├── test_bloom_filter.py # BloomFilter en registry
│ ├── test_token_bucket.py # Rate limiter
│ └── test_circuit_breaker.py # Circuit breaker
```
#### 1.2 Tests de Integración
```
tests/
├── integration/
│ ├── test_webdav_service.py # Mock de Nextcloud
│ ├── test_telegram_service.py # Mock de Telegram API
│ ├── test_ai_providers.py # Mock de APIs AI
│ ├── test_audio_processor.py # Con audio de prueba
│ ├── test_pdf_processor.py # Con PDF de prueba
│ └── test_document_generator.py
```
#### 1.3 Tests E2E
```
tests/
├── e2e/
│ ├── test_full_audio_workflow.py
│ ├── test_full_pdf_workflow.py
│ └── test_api_endpoints.py
```
#### 1.4 Fixtures de Prueba
```python
# tests/fixtures/
# - sample_audio.mp3 (5 segundos de audio en español)
# - sample_pdf.pdf (2 páginas con texto)
# - expected_transcription.txt
# - expected_summary.md
```
---
## 🖥️ 2. Dashboard Frontend Completo
### Estado Actual
- Solo existe `templates/` con un archivo básico
- API REST sin interfaz visual
- Sin JavaScript interactivo
### Recomendaciones
#### 2.1 Estructura Frontend
```
frontend/
├── src/
│ ├── components/
│ │ ├── FileList.js # Lista de archivos
│ │ ├── FileCard.js # Tarjeta individual
│ │ ├── ProcessingStatus.js # Estado en tiempo real
│ │ ├── GPUMonitor.js # Monitor VRAM
│ │ ├── QueueViewer.js # Cola de procesamiento
│ │ └── NotificationBell.js # Notificaciones
│ ├── pages/
│ │ ├── Dashboard.js # Vista principal
│ │ ├── Files.js # Gestión de archivos
│ │ ├── Settings.js # Configuración
│ │ └── Logs.js # Visor de logs
│ └── services/
│ ├── api.js # Cliente API
│ └── websocket.js # Conexión WS
├── public/
│ └── index.html
└── package.json
```
#### 2.2 Funcionalidades
- [ ] Drag & drop para subir archivos
- [ ] Preview de PDFs y audio
- [ ] Visor de transcripciones lado a lado
- [ ] Editor de resúmenes con Markdown preview
- [ ] Gráficas de uso de GPU/CPU
- [ ] Historial de procesamiento
- [ ] Búsqueda en contenido
- [ ] Dark mode / Light mode
---
## 📬 3. Sistema de Colas (Celery/RQ)
### Estado Actual
- Loop infinito síncrono en `main.py`
- Sin priorización de tareas
- Sin reintentos configurables
- Sin distribución de carga
### Recomendaciones
#### 3.1 Implementar Celery
```python
# services/queue/
__init__.py
celery_app.py # Configuración Celery
tasks/
__init__.py
audio_tasks.py # Tareas de audio
pdf_tasks.py # Tareas de PDF
notification_tasks.py
workers/
worker_config.py
```
#### 3.2 Estructura de Tareas
```python
# tasks/audio_tasks.py
from celery import shared_task
@shared_task(bind=True, max_retries=3, default_retry_delay=60)
def process_audio(self, file_path: str, options: dict) -> dict:
"""Procesar audio con reintentos automáticos"""
...
@shared_task
def transcribe_audio(file_path: str) -> str:
"""Transcribir audio con Whisper"""
...
@shared_task
def generate_summary(transcription: str, base_name: str) -> dict:
"""Generar resumen con IA"""
...
```
#### 3.3 Prioridades de Cola
- `high`: Archivos pequeños (<10MB)
- `default`: Archivos normales
- `low`: Archivos grandes (>100MB)
---
## 🔐 4. Autenticación y Autorización
### Estado Actual
- API completamente abierta
- Sin manejo de sesiones
- Sin roles de usuario
### Recomendaciones
#### 4.1 Implementar JWT
```python
# api/auth/
__init__.py
jwt_handler.py # Generación/validación JWT
middleware.py # Middleware de autenticación
decorators.py # @require_auth, @require_admin
models.py # User, Role, Permission
```
#### 4.2 Endpoints de Auth
```python
# api/routes_auth.py
POST /api/auth/login # Login con usuario/password
POST /api/auth/refresh # Refrescar token
POST /api/auth/logout # Invalidar token
GET /api/auth/me # Perfil del usuario
```
#### 4.3 Roles Sugeridos
- `admin`: Acceso completo
- `processor`: Puede procesar archivos
- `viewer`: Solo lectura
- `api`: Acceso solo API (para integraciones)
---
## 🗄️ 5. Base de Datos (SQLite/PostgreSQL)
### Estado Actual
- Solo `processed_files.txt` como registro
- Sin historial de procesamiento
- Sin metadatos de archivos
### Recomendaciones
#### 5.1 Modelos de Base de Datos
```python
# storage/models/
__init__.py
base.py # SQLAlchemy base
file.py # Modelo File
processing_job.py # Modelo ProcessingJob
user.py # Modelo User
audit_log.py # Modelo AuditLog
```
#### 5.2 Esquema Propuesto
```sql
-- files
CREATE TABLE files (
id SERIAL PRIMARY KEY,
filename VARCHAR(255) NOT NULL,
original_path TEXT,
file_type VARCHAR(20),
file_size BIGINT,
checksum VARCHAR(64),
status VARCHAR(20) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- processing_jobs
CREATE TABLE processing_jobs (
id SERIAL PRIMARY KEY,
file_id INTEGER REFERENCES files(id),
job_type VARCHAR(50),
status VARCHAR(20),
started_at TIMESTAMP,
completed_at TIMESTAMP,
error_message TEXT,
result_path TEXT,
metadata JSONB
);
-- audit_logs
CREATE TABLE audit_logs (
id SERIAL PRIMARY KEY,
user_id INTEGER,
action VARCHAR(50),
resource_type VARCHAR(50),
resource_id INTEGER,
details JSONB,
timestamp TIMESTAMP DEFAULT NOW()
);
```
---
## 📊 6. Métricas y Observabilidad
### Estado Actual
- `services/metrics_collector.py` básico
- Sin exportación a sistemas externos
- Sin dashboards de monitoreo
### Recomendaciones
#### 6.1 Prometheus Metrics
```python
# services/observability/
__init__.py
prometheus_exporter.py # Endpoint /metrics
metrics.py # Definición de métricas
tracing.py # Tracing distribuido
```
#### 6.2 Métricas a Implementar
```python
from prometheus_client import Counter, Histogram, Gauge
# Contadores
files_processed_total = Counter('files_processed_total', 'Total files processed', ['type', 'status'])
ai_requests_total = Counter('ai_requests_total', 'AI API requests', ['provider', 'operation'])
# Histogramas
processing_duration = Histogram('processing_duration_seconds', 'Processing time', ['type'])
ai_response_time = Histogram('ai_response_time_seconds', 'AI response time', ['provider'])
# Gauges
active_jobs = Gauge('active_jobs', 'Currently processing jobs')
vram_usage = Gauge('vram_usage_bytes', 'GPU memory usage')
queue_size = Gauge('queue_size', 'Jobs in queue', ['priority'])
```
#### 6.3 Integración
- [ ] Grafana dashboard preconfigurado
- [ ] Alertas con AlertManager
- [ ] Logs estructurados con Loki
- [ ] Tracing con Jaeger/Zipkin
---
## 🎬 7. Video Processor (NUEVO)
### Recomendaciones
#### 7.1 Estructura
```python
# processors/video_processor.py
class VideoProcessor(FileProcessor):
"""Processor for video files"""
def extract_audio(self, video_path: str) -> str:
"""Extraer audio de video con ffmpeg"""
...
def extract_frames(self, video_path: str, interval: int = 60) -> List[str]:
"""Extraer frames cada N segundos para análisis"""
...
def analyze_frames(self, frames: List[str]) -> Dict[str, Any]:
"""Analizar frames con visión AI (Gemini Vision)"""
...
def process(self, file_path: str) -> Dict[str, Any]:
"""Pipeline completo: audio + frames + análisis"""
...
```
#### 7.2 Extensiones de Video
```python
VIDEO_EXTENSIONS = {".mp4", ".avi", ".mkv", ".mov", ".webm"}
```
#### 7.3 Funcionalidades
- [ ] Transcripción de audio del video
- [ ] Extracción de frames clave
- [ ] Análisis visual con IA (slides, pizarra)
- [ ] Generación de índice por escenas
- [ ] Subtítulos automáticos (SRT/VTT)
---
## 🔌 8. WebSockets para Tiempo Real
### Estado Actual
- Solo API REST
- Sin actualizaciones en tiempo real
- Polling pesado para estado
### Recomendaciones
#### 8.1 Implementación
```python
# api/websocket/
__init__.py
manager.py # ConnectionManager
events.py # Tipos de eventos
handlers.py # Event handlers
```
#### 8.2 Eventos a Implementar
```python
# Eventos del servidor -> cliente
{
"type": "file.processing_started",
"data": {"file_id": 1, "filename": "audio.mp3"}
}
{
"type": "file.processing_progress",
"data": {"file_id": 1, "progress": 45, "stage": "transcribing"}
}
{
"type": "file.processing_completed",
"data": {"file_id": 1, "result_path": "/path/to/result.docx"}
}
{
"type": "system.gpu_usage",
"data": {"vram_used": 4.5, "vram_total": 8.0}
}
```
#### 8.3 Integración con Flask
```python
from flask_socketio import SocketIO, emit
socketio = SocketIO(app, cors_allowed_origins="*")
@socketio.on('connect')
def handle_connect():
emit('connected', {'status': 'ok'})
@socketio.on('subscribe')
def handle_subscribe(data):
join_room(data['file_id'])
```
---
## 🌐 9. API versioning y OpenAPI
### Estado Actual
- API sin versionado
- Sin documentación OpenAPI/Swagger
- Endpoints inconsistentes
### Recomendaciones
#### 9.1 Versionado de API
```
/api/v1/files
/api/v1/process
/api/v1/health
/api/v2/files (futura versión)
```
#### 9.2 OpenAPI Spec
```python
# api/openapi/
spec.yaml # Especificación OpenAPI 3.0
swagger_ui.py # Swagger UI integration
# Usar flask-restx o flasgger
from flask_restx import Api, Resource, fields
api = Api(app,
version='1.0',
title='CBCFacil API',
description='API para procesamiento de documentos'
)
```
---
## 🐳 10. Containerización Mejorada
### Estado Actual
- `.dockerignore` existe pero no Dockerfile completo
- Sin docker-compose
- Sin multi-stage builds
### Recomendaciones
#### 10.1 Docker Multi-stage
```dockerfile
# Dockerfile
FROM python:3.11-slim as builder
WORKDIR /app
COPY requirements.txt .
RUN pip wheel --no-cache-dir -w /wheels -r requirements.txt
FROM nvidia/cuda:12.1-runtime-ubuntu22.04 as runtime
# ... instalación optimizada
```
#### 10.2 Docker Compose
```yaml
# docker-compose.yml
version: '3.8'
services:
app:
build: .
ports:
- "5000:5000"
environment:
- NVIDIA_VISIBLE_DEVICES=all
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
redis:
image: redis:7-alpine
celery-worker:
build: .
command: celery -A celery_app worker -l info
depends_on:
- redis
prometheus:
image: prom/prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
grafana:
image: grafana/grafana
ports:
- "3000:3000"
```
---
## 🔧 11. Unificación y Refactoring
### 11.1 AIProvider Unificado
Actualmente existe lógica duplicada entre:
- `services/ai_service.py`
- `services/ai/gemini_provider.py`
- `services/ai/claude_provider.py`
**Recomendación**: Crear interfaz unificada con Chain of Responsibility:
```python
class AIProviderChain:
"""Cadena de proveedores con fallback automático"""
def __init__(self, providers: List[AIProvider]):
self.providers = providers
def generate(self, prompt: str) -> str:
for provider in self.providers:
try:
if provider.is_available():
return provider.generate_text(prompt)
except Exception as e:
logging.warning(f"{provider.name} failed: {e}")
raise AllProvidersFailedError()
```
### 11.2 Procesadores Unificados
Crear pipeline unificado:
```python
class ProcessingPipeline:
def __init__(self):
self.steps = []
def add_step(self, processor: FileProcessor):
self.steps.append(processor)
return self
def process(self, file_path: str) -> Dict[str, Any]:
result = {}
for step in self.steps:
if step.can_process(file_path):
result.update(step.process(file_path))
return result
```
---
## 📱 12. Notificaciones Mejoradas
### Estado Actual
- Solo Telegram
- Sin templates de mensajes
- Sin notificaciones push
### Recomendaciones
#### 12.1 Multi-canal
```python
# services/notifications/
__init__.py
base_notifier.py # Interface base
telegram_notifier.py # Actual optimizado
email_notifier.py # Nuevo
slack_notifier.py # Nuevo
webhook_notifier.py # Para integraciones
notification_manager.py # Orquestador
```
#### 12.2 Templates de Mensaje
```python
TEMPLATES = {
    "processing_started": "🎵 Procesando: {filename}\n⏱️ Estimado: {eta}",
    "processing_completed": "✅ Completado: {filename}\n📄 Resumen: {summary_url}",
    "processing_failed": "❌ Error en {filename}\n🔍 Detalles: {error}",
    "daily_summary": "📊 Resumen del día:\n- Procesados: {count}\n- Tiempo total: {time}"
}
```
---
## 🗂️ 13. Sistema de Plugins
### Recomendaciones
```python
# plugins/
__init__.py
base_plugin.py # Interface de plugin
plugin_manager.py # Gestor de plugins
examples/
custom_ocr/ # Plugin OCR personalizado
s3_storage/ # Plugin para AWS S3
discord_notifier/ # Plugin Discord
```
#### Interfaz de Plugin
```python
class BasePlugin(ABC):
"""Base class for plugins"""
@property
@abstractmethod
def name(self) -> str: ...
@property
@abstractmethod
def version(self) -> str: ...
@abstractmethod
def initialize(self, config: dict) -> None: ...
@abstractmethod
def execute(self, context: dict) -> dict: ...
@abstractmethod
def cleanup(self) -> None: ...
```
---
## 📈 14. Mejoras de Rendimiento
### 14.1 Caching Avanzado
```python
# services/cache/
__init__.py
cache_manager.py # Gestor de cache
redis_cache.py # Cache en Redis
file_cache.py # Cache en disco
```
### 14.2 Batch Processing
```python
class BatchProcessor:
"""Procesar múltiples archivos en paralelo"""
def process_batch(self, files: List[str], max_workers: int = 4) -> List[dict]:
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = {executor.submit(self.process_single, f): f for f in files}
results = []
for future in as_completed(futures):
results.append(future.result())
return results
```
### 14.3 Streaming de Archivos Grandes
```python
def stream_process(file_path: str, chunk_size: int = 1024*1024):
"""Procesar archivos grandes en streaming"""
with open(file_path, 'rb') as f:
while chunk := f.read(chunk_size):
yield process_chunk(chunk)
```
---
## 🔒 15. Seguridad Adicional
### 15.1 Validación de Archivos
```python
# services/security/
__init__.py
file_validator.py # Validación de archivos
malware_scanner.py # Escaneo de malware
rate_limiter.py # Rate limiting por IP
```
### 15.2 Checks de Seguridad
- [ ] Validar tipos MIME reales (no solo extensiones)
- [ ] Limitar tamaño máximo de archivo
- [ ] Sanitizar nombres de archivo
- [ ] Escanear con ClamAV
- [ ] Rate limiting por usuario/IP
- [ ] Logs de auditoría
---
## 📝 16. CLI Mejorado
### Estado Actual
- Solo comandos básicos en `main.py`
### Recomendaciones
```python
# cli/
__init__.py
main.py # Click/Typer app
commands/
process.py # cbcfacil process audio.mp3
queue.py # cbcfacil queue list/stats
config.py # cbcfacil config show/set
db.py # cbcfacil db migrate/seed
```
#### Ejemplo con Typer
```python
import typer
app = typer.Typer()
@app.command()
def process(
file: str,
output_dir: str = ".",
format: str = "docx",
ai_provider: str = "auto"
):
"""Procesar archivo de audio o PDF"""
...
@app.command()
def status():
"""Mostrar estado del servicio"""
...
@app.command()
def queue(action: str):
"""Gestionar cola de procesamiento"""
...
```
---
## 📁 Resumen de Nuevos Archivos/Directorios
```
cbc/
├── tests/
│ ├── unit/
│ ├── integration/
│ ├── e2e/
│ └── fixtures/
├── frontend/
│ ├── src/
│ └── public/
├── services/
│ ├── queue/
│ ├── cache/
│ ├── notifications/
│ ├── observability/
│ └── security/
├── api/
│ ├── auth/
│ ├── websocket/
│ └── openapi/
├── storage/
│ └── models/
├── processors/
│ └── video_processor.py
├── plugins/
├── cli/
├── docker-compose.yml
├── prometheus.yml
└── grafana/
```
---
## ✅ Checklist de Implementación
### Fase 1 - Fundamentos (2-3 semanas)
- [ ] Implementar tests unitarios básicos
- [ ] Agregar autenticación JWT
- [ ] Migrar a base de datos SQLite
### Fase 2 - Mejoras Core (3-4 semanas)
- [ ] Implementar sistema de colas con Celery
- [ ] Agregar WebSockets
- [ ] Crear dashboard frontend básico
### Fase 3 - Observabilidad (1-2 semanas)
- [ ] Prometheus metrics
- [ ] Grafana dashboards
- [ ] Logging estructurado
### Fase 4 - Extensiones (2-3 semanas)
- [ ] Video processor
- [ ] Multi-canal de notificaciones
- [ ] Sistema de plugins
### Fase 5 - Producción (2 semanas)
- [ ] Docker compose completo
- [ ] CI/CD pipeline
- [ ] Documentación completa
---
*Documento generado por análisis exhaustivo del proyecto CBCFacil v9*

File diff suppressed because it is too large Load Diff

1198
todo.md Normal file

File diff suppressed because it is too large Load Diff