Actualización: cambios varios y archivos nuevos

- Modificaciones en main.py - Agregado kubectl - Agregado plus.md y todo.md
feat(dashboard): agregar panel de versiones y corregir carga de transcripciones
2026-01-10 19:25:37 +00:00 · 2026-01-10 19:18:14 +00:00
6 changed files with 3652 additions and 47 deletions
--- a/api/routes.py
+++ b/api/routes.py
@@ -2,6 +2,7 @@
 Flask API routes for CBCFacil dashboard
 """
 import os
+import time
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Any, List
@@ -9,14 +10,20 @@ from flask import Flask, render_template, request, jsonify, send_from_directory
 from flask_cors import CORS

 from config import settings
-from storage import processed_registry
+from storage.processed_registry import processed_registry
 from services.webdav_service import webdav_service
 from services import vram_manager
+from document.generators import DocumentGenerator


 def create_app() -> Flask:
    """Create and configure Flask application"""
-    app = Flask(__name__)
+    # Get the project root directory (parent of api/)
+    current_dir = Path(__file__).parent
+    project_root = current_dir.parent
+    template_dir = project_root / 'templates'
+
+    app = Flask(__name__, template_folder=str(template_dir))
    CORS(app)

    # Configure app
@@ -157,6 +164,60 @@ def create_app() -> Flask:
            app.logger.error(f"Error downloading file: {e}")
            return jsonify({'error': 'File not found'}), 404

+    @app.route('/downloads/find-file')
+    def find_and_download_file():
+        """Locate a generated file by trying known name variants and serve it inline.
+
+        Query parameters:
+            filename: base name of the file; ``_unificado``/``_unified`` markers
+                are stripped before the variants are built.
+            ext: extension without the dot (defaults to ``txt``).
+
+        Returns:
+            The first matching file from ``settings.LOCAL_DOWNLOADS_PATH`` or
+            ``settings.LOCAL_DOCX`` (sent inline, not as an attachment), a 400
+            JSON error for a missing/invalid filename or extension, or a 404
+            JSON error when no variant exists or any exception is raised.
+        """
+        try:
+            filename = request.args.get('filename', '')
+            ext = request.args.get('ext', 'txt')
+
+            if not filename:
+                return jsonify({'error': 'Filename required'}), 400
+
+            # Validate to prevent path traversal
+            if '..' in filename or filename.startswith('/'):
+                return jsonify({'error': 'Invalid filename'}), 400
+
+            # The extension is user input that ends up in a filesystem path too,
+            # so separators, dots and empty values are rejected
+            if not ext.isalnum():
+                return jsonify({'error': 'Invalid extension'}), 400
+
+            # Try various name variants
+            base_name = filename.replace('_unificado', '').replace('_unified', '')
+            underscored = base_name.replace(' ', '_')
+            name_variants = [
+                f"{base_name}.{ext}",
+                f"{base_name}_unificado.{ext}",
+                f"{base_name}_unified.{ext}",
+                f"{underscored}.{ext}",
+                f"{underscored}_unificado.{ext}",
+            ]
+
+            # Explicit mimetype so the browser renders the file; None for other
+            # extensions leaves the choice to send_from_directory
+            mimetype = {
+                'pdf': 'application/pdf',
+                'md': 'text/markdown',
+                'txt': 'text/plain',
+            }.get(ext)
+
+            # Directories to search, in priority order
+            directories = [
+                settings.LOCAL_DOWNLOADS_PATH,
+                settings.LOCAL_DOCX
+            ]
+
+            # Search for file
+            for directory in directories:
+                if not directory.exists():
+                    continue
+                for variant in name_variants:
+                    if (directory / variant).exists():
+                        # as_attachment=False: open in the browser instead of downloading
+                        return send_from_directory(str(directory), variant, as_attachment=False, mimetype=mimetype)
+
+            return jsonify({'error': f'File not found: {filename}.{ext}'}), 404
+
+        except Exception as e:
+            app.logger.error(f"Error finding file: {e}")
+            return jsonify({'error': 'File not found'}), 404
+
    @app.route('/health')
    def health_check():
        """Health check endpoint"""
@@ -174,11 +235,316 @@ def create_app() -> Flask:
            }
        })

+    @app.route('/api/transcription/<filename>')
+    def get_transcription(filename: str):
+        """Return the stored transcription text for ``filename`` with basic statistics.
+
+        The ``.txt`` file is looked up in ``settings.LOCAL_DOWNLOADS_PATH`` using
+        the stem of ``filename``. Responds 400 for an invalid name, 404 when the
+        transcription is missing and 500 on any unexpected error.
+        """
+        try:
+            # Reject names that could escape the downloads directory
+            if filename.startswith('/') or '..' in filename:
+                return jsonify({
+                    'success': False,
+                    'message': 'Invalid filename'
+                }), 400
+
+            # Whatever extension was sent (.mp3, .wav, .txt, ...) is dropped
+            stem = Path(filename).stem
+            txt_file = settings.LOCAL_DOWNLOADS_PATH / f"{stem}.txt"
+
+            if not txt_file.exists():
+                return jsonify({
+                    'success': False,
+                    'message': f'Transcription file not found: {stem}.txt'
+                }), 404
+
+            text = txt_file.read_text(encoding='utf-8')
+
+            # Word count splits on whitespace; char count is the raw text length
+            payload = {
+                'success': True,
+                'filename': filename,
+                'transcription': text,
+                'file_path': str(txt_file),
+                'word_count': len(text.split()),
+                'char_count': len(text)
+            }
+            return jsonify(payload)
+
+        except Exception as e:
+            app.logger.error(f"Error reading transcription: {e}")
+            return jsonify({
+                'success': False,
+                'message': f"Error reading transcription: {str(e)}"
+            }), 500
+
+    @app.route('/api/summary/<filename>')
+    def get_summary(filename: str):
+        """Return the Markdown summary stored for ``filename`` plus its available formats.
+
+        Looks in ``settings.LOCAL_DOWNLOADS_PATH`` for ``<base>_unificado.md``,
+        ``<base>_unified.md`` and ``<base>.md`` (in that order) and serves the
+        first one that exists. Responds 400 for an invalid name, 404 when no
+        summary is found and 500 on any unexpected error.
+        """
+        try:
+            # Reject names that could escape the downloads directory
+            if filename.startswith('/') or '..' in filename:
+                return jsonify({
+                    'success': False,
+                    'message': 'Invalid filename'
+                }), 400
+
+            # Drop the extension first, then any _unificado/_unified marker
+            base_name = Path(filename).stem
+            for marker in ('_unificado', '_unified'):
+                base_name = base_name.replace(marker, '')
+
+            # Candidate summary files, most specific name first
+            candidates = (
+                settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
+                settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
+                settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
+            )
+            summary_file = next((c for c in candidates if c.exists()), None)
+
+            if summary_file is None:
+                return jsonify({
+                    'success': False,
+                    'message': f'Summary file not found for: {filename}'
+                }), 404
+
+            summary_text = summary_file.read_text(encoding='utf-8')
+            formats_available = get_available_formats(base_name)
+
+            payload = {
+                'success': True,
+                'filename': base_name,
+                'summary': summary_text,
+                'file_path': str(summary_file),
+                'formats_available': formats_available
+            }
+            return jsonify(payload)
+
+        except Exception as e:
+            app.logger.error(f"Error reading summary: {e}")
+            return jsonify({
+                'success': False,
+                'message': f"Error reading summary: {str(e)}"
+            }), 500
+
+    @app.route('/api/versions/<filename>')
+    def get_versions(filename: str):
+        """List the transcription and summary files that exist for ``filename``.
+
+        The original transcription (``<base>.txt``) is looked up in
+        ``settings.LOCAL_DOWNLOADS_PATH``; each ``<base>_unificado`` summary
+        (md, docx, pdf) is looked up there first and then in
+        ``settings.LOCAL_DOCX``. Every entry carries a ``/downloads/find-file``
+        link, its modification date and its size.
+
+        Returns:
+            JSON with ``base_name``, ``versions`` (newest first) and ``count``;
+            400 for an invalid filename, 500 on any unexpected error.
+        """
+        # Local import keeps the handler self-contained
+        from urllib.parse import urlencode
+
+        try:
+            # Validate filename
+            if '..' in filename or filename.startswith('/'):
+                return jsonify({'success': False, 'message': 'Invalid filename'}), 400
+
+            # Extract base name
+            base_name = Path(filename).stem
+            base_name = base_name.replace('_unificado', '').replace('_unified', '')
+
+            downloads_path = settings.LOCAL_DOWNLOADS_PATH
+            docx_path = settings.LOCAL_DOCX
+
+            def build_entry(file_path: Path, entry_type: str, label: str) -> Dict[str, Any]:
+                """Describe one existing file as a version entry."""
+                stat = file_path.stat()
+                # urlencode escapes spaces, '&', '#', ... so the link stays a
+                # valid query string for any file name
+                query = urlencode({'filename': base_name, 'ext': file_path.suffix[1:]})
+                return {
+                    'type': entry_type,
+                    'label': label,
+                    'filename': file_path.name,
+                    'path': f"/downloads/find-file?{query}",
+                    'date': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M'),
+                    'size': f"{stat.st_size / 1024:.1f} KB"
+                }
+
+            versions = []
+
+            # Check for transcription (original)
+            txt_path = downloads_path / f"{base_name}.txt"
+            if txt_path.exists():
+                versions.append(build_entry(txt_path, 'transcription', '📝 Transcripción Original'))
+
+            # Check for summary versions (md, docx, pdf)
+            summary_patterns = [
+                (f"{base_name}_unificado.md", "📋 Resumen MD"),
+                (f"{base_name}_unificado.docx", "📄 Documento DOCX"),
+                (f"{base_name}_unificado.pdf", "📑 PDF"),
+            ]
+
+            for pattern, label in summary_patterns:
+                # Downloads folder first, DOCX folder as fallback
+                file_path = downloads_path / pattern
+                if not file_path.exists():
+                    file_path = docx_path / pattern
+
+                if file_path.exists():
+                    versions.append(build_entry(file_path, 'summary', label))
+
+            # Sort by date descending; the 'YYYY-MM-DD HH:MM' strings order chronologically
+            versions.sort(key=lambda x: x['date'], reverse=True)
+
+            return jsonify({
+                'success': True,
+                'base_name': base_name,
+                'versions': versions,
+                'count': len(versions)
+            })
+
+        except Exception as e:
+            app.logger.error(f"Error getting versions: {e}")
+            return jsonify({'success': False, 'message': str(e)}), 500
+
+    @app.route('/api/regenerate-summary', methods=['POST'])
+    def regenerate_summary():
+        """Regenerate the summary of an existing transcription.
+
+        Expects a JSON object with a ``filename`` key. The transcription is read
+        from ``settings.LOCAL_DOWNLOADS_PATH``, passed to
+        ``DocumentGenerator.generate_summary`` and, when WebDAV is configured,
+        the generated markdown/DOCX/PDF files are uploaded (upload failures are
+        logged and do not fail the request).
+
+        Returns:
+            JSON with the new summary, uploaded remote paths, processing time and
+            generator metadata; 400 for a missing/invalid filename or body,
+            404 when the transcription is missing, 500 on any other failure.
+        """
+        start_time = time.time()
+
+        try:
+            # silent=True: a missing or malformed JSON body yields None here and
+            # is answered with a 400 below instead of surfacing as a 500
+            data = request.get_json(silent=True)
+            filename = data.get('filename') if isinstance(data, dict) else None
+            # NOTE(review): the request may also carry 'custom_prompt', but it is
+            # not forwarded to the generator - confirm whether generate_summary
+            # supports a custom prompt before wiring it in.
+
+            if not filename or not isinstance(filename, str):
+                return jsonify({
+                    'success': False,
+                    'message': 'Filename is required'
+                }), 400
+
+            # Validate filename to prevent path traversal
+            if '..' in filename or filename.startswith('/'):
+                return jsonify({
+                    'success': False,
+                    'message': 'Invalid filename'
+                }), 400
+
+            # Get base name (remove extension if present)
+            base_name = Path(filename).stem
+
+            # Read transcription from .txt file
+            transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
+
+            if not transcription_path.exists():
+                # Fall back to the name exactly as it was sent
+                transcription_path = settings.LOCAL_DOWNLOADS_PATH / filename
+                if not transcription_path.exists():
+                    return jsonify({
+                        'success': False,
+                        'message': f'Transcription file not found for: {filename}'
+                    }), 404
+
+            with open(transcription_path, 'r', encoding='utf-8') as f:
+                transcription_text = f.read()
+
+            # Generate new summary using DocumentGenerator
+            doc_generator = DocumentGenerator()
+            success, new_summary, metadata = doc_generator.generate_summary(
+                transcription_text,
+                base_name
+            )
+
+            if not success:
+                return jsonify({
+                    'success': False,
+                    'message': 'Failed to generate summary'
+                }), 500
+
+            # Upload to WebDAV if configured
+            files_updated = []
+            if settings.has_webdav_config:
+                # (metadata key, settings attribute holding the remote folder);
+                # the attribute is resolved lazily, only for files that exist
+                upload_targets = (
+                    ('markdown_path', 'REMOTE_TXT_FOLDER'),
+                    ('docx_path', 'DOCX_FOLDER'),
+                    ('pdf_path', 'REMOTE_PDF_FOLDER'),
+                )
+                try:
+                    for metadata_key, folder_attr in upload_targets:
+                        if metadata_key not in metadata:
+                            continue
+                        local_path = Path(metadata[metadata_key])
+                        if not local_path.exists():
+                            continue
+                        remote_path = f"{getattr(settings, folder_attr)}/{local_path.name}"
+                        webdav_service.upload(str(local_path), remote_path)
+                        files_updated.append(remote_path)
+
+                except Exception as e:
+                    app.logger.warning(f"WebDAV upload failed: {e}")
+                    # Continue even if upload fails
+
+            processing_time = time.time() - start_time
+
+            return jsonify({
+                'success': True,
+                'message': 'Summary regenerated successfully',
+                'new_summary': new_summary,
+                'files_updated': files_updated,
+                'processing_time': f"{processing_time:.2f}s",
+                'metadata': metadata
+            })
+
+        except Exception as e:
+            app.logger.error(f"Error regenerating summary: {e}")
+            return jsonify({
+                'success': False,
+                'message': f"Error regenerating summary: {str(e)}"
+            }), 500
+
+    @app.route('/api/files-detailed')
+    def get_files_detailed():
+        """Return the detailed file list together with aggregate counters.
+
+        The list comes from ``get_audio_files_detailed()``; the response also
+        reports how many entries have a transcription and how many have a
+        summary. Any exception is logged and answered with a 500 JSON error.
+        """
+        try:
+            detailed = get_audio_files_detailed()
+
+            # Aggregate counters shown next to the list
+            transcribed = [entry for entry in detailed if entry['has_transcription']]
+            summarized = [entry for entry in detailed if entry['has_summary']]
+
+            payload = {
+                'success': True,
+                'files': detailed,
+                'total': len(detailed),
+                'with_transcription': len(transcribed),
+                'with_summary': len(summarized)
+            }
+            return jsonify(payload)
+
+        except Exception as e:
+            app.logger.error(f"Error getting detailed files: {e}")
+            return jsonify({
+                'success': False,
+                'message': f"Error: {str(e)}"
+            }), 500
+
    return app


 def get_audio_files() -> List[Dict[str, Any]]:
    """Get list of audio files from WebDAV and local"""
+    import logging
+    logger = logging.getLogger(__name__)
    files = []

    # Get files from WebDAV
@@ -202,7 +568,7 @@ def get_audio_files() -> List[Dict[str, Any]]:
                        'available_formats': get_available_formats(base_name)
                    })
        except Exception as e:
-            app.logger.error(f"Error getting WebDAV files: {e}")
+            logger.warning(f"Error getting WebDAV files: {e}")

    # Get local files
    try:
@@ -222,13 +588,113 @@ def get_audio_files() -> List[Dict[str, Any]]:
                        'available_formats': get_available_formats(file_path.name)
                    })
    except Exception as e:
-        app.logger.error(f"Error getting local files: {e}")
+        logger.error(f"Error getting local files: {e}")

-    # Remove duplicates (WebDAV takes precedence)
+    # Remove duplicates (keep both local and webdav - distinguish by source)
    unique_files = {}
    for file in files:
-        key = file['filename']
-        if key not in unique_files or file['source'] == 'webdav':
+        # Use (filename, source) as key to keep both local and webdav files
+        key = (file['filename'], file['source'])
+        unique_files[key] = file
+
+    return sorted(unique_files.values(), key=lambda x: (x['source'], x['filename']))
+
+
+def get_audio_files_detailed() -> List[Dict[str, Any]]:
+    """Get detailed list of audio files with transcription and summary information.
+
+    Local audio files in ``settings.LOCAL_DOWNLOADS_PATH`` are listed first,
+    each with its transcription word count, summary path and available
+    formats. When WebDAV is configured, remote audio files whose base name is
+    not already present are appended with unknown size and date.
+
+    Failures while listing either source are logged and the files collected so
+    far are still returned. The result holds one entry per base name, sorted
+    by filename.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+    files = []
+
+    # Get local audio files
+    try:
+        if settings.LOCAL_DOWNLOADS_PATH.exists():
+            for ext in settings.AUDIO_EXTENSIONS:
+                for file_path in settings.LOCAL_DOWNLOADS_PATH.glob(f"*{ext}"):
+                    stat = file_path.stat()
+                    filename = file_path.name
+                    base_name = file_path.stem
+
+                    # Check for transcription
+                    transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
+                    has_transcription = transcription_path.exists()
+                    transcription_words = 0
+
+                    if has_transcription:
+                        try:
+                            with open(transcription_path, 'r', encoding='utf-8') as f:
+                                transcription_words = len(f.read().split())
+                        except (OSError, UnicodeError) as e:
+                            # Best effort: an unreadable transcription keeps a count of 0
+                            logger.warning(f"Error reading transcription {transcription_path}: {e}")
+
+                    # Check for summary and formats
+                    formats = get_available_formats(filename)
+                    has_summary = formats.get('md', False)
+
+                    # Get summary path: first existing variant wins
+                    summary_path = None
+                    if has_summary:
+                        summary_variants = [
+                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
+                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
+                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
+                        ]
+                        for variant in summary_variants:
+                            if variant.exists():
+                                summary_path = str(variant)
+                                break
+
+                    files.append({
+                        'filename': filename,
+                        'base_name': base_name,
+                        'audio_path': str(file_path),
+                        'has_transcription': has_transcription,
+                        'transcription_path': str(transcription_path) if has_transcription else None,
+                        'transcription_words': transcription_words,
+                        'has_summary': has_summary,
+                        'summary_path': summary_path,
+                        'formats': formats,
+                        'processed': processed_registry.is_processed(filename),
+                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
+                        'size': format_size(stat.st_size)
+                    })
+    except Exception as e:
+        # Swallowed on purpose so the caller still gets a partial list,
+        # but logged here because it never reaches the endpoint
+        logger.error(f"Error getting local files: {e}")
+
+    # Get WebDAV files
+    if settings.has_webdav_config:
+        try:
+            # Base names already listed; a set keeps the membership test O(1)
+            known_base_names = {f['base_name'] for f in files}
+
+            webdav_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
+            for file_path in webdav_files:
+                normalized_path = webdav_service.normalize_path(file_path)
+                base_name = Path(normalized_path).stem
+
+                if not any(normalized_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
+                    continue
+                # Check if already in the list (local copy or earlier remote entry)
+                if base_name in known_base_names:
+                    continue
+
+                formats = get_available_formats(base_name)
+                known_base_names.add(base_name)
+
+                files.append({
+                    'filename': Path(normalized_path).name,
+                    'base_name': base_name,
+                    'audio_path': normalized_path,
+                    'has_transcription': formats.get('txt', False),
+                    'transcription_path': None,
+                    'transcription_words': 0,
+                    'has_summary': formats.get('md', False),
+                    'summary_path': None,
+                    'formats': formats,
+                    'processed': processed_registry.is_processed(normalized_path),
+                    'last_modified': 'Unknown',
+                    'size': 'Unknown'
+                })
+        except Exception as e:
+            # Same policy as the local listing: keep what was collected, log the failure
+            logger.warning(f"Error getting WebDAV files: {e}")
+
+    # Remove duplicates and sort
+    unique_files = {}
+    for file in files:
+        key = file['base_name']
+        if key not in unique_files:
             unique_files[key] = file

     return sorted(unique_files.values(), key=lambda x: x['filename'])
--- a/BIN
+++ b/BIN
--- a/main.py
+++ b/main.py
@@ -9,10 +9,15 @@ import time
 import fcntl
 import os
 import json
+import threading
 from pathlib import Path
 from datetime import datetime
 from typing import Optional

+# Load environment variables from .env file
+from dotenv import load_dotenv
+load_dotenv()
+
 # Configure logging with JSON formatter for production
 class JSONFormatter(logging.Formatter):
    """JSON formatter for structured logging in production"""
@@ -234,6 +239,41 @@ def send_error_notification(error_type: str, error_message: str) -> None:
        logger.warning(f"Failed to send error notification: {e}")


+def run_dashboard_thread() -> None:
+    """Run the Flask dashboard; used as the target of the dashboard thread.
+
+    The port is read from the ``DASHBOARD_PORT`` environment variable
+    (default 5000), the same value ``start_dashboard`` reports in its log
+    line. Any exception is logged and swallowed so that a dashboard failure
+    does not propagate out of the thread.
+    """
+    try:
+        from api.routes import create_app
+        app = create_app()
+
+        # Debug off and threaded=True so requests are served concurrently
+        app.run(
+            host='0.0.0.0',
+            port=int(os.getenv('DASHBOARD_PORT', '5000')),
+            debug=False,
+            threaded=True,
+            use_reloader=False  # Important: disable reloader in thread
+        )
+    except Exception as e:
+        logger.error(f"Dashboard thread error: {e}")
+        logger.exception("Dashboard thread exception details")
+
+
+def start_dashboard() -> threading.Thread:
+    """Launch the dashboard in a background daemon thread and return that thread."""
+    port = int(os.getenv('DASHBOARD_PORT', '5000'))
+    logger.info(f"Starting dashboard on port {port}...")
+
+    # daemon=True so the thread does not block shutdown
+    worker = threading.Thread(target=run_dashboard_thread, name="DashboardThread", daemon=True)
+    worker.start()
+
+    logger.info(f"Dashboard thread started (Thread-ID: {worker.ident})")
+    return worker
+
+
 def run_main_loop() -> None:
    """Main processing loop with improved error handling"""
    from config import settings
@@ -418,13 +458,19 @@ def run_main_loop() -> None:
 def main():
    """Main entry point"""
    lock_fd = None
+    dashboard_thread = None
    try:
        logger.info("=== CBCFacil Service Started ===")
        logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
        logger.info(f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}")
-        
+
        lock_fd = acquire_lock()
        initialize_services()
+
+        # Start dashboard in background thread
+        dashboard_thread = start_dashboard()
+
+        # Run main processing loop
        run_main_loop()
        
    except KeyboardInterrupt:
--- a/plus.md
+++ b/plus.md
@@ -0,0 +1,799 @@
+# 🚀 CBCFacil - Mejoras y Extensiones Recomendadas
+
+Documento con recomendaciones para hacer el proyecto más complejo, robusto y profesional.
+
+---
+
+## 📋 Resumen Ejecutivo
+
+Después de analizar todo el proyecto, identifiqué las siguientes áreas principales de mejora:
+
+| Área | Prioridad | Complejidad | Estado Actual |
+|------|-----------|-------------|---------------|
+| Testing | 🔴 Alta | Media | Solo `conftest.py` existe |
+| Frontend Dashboard | 🔴 Alta | Alta | Template básico sin JS |
+| Sistema de Colas | 🟡 Media | Alta | Loop síncrono simple |
+| Autenticación API | 🔴 Alta | Media | Sin autenticación |
+| Base de Datos | 🟡 Media | Media | Solo archivo TXT |
+| Métricas/Observabilidad | 🟡 Media | Media | Básico |
+| Video Processor | 🟢 Baja | Alta | No existe |
+| WebSockets | 🟢 Baja | Media | No existe |
+| Internacionalización | 🟢 Baja | Baja | Solo español |
+
+---
+
+## 🧪 1. Testing Completo (CRÍTICO)
+
+### Estado Actual
+- Solo existe `tests/conftest.py` y `tests/__init__.py`
+- No hay tests unitarios ni de integración implementados
+- `ARCHITECTURE.md` indica ~60% de cobertura, pero ese dato es falso: no hay tests implementados
+
+### Recomendaciones
+
+#### 1.1 Tests Unitarios
+```
+tests/
+├── unit/
+│   ├── test_settings.py          # Validar configuración
+│   ├── test_validators.py        # Validar validators.py
+│   ├── test_result.py            # Patrón Result
+│   ├── test_exceptions.py        # Excepciones personalizadas
+│   ├── test_bloom_filter.py      # BloomFilter en registry
+│   ├── test_token_bucket.py      # Rate limiter
+│   └── test_circuit_breaker.py   # Circuit breaker
+```
+
+#### 1.2 Tests de Integración
+```
+tests/
+├── integration/
+│   ├── test_webdav_service.py    # Mock de Nextcloud
+│   ├── test_telegram_service.py  # Mock de Telegram API
+│   ├── test_ai_providers.py      # Mock de APIs AI
+│   ├── test_audio_processor.py   # Con audio de prueba
+│   ├── test_pdf_processor.py     # Con PDF de prueba
+│   └── test_document_generator.py
+```
+
+#### 1.3 Tests E2E
+```
+tests/
+├── e2e/
+│   ├── test_full_audio_workflow.py
+│   ├── test_full_pdf_workflow.py
+│   └── test_api_endpoints.py
+```
+
+#### 1.4 Fixtures de Prueba
+```python
+# tests/fixtures/
+# - sample_audio.mp3 (5 segundos de audio en español)
+# - sample_pdf.pdf (2 páginas con texto)
+# - expected_transcription.txt
+# - expected_summary.md
+```
+
+---
+
+## 🖥️ 2. Dashboard Frontend Completo
+
+### Estado Actual
+- Solo existe `templates/` con un archivo básico
+- API REST sin interfaz visual
+- Sin JavaScript interactivo
+
+### Recomendaciones
+
+#### 2.1 Estructura Frontend
+```
+frontend/
+├── src/
+│   ├── components/
+│   │   ├── FileList.js           # Lista de archivos
+│   │   ├── FileCard.js           # Tarjeta individual
+│   │   ├── ProcessingStatus.js   # Estado en tiempo real
+│   │   ├── GPUMonitor.js         # Monitor VRAM
+│   │   ├── QueueViewer.js        # Cola de procesamiento
+│   │   └── NotificationBell.js   # Notificaciones
+│   ├── pages/
+│   │   ├── Dashboard.js          # Vista principal
+│   │   ├── Files.js              # Gestión de archivos
+│   │   ├── Settings.js           # Configuración
+│   │   └── Logs.js               # Visor de logs
+│   └── services/
+│       ├── api.js                # Cliente API
+│       └── websocket.js          # Conexión WS
+├── public/
+│   └── index.html
+└── package.json
+```
+
+#### 2.2 Funcionalidades
+- [ ] Drag & drop para subir archivos
+- [ ] Preview de PDFs y audio
+- [ ] Visor de transcripciones lado a lado
+- [ ] Editor de resúmenes con Markdown preview
+- [ ] Gráficas de uso de GPU/CPU
+- [ ] Historial de procesamiento
+- [ ] Búsqueda en contenido
+- [ ] Dark mode / Light mode
+
+---
+
+## 📬 3. Sistema de Colas (Celery/RQ)
+
+### Estado Actual
+- Loop infinito síncrono en `main.py`
+- Sin priorización de tareas
+- Sin reintentos configurables
+- Sin distribución de carga
+
+### Recomendaciones
+
+#### 3.1 Implementar Celery
+```text
+# services/queue/
+├── __init__.py
+├── celery_app.py           # Configuración Celery
+├── tasks/
+│   ├── __init__.py
+│   ├── audio_tasks.py      # Tareas de audio
+│   ├── pdf_tasks.py        # Tareas de PDF
+│   └── notification_tasks.py
+└── workers/
+    └── worker_config.py
+```
+
+#### 3.2 Estructura de Tareas
+```python
+# tasks/audio_tasks.py
+from celery import shared_task
+
+@shared_task(bind=True, max_retries=3, default_retry_delay=60)
+def process_audio(self, file_path: str, options: dict) -> dict:
+    """Procesar audio con reintentos automáticos"""
+    ...
+
+@shared_task
+def transcribe_audio(file_path: str) -> str:
+    """Transcribir audio con Whisper"""
+    ...
+
+@shared_task
+def generate_summary(transcription: str, base_name: str) -> dict:
+    """Generar resumen con IA"""
+    ...
+```
+
+#### 3.3 Prioridades de Cola
+- `high`: Archivos pequeños (<10MB)
+- `default`: Archivos normales
+- `low`: Archivos grandes (>100MB)
+
+---
+
+## 🔐 4. Autenticación y Autorización
+
+### Estado Actual
+- API completamente abierta
+- Sin manejo de sesiones
+- Sin roles de usuario
+
+### Recomendaciones
+
+#### 4.1 Implementar JWT
+```text
+# api/auth/
+├── __init__.py
+├── jwt_handler.py          # Generación/validación JWT
+├── middleware.py           # Middleware de autenticación
+├── decorators.py           # @require_auth, @require_admin
+└── models.py               # User, Role, Permission
+```
+
+#### 4.2 Endpoints de Auth
+```text
+# api/routes_auth.py
+POST /api/auth/login        # Login con usuario/password
+POST /api/auth/refresh      # Refrescar token
+POST /api/auth/logout       # Invalidar token
+GET  /api/auth/me           # Perfil del usuario
+```
+
+#### 4.3 Roles Sugeridos
+- `admin`: Acceso completo
+- `processor`: Puede procesar archivos
+- `viewer`: Solo lectura
+- `api`: Acceso solo API (para integraciones)
+
+---
+
+## 🗄️ 5. Base de Datos (SQLite/PostgreSQL)
+
+### Estado Actual
+- Solo `processed_files.txt` como registro
+- Sin historial de procesamiento
+- Sin metadatos de archivos
+
+### Recomendaciones
+
+#### 5.1 Modelos de Base de Datos
+```text
+# storage/models/
+├── __init__.py
+├── base.py                 # SQLAlchemy base
+├── file.py                 # Modelo File
+├── processing_job.py       # Modelo ProcessingJob
+├── user.py                 # Modelo User
+└── audit_log.py           # Modelo AuditLog
+```
+
+#### 5.2 Esquema Propuesto
+```sql
+-- files
+CREATE TABLE files (
+    id SERIAL PRIMARY KEY,
+    filename VARCHAR(255) NOT NULL,
+    original_path TEXT,
+    file_type VARCHAR(20),
+    file_size BIGINT,
+    checksum VARCHAR(64),
+    status VARCHAR(20) DEFAULT 'pending',
+    created_at TIMESTAMP DEFAULT NOW(),
+    updated_at TIMESTAMP DEFAULT NOW()
+);
+
+-- processing_jobs
+CREATE TABLE processing_jobs (
+    id SERIAL PRIMARY KEY,
+    file_id INTEGER REFERENCES files(id),
+    job_type VARCHAR(50),
+    status VARCHAR(20),
+    started_at TIMESTAMP,
+    completed_at TIMESTAMP,
+    error_message TEXT,
+    result_path TEXT,
+    metadata JSONB
+);
+
+-- audit_logs
+CREATE TABLE audit_logs (
+    id SERIAL PRIMARY KEY,
+    user_id INTEGER,
+    action VARCHAR(50),
+    resource_type VARCHAR(50),
+    resource_id INTEGER,
+    details JSONB,
+    timestamp TIMESTAMP DEFAULT NOW()
+);
+```
+
+---
+
+## 📊 6. Métricas y Observabilidad
+
+### Estado Actual
+- `services/metrics_collector.py` básico
+- Sin exportación a sistemas externos
+- Sin dashboards de monitoreo
+
+### Recomendaciones
+
+#### 6.1 Prometheus Metrics
+```text
+# services/observability/
+├── __init__.py
+├── prometheus_exporter.py   # Endpoint /metrics
+├── metrics.py              # Definición de métricas
+└── tracing.py              # Tracing distribuido
+```
+
+#### 6.2 Métricas a Implementar
+```python
+from prometheus_client import Counter, Histogram, Gauge
+
+# Contadores
+files_processed_total = Counter('files_processed_total', 'Total files processed', ['type', 'status'])
+ai_requests_total = Counter('ai_requests_total', 'AI API requests', ['provider', 'operation'])
+
+# Histogramas
+processing_duration = Histogram('processing_duration_seconds', 'Processing time', ['type'])
+ai_response_time = Histogram('ai_response_time_seconds', 'AI response time', ['provider'])
+
+# Gauges
+active_jobs = Gauge('active_jobs', 'Currently processing jobs')
+vram_usage = Gauge('vram_usage_bytes', 'GPU memory usage')
+queue_size = Gauge('queue_size', 'Jobs in queue', ['priority'])
+```
+
+#### 6.3 Integración
+- [ ] Grafana dashboard preconfigurado
+- [ ] Alertas con AlertManager
+- [ ] Logs estructurados con Loki
+- [ ] Tracing con Jaeger/Zipkin
+
+---
+
+## 🎬 7. Video Processor (NUEVO)
+
+### Recomendaciones
+
+#### 7.1 Estructura
+```python
+# processors/video_processor.py
+class VideoProcessor(FileProcessor):
+    """Processor for video files"""
+    
+    def extract_audio(self, video_path: str) -> str:
+        """Extraer audio de video con ffmpeg"""
+        ...
+    
+    def extract_frames(self, video_path: str, interval: int = 60) -> List[str]:
+        """Extraer frames cada N segundos para análisis"""
+        ...
+    
+    def analyze_frames(self, frames: List[str]) -> Dict[str, Any]:
+        """Analizar frames con visión AI (Gemini Vision)"""
+        ...
+    
+    def process(self, file_path: str) -> Dict[str, Any]:
+        """Pipeline completo: audio + frames + análisis"""
+        ...
+```
+
+#### 7.2 Extensiones de Video
+```python
+VIDEO_EXTENSIONS = {".mp4", ".avi", ".mkv", ".mov", ".webm"}
+```
+
+#### 7.3 Funcionalidades
+- [ ] Transcripción de audio del video
+- [ ] Extracción de frames clave
+- [ ] Análisis visual con IA (slides, pizarra)
+- [ ] Generación de índice por escenas
+- [ ] Subtítulos automáticos (SRT/VTT)
+
+---
+
+## 🔌 8. WebSockets para Tiempo Real
+
+### Estado Actual
+- Solo API REST
+- Sin actualizaciones en tiempo real
+- Polling pesado para estado
+
+### Recomendaciones
+
+#### 8.1 Implementación
+```text
+# api/websocket/
+├── __init__.py
+├── manager.py              # ConnectionManager
+├── events.py               # Tipos de eventos
+└── handlers.py             # Event handlers
+```
+
+#### 8.2 Eventos a Implementar
+```python
+# Eventos del servidor -> cliente
+{
+    "type": "file.processing_started",
+    "data": {"file_id": 1, "filename": "audio.mp3"}
+}
+{
+    "type": "file.processing_progress",
+    "data": {"file_id": 1, "progress": 45, "stage": "transcribing"}
+}
+{
+    "type": "file.processing_completed",
+    "data": {"file_id": 1, "result_path": "/path/to/result.docx"}
+}
+{
+    "type": "system.gpu_usage",
+    "data": {"vram_used": 4.5, "vram_total": 8.0}
+}
+```
+
+#### 8.3 Integración con Flask
+```python
+from flask_socketio import SocketIO, emit, join_room
+
+socketio = SocketIO(app, cors_allowed_origins="*")
+
+@socketio.on('connect')
+def handle_connect():
+    emit('connected', {'status': 'ok'})
+
+@socketio.on('subscribe')
+def handle_subscribe(data):
+    join_room(data['file_id'])
+```
+
+---
+
+## 🌐 9. Versionado de API y OpenAPI
+
+### Estado Actual
+- API sin versionado
+- Sin documentación OpenAPI/Swagger
+- Endpoints inconsistentes
+
+### Recomendaciones
+
+#### 9.1 Versionado de API
+```
+/api/v1/files
+/api/v1/process
+/api/v1/health
+/api/v2/files  (futura versión)
+```
+
+#### 9.2 OpenAPI Spec
+```python
+# api/openapi/
+├── spec.yaml               # Especificación OpenAPI 3.0
+└── swagger_ui.py           # Swagger UI integration
+
+# Usar flask-restx o flasgger
+from flask_restx import Api, Resource, fields
+
+api = Api(app, 
+    version='1.0', 
+    title='CBCFacil API',
+    description='API para procesamiento de documentos'
+)
+```
+
+---
+
+## 🐳 10. Containerización Mejorada
+
+### Estado Actual
+- Existe `.dockerignore`, pero no un Dockerfile completo
+- Sin docker-compose
+- Sin multi-stage builds
+
+### Recomendaciones
+
+#### 10.1 Docker Multi-stage
+```dockerfile
+# Dockerfile
+FROM python:3.11-slim AS builder
+WORKDIR /app
+COPY requirements.txt .
+RUN pip wheel --no-cache-dir -w /wheels -r requirements.txt
+
+FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04 AS runtime
+# ... instalación optimizada
+```
+
+#### 10.2 Docker Compose
+```yaml
+# docker-compose.yml
+version: '3.8'
+services:
+  app:
+    build: .
+    ports:
+      - "5000:5000"
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+  
+  redis:
+    image: redis:7-alpine
+    
+  celery-worker:
+    build: .
+    command: celery -A celery_app worker -l info
+    depends_on:
+      - redis
+  
+  prometheus:
+    image: prom/prometheus
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+  
+  grafana:
+    image: grafana/grafana
+    ports:
+      - "3000:3000"
+```
+
+---
+
+## 🔧 11. Unificación y Refactoring
+
+### 11.1 AIProvider Unificado
+Actualmente existe lógica duplicada entre:
+- `services/ai_service.py`
+- `services/ai/gemini_provider.py`
+- `services/ai/claude_provider.py`
+
+**Recomendación**: Crear interfaz unificada con Chain of Responsibility:
+```python
+class AIProviderChain:
+    """Cadena de proveedores con fallback automático"""
+    
+    def __init__(self, providers: List[AIProvider]):
+        self.providers = providers
+    
+    def generate(self, prompt: str) -> str:
+        for provider in self.providers:
+            try:
+                if provider.is_available():
+                    return provider.generate_text(prompt)
+            except Exception as e:
+                logging.warning(f"{provider.name} failed: {e}")
+        raise AllProvidersFailedError()
+```
+
+### 11.2 Procesadores Unificados
+Crear pipeline unificado:
+```python
+class ProcessingPipeline:
+    def __init__(self):
+        self.steps = []
+    
+    def add_step(self, processor: FileProcessor):
+        self.steps.append(processor)
+        return self
+    
+    def process(self, file_path: str) -> Dict[str, Any]:
+        result = {}
+        for step in self.steps:
+            if step.can_process(file_path):
+                result.update(step.process(file_path))
+        return result
+```
+
+---
+
+## 📱 12. Notificaciones Mejoradas
+
+### Estado Actual
+- Solo Telegram
+- Sin templates de mensajes
+- Sin notificaciones push
+
+### Recomendaciones
+
+#### 12.1 Multi-canal
+```text
+# services/notifications/
+├── __init__.py
+├── base_notifier.py        # Interface base
+├── telegram_notifier.py    # Actual optimizado
+├── email_notifier.py       # Nuevo
+├── slack_notifier.py       # Nuevo
+├── webhook_notifier.py     # Para integraciones
+└── notification_manager.py # Orquestador
+```
+
+#### 12.2 Templates de Mensaje
+```python
+TEMPLATES = {
+    "processing_started": "🎵 Procesando: {filename}\n⏱️ Estimado: {eta}",
+    "processing_completed": "✅ Completado: {filename}\n📄 Resumen: {summary_url}",
+    "processing_failed": "❌ Error en {filename}\n🔍 Detalles: {error}",
+    "daily_summary": "📊 Resumen del día:\n- Procesados: {count}\n- Tiempo total: {time}"
+}
+```
+
+---
+
+## 🗂️ 13. Sistema de Plugins
+
+### Recomendaciones
+
+```text
+# plugins/
+├── __init__.py
+├── base_plugin.py          # Interface de plugin
+├── plugin_manager.py       # Gestor de plugins
+└── examples/
+    ├── custom_ocr/         # Plugin OCR personalizado
+    ├── s3_storage/         # Plugin para AWS S3
+    └── discord_notifier/   # Plugin Discord
+```
+
+#### Interfaz de Plugin
+```python
+class BasePlugin(ABC):
+    """Base class for plugins"""
+    
+    @property
+    @abstractmethod
+    def name(self) -> str: ...
+    
+    @property
+    @abstractmethod
+    def version(self) -> str: ...
+    
+    @abstractmethod
+    def initialize(self, config: dict) -> None: ...
+    
+    @abstractmethod
+    def execute(self, context: dict) -> dict: ...
+    
+    @abstractmethod
+    def cleanup(self) -> None: ...
+```
+
+---
+
+## 📈 14. Mejoras de Rendimiento
+
+### 14.1 Caching Avanzado
+```text
+# services/cache/
+├── __init__.py
+├── cache_manager.py        # Gestor de cache
+├── redis_cache.py          # Cache en Redis
+└── file_cache.py           # Cache en disco
+```
+
+### 14.2 Batch Processing
+```python
+class BatchProcessor:
+    """Procesar múltiples archivos en paralelo"""
+    
+    def process_batch(self, files: List[str], max_workers: int = 4) -> List[dict]:
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = {executor.submit(self.process_single, f): f for f in files}
+            results = []
+            for future in as_completed(futures):
+                results.append(future.result())
+            return results
+```
+
+### 14.3 Streaming de Archivos Grandes
+```python
+def stream_process(file_path: str, chunk_size: int = 1024*1024):
+    """Procesar archivos grandes en streaming"""
+    with open(file_path, 'rb') as f:
+        while chunk := f.read(chunk_size):
+            yield process_chunk(chunk)
+```
+
+---
+
+## 🔒 15. Seguridad Adicional
+
+### 15.1 Validación de Archivos
+```text
+# services/security/
+├── __init__.py
+├── file_validator.py       # Validación de archivos
+├── malware_scanner.py      # Escaneo de malware
+└── rate_limiter.py         # Rate limiting por IP
+```
+
+### 15.2 Checks de Seguridad
+- [ ] Validar tipos MIME reales (no solo extensiones)
+- [ ] Limitar tamaño máximo de archivo
+- [ ] Sanitizar nombres de archivo
+- [ ] Escanear con ClamAV
+- [ ] Rate limiting por usuario/IP
+- [ ] Logs de auditoría
+
+---
+
+## 📝 16. CLI Mejorado
+
+### Estado Actual
+- Solo comandos básicos en `main.py`
+
+### Recomendaciones
+```text
+# cli/
+├── __init__.py
+├── main.py                 # Click/Typer app
+└── commands/
+    ├── process.py          # cbcfacil process audio.mp3
+    ├── queue.py            # cbcfacil queue list/stats
+    ├── config.py           # cbcfacil config show/set
+    └── db.py               # cbcfacil db migrate/seed
+```
+
+#### Ejemplo con Typer
+```python
+import typer
+
+app = typer.Typer()
+
+@app.command()
+def process(
+    file: str,
+    output_dir: str = ".",
+    format: str = "docx",
+    ai_provider: str = "auto"
+):
+    """Procesar archivo de audio o PDF"""
+    ...
+
+@app.command()
+def status():
+    """Mostrar estado del servicio"""
+    ...
+
+@app.command()
+def queue(action: str):
+    """Gestionar cola de procesamiento"""
+    ...
+```
+
+---
+
+## 📁 Resumen de Nuevos Archivos/Directorios
+
+```
+cbc/
+├── tests/
+│   ├── unit/
+│   ├── integration/
+│   ├── e2e/
+│   └── fixtures/
+├── frontend/
+│   ├── src/
+│   └── public/
+├── services/
+│   ├── queue/
+│   ├── cache/
+│   ├── notifications/
+│   ├── observability/
+│   └── security/
+├── api/
+│   ├── auth/
+│   ├── websocket/
+│   └── openapi/
+├── storage/
+│   └── models/
+├── processors/
+│   └── video_processor.py
+├── plugins/
+├── cli/
+├── docker-compose.yml
+├── prometheus.yml
+└── grafana/
+```
+
+---
+
+## ✅ Checklist de Implementación
+
+### Fase 1 - Fundamentos (2-3 semanas)
+- [ ] Implementar tests unitarios básicos
+- [ ] Agregar autenticación JWT
+- [ ] Migrar a base de datos SQLite
+
+### Fase 2 - Mejoras Core (3-4 semanas)
+- [ ] Implementar sistema de colas con Celery
+- [ ] Agregar WebSockets
+- [ ] Crear dashboard frontend básico
+
+### Fase 3 - Observabilidad (1-2 semanas)
+- [ ] Prometheus metrics
+- [ ] Grafana dashboards
+- [ ] Logging estructurado
+
+### Fase 4 - Extensiones (2-3 semanas)
+- [ ] Video processor
+- [ ] Multi-canal de notificaciones
+- [ ] Sistema de plugins
+
+### Fase 5 - Producción (2 semanas)
+- [ ] Docker compose completo
+- [ ] CI/CD pipeline
+- [ ] Documentación completa
+
+---
+
+*Documento generado por análisis exhaustivo del proyecto CBCFacil v9*
--- a/templates/index.html
+++ b/templates/index.html
--- a/todo.md
+++ b/todo.md