feat: Integración automática con Notion + análisis completo del código

- Instalado notion-client SDK oficial para integración robusta
- Refactorizado services/notion_service.py con SDK oficial de Notion
  - Rate limiting con retry y exponential backoff
  - Parser Markdown → Notion blocks (headings, bullets, paragraphs)
  - Soporte para pages y databases
  - Manejo robusto de errores

- Integración automática en document/generators.py
  - PDFs se suben automáticamente a Notion después de generarse
  - Contenido completo del resumen formateado con bloques
  - Metadata rica (tipo de archivo, path, fecha)

- Configuración de Notion en main.py
  - Inicialización automática al arrancar el servicio
  - Validación de credenciales

- Actualizado config/settings.py
  - Agregado load_dotenv() para cargar variables de .env
  - Configuración de Notion (NOTION_API, NOTION_DATABASE_ID)

- Scripts de utilidad creados:
  - test_notion_integration.py: Test de subida a Notion
  - test_pipeline_notion.py: Test del pipeline completo
  - verify_notion_permissions.py: Verificación de permisos
  - list_notion_pages.py: Listar páginas accesibles
  - diagnose_notion.py: Diagnóstico completo
  - create_notion_database.py: Crear database automáticamente
  - restart_service.sh: Script de reinicio del servicio

- Documentación completa en opus.md:
  - Análisis exhaustivo del codebase (42 archivos Python)
  - Bugs críticos identificados y soluciones
  - Mejoras de seguridad (autenticación, rate limiting, CORS, CSP)
  - Optimizaciones de rendimiento (Celery, Redis, PostgreSQL, WebSockets)
  - Plan de testing (estructura, ejemplos, 80% coverage goal)
  - Roadmap de implementación (6 sprints detallados)
  - Integración avanzada con Notion documentada

Estado: Notion funcionando correctamente, PDFs se suben automáticamente
This commit is contained in:
renato97
2026-01-26 17:26:50 +00:00
parent 47896fd50a
commit 6058dc642e
12 changed files with 3863 additions and 184 deletions

View File

@@ -40,6 +40,14 @@ GEMINI_CLI_PATH=/path/to/gemini # or leave empty
TELEGRAM_TOKEN=your_telegram_bot_token
TELEGRAM_CHAT_ID=your_telegram_chat_id
# =============================================================================
# Notion Integration (Optional - for automatic PDF uploads)
# =============================================================================
# Get your token from: https://developers.notion.com/docs/create-a-notion-integration
NOTION_API=ntn_YOUR_NOTION_INTEGRATION_TOKEN_HERE
# Get your database ID from the database URL in Notion
NOTION_DATABASE_ID=your_database_id_here
# =============================================================================
# Dashboard Configuration (Required for production)
# =============================================================================

View File

@@ -1,13 +1,19 @@
"""
Centralized configuration management for CBCFacil
"""
import os
from pathlib import Path
from typing import Optional, Set, Union
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class ConfigurationError(Exception):
"""Raised when configuration is invalid"""
pass
@@ -44,7 +50,9 @@ class Settings:
POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "5"))
HTTP_TIMEOUT: int = int(os.getenv("HTTP_TIMEOUT", "30"))
WEBDAV_MAX_RETRIES: int = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
DOWNLOAD_CHUNK_SIZE: int = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "65536")) # 64KB for better performance
DOWNLOAD_CHUNK_SIZE: int = int(
os.getenv("DOWNLOAD_CHUNK_SIZE", "65536")
) # 64KB for better performance
MAX_FILENAME_LENGTH: int = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
MAX_FILENAME_BASE_LENGTH: int = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
MAX_FILENAME_TOPICS_LENGTH: int = int(os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20"))
@@ -57,7 +65,13 @@ class Settings:
# AI Providers
ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv("ZAI_AUTH_TOKEN", "")
ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv(
"ZAI_AUTH_TOKEN", ""
)
# Notion Integration
NOTION_API_TOKEN: Optional[str] = os.getenv("NOTION_API")
NOTION_DATABASE_ID: Optional[str] = os.getenv("NOTION_DATABASE_ID")
# Gemini
GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
@@ -76,13 +90,25 @@ class Settings:
CPU_COUNT: int = os.cpu_count() or 1
PDF_MAX_PAGES_PER_CHUNK: int = int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2"))
PDF_DPI: int = int(os.getenv("PDF_DPI", "200"))
PDF_RENDER_THREAD_COUNT: int = int(os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT))))
PDF_RENDER_THREAD_COUNT: int = int(
os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT)))
)
PDF_BATCH_SIZE: int = int(os.getenv("PDF_BATCH_SIZE", "2"))
PDF_TROCR_MAX_BATCH: int = int(os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE)))
PDF_TESSERACT_THREADS: int = int(os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3))))))
PDF_PREPROCESS_THREADS: int = int(os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS)))
PDF_TEXT_DETECTION_MIN_RATIO: float = float(os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6"))
PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120"))
PDF_TROCR_MAX_BATCH: int = int(
os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE))
)
PDF_TESSERACT_THREADS: int = int(
os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3)))))
)
PDF_PREPROCESS_THREADS: int = int(
os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS))
)
PDF_TEXT_DETECTION_MIN_RATIO: float = float(
os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6")
)
PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(
os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120")
)
# Error handling
ERROR_THROTTLE_SECONDS: int = int(os.getenv("ERROR_THROTTLE_SECONDS", "600"))
@@ -90,7 +116,9 @@ class Settings:
# GPU/VRAM Management
MODEL_TIMEOUT_SECONDS: int = int(os.getenv("MODEL_TIMEOUT_SECONDS", "300"))
CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
PYTORCH_CUDA_ALLOC_CONF: str = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512")
PYTORCH_CUDA_ALLOC_CONF: str = os.getenv(
"PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512"
)
# GPU Detection (auto, nvidia, amd, cpu)
GPU_PREFERENCE: str = os.getenv("GPU_PREFERENCE", "auto")
@@ -127,17 +155,29 @@ class Settings:
@property
def has_ai_config(self) -> bool:
"""Check if AI providers are configured"""
return any([
return any(
[
self.ZAI_AUTH_TOKEN,
self.GEMINI_API_KEY,
self.CLAUDE_CLI_PATH,
self.GEMINI_CLI_PATH
])
self.GEMINI_CLI_PATH,
]
)
@property
def has_notion_config(self) -> bool:
"""Check if Notion is configured"""
return bool(self.NOTION_API_TOKEN and self.NOTION_DATABASE_ID)
@property
def processed_files_path(self) -> Path:
"""Get the path to the processed files registry"""
return Path(os.getenv("PROCESSED_FILES_PATH", str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt")))
return Path(
os.getenv(
"PROCESSED_FILES_PATH",
str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt"),
)
)
@property
def nextcloud_url(self) -> str:
@@ -157,7 +197,9 @@ class Settings:
def nextcloud_password(self) -> str:
"""Get Nextcloud password with validation"""
if not self.NEXTCLOUD_PASSWORD and self.is_production:
raise ConfigurationError("NEXTCLOUD_PASSWORD is required in production mode")
raise ConfigurationError(
"NEXTCLOUD_PASSWORD is required in production mode"
)
return self.NEXTCLOUD_PASSWORD
@property
@@ -181,6 +223,7 @@ class Settings:
"""Check if GPU support is available"""
try:
import torch
return torch.cuda.is_available()
except ImportError:
return False
@@ -203,7 +246,7 @@ class Settings:
"telegram_configured": self.telegram_configured,
"gpu_support": self.has_gpu_support,
"cpu_count": self.CPU_COUNT,
"poll_interval": self.POLL_INTERVAL
"poll_interval": self.POLL_INTERVAL,
}

126
create_notion_database.py Normal file
View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Script para crear una nueva base de datos de Notion y compartirla automáticamente
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import settings
from notion_client import Client
def main():
    """Create a 'CBCFacil - Documentos Procesados' database in Notion.

    Searches the pages the integration can access, uses the first one as the
    parent, and creates a database with the schema the pipeline expects
    (Name / Status / Tipo / Fecha). Prints the resulting database ID and the
    follow-up steps for the user. Requires NOTION_API in .env.
    """
    print("\n" + "=" * 70)
    print("🛠️ CREAR BASE DE DATOS DE NOTION PARA CBCFACIL")
    print("=" * 70 + "\n")

    token = settings.NOTION_API_TOKEN
    if not token:
        print("❌ Token no configurado en .env")
        return

    client = Client(auth=token)

    # A Notion database must be created inside a page the integration has
    # been shared with, so first find an accessible page to use as parent.
    print("🔍 Buscando páginas accesibles...\n")
    results = client.search(page_size=100)
    pages = [p for p in results.get("results", []) if p.get("object") == "page"]

    if not pages:
        print("❌ No tienes páginas accesibles.")
        print("\n📋 SOLUCIÓN:")
        print("1. Ve a Notion y crea una nueva página")
        print("2. En esa página, click en 'Share'")
        print("3. Busca y agrega tu integración")
        print("4. Ejecuta este script nuevamente\n")
        return

    # Show up to 10 accessible pages so the user can see what was found.
    print(f"✅ Encontradas {len(pages)} página(s) accesibles:\n")
    for i, page in enumerate(pages[:10], 1):
        page_id = page.get("id")
        props = page.get("properties", {})

        # Extract the title from whichever property has type 'title'.
        title = "Sin título"
        for prop_name, prop_data in props.items():
            if prop_data.get("type") == "title":
                title_list = prop_data.get("title", [])
                if title_list:
                    title = title_list[0].get("plain_text", "Sin título")
                break

        print(f"{i}. {title[:50]}")
        print(f" ID: {page_id}\n")

    # Use the first accessible page as the database parent.
    parent_page = pages[0]
    parent_id = parent_page.get("id")

    print("=" * 70)
    print("📄 Voy a crear la base de datos dentro de la primera página")
    print("=" * 70 + "\n")

    try:
        # Create the database with the schema expected by the pipeline.
        print("🚀 Creando base de datos 'CBCFacil - Documentos'...\n")

        database = client.databases.create(
            parent={"page_id": parent_id},
            title=[
                {
                    "type": "text",
                    "text": {"content": "CBCFacil - Documentos Procesados"},
                }
            ],
            properties={
                "Name": {"title": {}},
                "Status": {
                    "select": {
                        "options": [
                            {"name": "Procesado", "color": "green"},
                            {"name": "En Proceso", "color": "yellow"},
                            {"name": "Error", "color": "red"},
                        ]
                    }
                },
                "Tipo": {
                    "select": {
                        "options": [
                            {"name": "AUDIO", "color": "purple"},
                            {"name": "PDF", "color": "orange"},
                            {"name": "TEXTO", "color": "gray"},
                        ]
                    }
                },
                "Fecha": {"date": {}},
            },
        )

        db_id = database["id"]

        print("✅ ¡Base de datos creada exitosamente!")
        print("=" * 70)
        print("\n📊 Información de la base de datos:\n")
        print(" Nombre: CBCFacil - Documentos Procesados")
        print(f" ID: {db_id}")
        print(f" URL: https://notion.so/{db_id.replace('-', '')}")
        # BUG FIX: the original printed "\n=" * 70 (the two-char string
        # repeated 70 times); the intent was a newline + 70-char separator.
        print("\n" + "=" * 70)
        print("\n🎯 SIGUIENTE PASO:")
        print("=" * 70)
        print("\nActualiza tu archivo .env con:\n")
        print(f"NOTION_DATABASE_ID={db_id}\n")
        print("Luego ejecuta:")
        print("python test_notion_integration.py\n")
        print("=" * 70 + "\n")

    except Exception as e:
        # Most common cause: the integration lacks insert/write capability.
        print(f"❌ Error creando base de datos: {e}")
        print("\nVerifica que la integración tenga permisos de escritura.\n")


if __name__ == "__main__":
    main()

116
diagnose_notion.py Normal file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
Script para diagnosticar la integración de Notion
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import settings
from notion_client import Client
def main():
    """Run a two-step diagnostic of the Notion integration.

    Test 1 validates the API token by issuing an unrestricted search.
    Test 2 verifies read access to the configured database and, on the
    typical "Could not find database" failure, prints step-by-step
    instructions for sharing the database with the integration.
    """
    print("\n" + "=" * 70)
    print("🔍 DIAGNÓSTICO COMPLETO DE NOTION")
    print("=" * 70 + "\n")

    token = settings.NOTION_API_TOKEN
    database_id = settings.NOTION_DATABASE_ID

    print(f"Token: {token[:30]}..." if token else "❌ Token no configurado")
    print(f"Database ID: {database_id}\n")

    if not token:
        print("❌ Configura NOTION_API en .env\n")
        return

    client = Client(auth=token)

    # Test 1: the token is valid iff the search endpoint responds at all.
    print("📝 Test 1: Verificando token...")
    try:
        # Search for any page the integration can reach.
        results = client.search(query="", page_size=1)
        print("✅ Token válido - la integración está activa\n")

        pages = results.get("results", [])
        if pages:
            print(f"✅ La integración tiene acceso a {len(pages)} página(s)")
            for page in pages[:3]:
                page_id = page.get("id", "N/A")
                page_type = page.get("object", "N/A")
                print(f" - {page_type}: {page_id}")
        else:
            print("⚠️ La integración NO tiene acceso a ninguna página aún")
            print(" Esto es normal si acabas de crear la integración.\n")
    except Exception as e:
        print(f"❌ Error con el token: {e}\n")
        return

    # ROBUSTNESS FIX: bail out before Test 2 when no database ID is set;
    # the original would call the API with database_id=None.
    if not database_id:
        print("\n❌ Configura NOTION_DATABASE_ID en .env\n")
        return

    # Test 2: verify read access to the configured database.
    print("\n📊 Test 2: Verificando acceso a la base de datos CBC...")
    try:
        database = client.databases.retrieve(database_id=database_id)
        print("✅ ¡ÉXITO! La integración puede acceder a la base de datos\n")

        # ROBUSTNESS FIX: an empty title list raised IndexError in the
        # original — .get("title", [{}]) only defaults when the key is
        # missing, not when its value is [].
        title_list = database.get("title") or [{}]
        title = title_list[0].get("plain_text", "Sin título")
        print(f" Título: {title}")
        print(f" ID: {database['id']}")
        print("\n Propiedades:")
        for prop_name in database.get("properties", {}).keys():
            print(f"{prop_name}")

        print("\n" + "=" * 70)
        print("✅ TODO CONFIGURADO CORRECTAMENTE")
        print("=" * 70)
        print("\n🚀 Ejecuta: python test_notion_integration.py\n")

    except Exception as e:
        error_msg = str(e)
        print("❌ No se puede acceder a la base de datos")
        print(f" Error: {error_msg}\n")

        # The Notion API reports a database not shared with the integration
        # as "Could not find database"; walk the user through sharing it.
        if "Could not find database" in error_msg:
            print("=" * 70)
            print("⚠️ ACCIÓN REQUERIDA: Compartir la base de datos")
            print("=" * 70)
            print("\n📋 PASOS DETALLADOS:\n")
            print("1. Abre Notion en tu navegador")
            print("\n2. Ve a tu base de datos 'CBC'")
            print(" Opción A: Usa este link directo:")
            print(f" → https://www.notion.so/{database_id.replace('-', '')}")
            print("\n Opción B: Busca 'CBC' en tu workspace")
            print("\n3. En la página de la base de datos, busca el botón '...' ")
            print(" (tres puntos) en la esquina SUPERIOR DERECHA")
            print("\n4. En el menú que se abre, busca:")
            print("'Connections' (en inglés)")
            print("'Conexiones' (en español)")
            print("'Connect to' (puede variar)")
            print("\n5. Haz click y verás un menú de integraciones")
            print("\n6. Busca tu integración en la lista")
            print(" (Debería tener el nombre que le pusiste al crearla)")
            print("\n7. Haz click en tu integración para activarla")
            print("\n8. Confirma los permisos cuando te lo pida")
            print("\n9. Deberías ver un mensaje confirmando la conexión")
            print("\n10. ¡Listo! Vuelve a ejecutar:")
            print(" python verify_notion_permissions.py\n")
            print("=" * 70)

            # Alternative path when the Connections menu cannot be found.
            print("\n💡 ALTERNATIVA: Crear una nueva página de prueba\n")
            print("Si no encuentras la opción de conexiones en tu base de datos,")
            print("puedes crear una página nueva y compartirla con la integración:\n")
            print("1. Crea una nueva página en Notion")
            print("2. En esa página, click en 'Share' (Compartir)")
            print("3. Busca tu integración y agrégala")
            print("4. Luego convierte esa página en una base de datos")
            print("5. Usa el ID de esa nueva base de datos\n")


if __name__ == "__main__":
    main()

View File

@@ -1,6 +1,7 @@
"""
Document generation utilities
"""
import logging
import re
from pathlib import Path
@@ -17,7 +18,9 @@ class DocumentGenerator:
self.logger = logging.getLogger(__name__)
self.ai_provider = ai_provider_factory.get_best_provider()
def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
def generate_summary(
self, text: str, base_name: str
) -> Tuple[bool, str, Dict[str, Any]]:
"""Generate unified summary"""
self.logger.info(f"Generating summary for {base_name}")
@@ -85,13 +88,16 @@ Instrucciones:
# Use generic Gemini provider for formatting as requested
from services.ai.gemini_provider import GeminiProvider
formatter = GeminiProvider()
try:
if formatter.is_available():
summary = formatter.generate_text(format_prompt)
else:
self.logger.warning("Gemini formatter not available, using raw summary")
self.logger.warning(
"Gemini formatter not available, using raw summary"
)
summary = raw_summary
except Exception as e:
self.logger.warning(f"Formatting failed ({e}), using raw summary")
@@ -105,13 +111,51 @@ Instrucciones:
docx_path = self._create_docx(summary, base_name)
pdf_path = self._create_pdf(summary, base_name)
# Upload to Notion if configured
from services.notion_service import notion_service
notion_uploaded = False
notion_page_id = None
if settings.has_notion_config:
try:
title = base_name.replace("_", " ").title()
# Crear página con el contenido completo del resumen
notion_metadata = {
"file_type": "Audio", # O 'PDF' dependiendo del origen
"pdf_path": pdf_path,
"add_status": False, # No usar Status/Tipo (no existen en la DB)
"use_as_page": False, # Usar como database, no página
}
notion_page_id = notion_service.create_page_with_summary(
title=title, summary=summary, metadata=notion_metadata
)
if notion_page_id:
notion_uploaded = True
self.logger.info(
f"✅ Resumen subido a Notion: {title} (ID: {notion_page_id})"
)
else:
self.logger.warning(f"⚠️ No se pudo subir a Notion: {title}")
except Exception as e:
self.logger.warning(f"❌ Error al subir a Notion: {e}")
import traceback
traceback.print_exc()
else:
self.logger.info("Notion not configured - skipping upload")
metadata = {
'markdown_path': str(markdown_path),
'docx_path': str(docx_path),
'pdf_path': str(pdf_path),
'docx_name': Path(docx_path).name,
'summary': summary,
'filename': filename
"markdown_path": str(markdown_path),
"docx_path": str(docx_path),
"pdf_path": str(pdf_path),
"docx_name": Path(docx_path).name,
"summary": summary,
"filename": filename,
"notion_uploaded": notion_uploaded,
"notion_page_id": notion_page_id,
}
return True, summary, metadata
@@ -129,17 +173,21 @@ Summary: {summary}
Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
topics_text = self.ai_provider.sanitize_input(prompt) if hasattr(self.ai_provider, 'sanitize_input') else summary[:100]
topics_text = (
self.ai_provider.sanitize_input(prompt)
if hasattr(self.ai_provider, "sanitize_input")
else summary[:100]
)
# Simple topic extraction
topics = re.findall(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b', topics_text)[:3]
topics = re.findall(r"\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b", topics_text)[:3]
if not topics:
topics = ['documento']
topics = ["documento"]
# Limit topic length
topics = [t[: settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
filename = '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]
filename = "_".join(topics)[: settings.MAX_FILENAME_LENGTH]
return filename
except Exception as e:
@@ -153,7 +201,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
output_path = output_dir / f"{base_name}_unificado.md"
content = f"""# {base_name.replace('_', ' ').title()}
content = f"""# {base_name.replace("_", " ").title()}
## Resumen
@@ -164,7 +212,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
*Generado por CBCFacil*
"""
with open(output_path, 'w', encoding='utf-8') as f:
with open(output_path, "w", encoding="utf-8") as f:
f.write(content)
return output_path
@@ -183,7 +231,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
output_path = output_dir / f"{base_name}_unificado.docx"
doc = Document()
doc.add_heading(base_name.replace('_', ' ').title(), 0)
doc.add_heading(base_name.replace("_", " ").title(), 0)
# Parse and render Markdown content line by line
lines = summary.splitlines()
@@ -193,41 +241,43 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
line = line.strip()
if not line:
if current_paragraph:
p = doc.add_paragraph(' '.join(current_paragraph))
p = doc.add_paragraph(" ".join(current_paragraph))
p.alignment = 3 # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
current_paragraph = []
continue
if line.startswith('#'):
if line.startswith("#"):
if current_paragraph:
p = doc.add_paragraph(' '.join(current_paragraph))
p = doc.add_paragraph(" ".join(current_paragraph))
p.alignment = 3
current_paragraph = []
# Process heading
level = len(line) - len(line.lstrip('#'))
heading_text = line.lstrip('#').strip()
level = len(line) - len(line.lstrip("#"))
heading_text = line.lstrip("#").strip()
if level <= 6:
doc.add_heading(heading_text, level=level)
else:
current_paragraph.append(heading_text)
elif line.startswith('-') or line.startswith('*') or line.startswith(''):
elif line.startswith("-") or line.startswith("*") or line.startswith(""):
if current_paragraph:
p = doc.add_paragraph(' '.join(current_paragraph))
p = doc.add_paragraph(" ".join(current_paragraph))
p.alignment = 3
current_paragraph = []
bullet_text = line.lstrip('-*• ').strip()
p = doc.add_paragraph(bullet_text, style='List Bullet')
bullet_text = line.lstrip("-*• ").strip()
p = doc.add_paragraph(bullet_text, style="List Bullet")
# Remove bold markers from bullets if present
if '**' in bullet_text:
if "**" in bullet_text:
# Basic cleanup for bullets
pass
else:
# Clean up excessive bold markers in body text if user requested
clean_line = line.replace('**', '') # Removing asterisks as per user complaint "se abusa de los asteriscos"
clean_line = line.replace(
"**", ""
) # Removing asterisks as per user complaint "se abusa de los asteriscos"
current_paragraph.append(clean_line)
if current_paragraph:
p = doc.add_paragraph(' '.join(current_paragraph))
p = doc.add_paragraph(" ".join(current_paragraph))
p.alignment = 3
doc.add_page_break()
@@ -258,18 +308,20 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
def new_page():
nonlocal y_position
c.showPage()
c.setFont('Helvetica', 11)
c.setFont("Helvetica", 11)
y_position = height - margin
c.setFont('Helvetica', 11)
c.setFont("Helvetica", 11)
# Title
c.setFont('Helvetica-Bold', 16)
c.drawString(margin, y_position, base_name.replace('_', ' ').title()[:100])
c.setFont("Helvetica-Bold", 16)
c.drawString(margin, y_position, base_name.replace("_", " ").title()[:100])
y_position -= 28
c.setFont('Helvetica', 11)
c.setFont("Helvetica", 11)
summary_clean = summary.replace('**', '') # Remove asterisks globally for cleaner PDF
summary_clean = summary.replace(
"**", ""
) # Remove asterisks globally for cleaner PDF
for raw_line in summary_clean.splitlines():
line = raw_line.rstrip()
@@ -282,24 +334,24 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
stripped = line.lstrip()
if stripped.startswith('#'):
level = len(stripped) - len(stripped.lstrip('#'))
heading_text = stripped.lstrip('#').strip()
if stripped.startswith("#"):
level = len(stripped) - len(stripped.lstrip("#"))
heading_text = stripped.lstrip("#").strip()
if heading_text:
font_size = 16 if level == 1 else 14 if level == 2 else 12
c.setFont('Helvetica-Bold', font_size)
c.setFont("Helvetica-Bold", font_size)
c.drawString(margin, y_position, heading_text[:90])
y_position -= font_size + 6
if y_position < margin:
new_page()
c.setFont('Helvetica', 11)
c.setFont("Helvetica", 11)
continue
if stripped.startswith(('-', '*', '')):
bullet_text = stripped.lstrip('-*•').strip()
wrapped_lines = textwrap.wrap(bullet_text, width=80) or ['']
if stripped.startswith(("-", "*", "")):
bullet_text = stripped.lstrip("-*•").strip()
wrapped_lines = textwrap.wrap(bullet_text, width=80) or [""]
for idx, wrapped in enumerate(wrapped_lines):
prefix = '' if idx == 0 else ' '
prefix = "" if idx == 0 else " "
c.drawString(margin, y_position, f"{prefix}{wrapped}")
y_position -= 14
if y_position < margin:
@@ -307,7 +359,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
continue
# Body text - Justified approximation (ReportLab native justification requires Paragraph styles, defaulting to wrap)
wrapped_lines = textwrap.wrap(stripped, width=90) or ['']
wrapped_lines = textwrap.wrap(stripped, width=90) or [""]
for wrapped in wrapped_lines:
c.drawString(margin, y_position, wrapped)
y_position -= 14

134
list_notion_pages.py Normal file
View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
Script para listar todas las páginas y bases de datos accesibles
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import settings
from notion_client import Client
def main():
    """List every page and database the Notion integration can access.

    Prints databases first (with up to five properties of each schema),
    then pages, and finishes with guidance on how to wire the right
    NOTION_DATABASE_ID into .env.
    """
    print("\n" + "=" * 70)
    print("📚 LISTANDO TODAS LAS PÁGINAS Y BASES DE DATOS")
    print("=" * 70 + "\n")

    token = settings.NOTION_API_TOKEN
    # CONSISTENCY FIX: the sibling scripts validate the token before calling
    # the API; without this, Client(auth=None) fails with a confusing error.
    if not token:
        print("❌ Token no configurado en .env\n")
        return

    client = Client(auth=token)

    try:
        # An unfiltered search returns every object shared with the integration.
        print("🔍 Buscando todas las páginas accesibles...\n")
        results = client.search(page_size=100)
        all_items = results.get("results", [])

        # Split results into databases and plain pages.
        databases = [item for item in all_items if item.get("object") == "database"]
        pages = [item for item in all_items if item.get("object") == "page"]

        print(
            f"✅ Encontrados: {len(databases)} base(s) de datos y {len(pages)} página(s)\n"
        )

        if databases:
            print("=" * 70)
            print("📊 BASES DE DATOS ENCONTRADAS:")
            print("=" * 70)

            for i, db in enumerate(databases, 1):
                db_id = db.get("id", "N/A")
                title_list = db.get("title", [])
                title = (
                    title_list[0].get("plain_text", "Sin título")
                    if title_list
                    else "Sin título"
                )

                print(f"\n🔷 {i}. {title}")
                print(f" ID: {db_id}")
                print(f" URL: https://notion.so/{db_id.replace('-', '')}")

                # Show up to five properties of each database schema.
                props = db.get("properties", {})
                if props:
                    print(" Propiedades:")
                    for prop_name, prop_data in list(props.items())[:5]:
                        prop_type = prop_data.get("type", "unknown")
                        print(f"{prop_name} ({prop_type})")
                    if len(props) > 5:
                        print(f" ... y {len(props) - 5} más")
                print("-" * 70)

        if pages:
            print("\n" + "=" * 70)
            print("📄 PÁGINAS ENCONTRADAS:")
            print("=" * 70)

            for i, page in enumerate(pages, 1):
                page_id = page.get("id", "N/A")

                # The title may live under 'title' or 'Name' depending on
                # whether the page belongs to a database.
                title = "Sin título"
                props = page.get("properties", {})
                if "title" in props:
                    title_prop = props["title"]
                    if "title" in title_prop:
                        title_list = title_prop["title"]
                        if title_list:
                            title = title_list[0].get("plain_text", "Sin título")
                elif "Name" in props:
                    name_prop = props["Name"]
                    if "title" in name_prop:
                        title_list = name_prop["title"]
                        if title_list:
                            title = title_list[0].get("plain_text", "Sin título")

                print(f"\n🔷 {i}. {title}")
                print(f" ID: {page_id}")
                print(f" URL: https://notion.so/{page_id.replace('-', '')}")
                print("-" * 70)

        if databases:
            print("\n" + "=" * 70)
            print("💡 SIGUIENTE PASO:")
            print("=" * 70)
            print("\nSi 'CBC' aparece arriba como BASE DE DATOS:")
            print("1. Copia el ID de la base de datos 'CBC'")
            print("2. Actualiza tu .env:")
            print(" NOTION_DATABASE_ID=<el_id_completo>")
            print("\nSi 'CBC' aparece como PÁGINA:")
            print("1. Abre la página en Notion")
            print("2. Busca una base de datos dentro de esa página")
            print("3. Haz click en '...' de la base de datos")
            print("4. Selecciona 'Copy link to view'")
            print("5. El ID estará en el URL copiado")
            print("\n4. Ejecuta: python test_notion_integration.py\n")
        else:
            print("\n⚠️ No se encontraron bases de datos accesibles.")
            print("\n📋 OPCIONES:")
            print("\n1. Crear una nueva base de datos:")
            print(" - Abre una de las páginas listadas arriba")
            print(" - Crea una tabla/database dentro")
            print(" - Copia el ID de esa base de datos")
            print("\n2. O comparte una base de datos existente:")
            print(" - Abre tu base de datos 'CBC' en Notion")
            print(" - Click en '...' > 'Connections'")
            print(" - Agrega tu integración\n")

    except Exception as e:
        print(f"❌ Error: {e}\n")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    main()

186
main.py
View File

@@ -3,6 +3,7 @@
CBCFacil - Main Service Entry Point
Unified AI service for document processing (audio, PDF, text)
"""
import logging
import sys
import time
@@ -16,8 +17,10 @@ from typing import Optional
# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()
# Configure logging with JSON formatter for production
class JSONFormatter(logging.Formatter):
"""JSON formatter for structured logging in production"""
@@ -29,7 +32,7 @@ class JSONFormatter(logging.Formatter):
"message": record.getMessage(),
"module": record.module,
"function": record.funcName,
"line": record.lineno
"line": record.lineno,
}
# Add exception info if present
@@ -55,9 +58,9 @@ def setup_logging() -> logging.Logger:
if settings.is_production:
console_handler.setFormatter(JSONFormatter())
else:
console_handler.setFormatter(logging.Formatter(
"%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
))
console_handler.setFormatter(
logging.Formatter("%(asctime)s [%(levelname)s] - %(name)s - %(message)s")
)
logger.addHandler(console_handler)
# File handler if configured
@@ -74,9 +77,12 @@ logger = setup_logging()
def acquire_lock() -> int:
"""Acquire single instance lock"""
lock_file = Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent))) / ".main_service.lock"
lock_file = (
Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent)))
/ ".main_service.lock"
)
lock_file.parent.mkdir(parents=True, exist_ok=True)
lock_fd = open(lock_file, 'w')
lock_fd = open(lock_file, "w")
fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
lock_fd.write(str(os.getpid()))
lock_fd.flush()
@@ -100,7 +106,9 @@ def validate_configuration() -> None:
try:
warnings = validate_environment()
if warnings:
logger.info(f"Configuration validation completed with {len(warnings)} warnings")
logger.info(
f"Configuration validation completed with {len(warnings)} warnings"
)
except ConfigurationError as e:
logger.error(f"Configuration validation failed: {e}")
raise
@@ -117,7 +125,7 @@ def check_service_health() -> dict:
health_status = {
"timestamp": datetime.utcnow().isoformat(),
"status": "healthy",
"services": {}
"services": {},
}
# Check WebDAV
@@ -129,15 +137,13 @@ def check_service_health() -> dict:
else:
health_status["services"]["webdav"] = {"status": "not_configured"}
except Exception as e:
health_status["services"]["webdav"] = {
"status": "unhealthy",
"error": str(e)
}
health_status["services"]["webdav"] = {"status": "unhealthy", "error": str(e)}
health_status["status"] = "degraded"
# Check Telegram
try:
from services.telegram_service import telegram_service
if telegram_service.is_configured:
health_status["services"]["telegram"] = {"status": "healthy"}
else:
@@ -145,22 +151,20 @@ def check_service_health() -> dict:
except Exception as e:
health_status["services"]["telegram"] = {
"status": "unavailable",
"error": str(e)
"error": str(e),
}
# Check VRAM manager
try:
from services.vram_manager import vram_manager
vram_info = vram_manager.get_vram_info()
health_status["services"]["vram"] = {
"status": "healthy",
"available_gb": vram_info.get("free", 0) / (1024**3)
"available_gb": vram_info.get("free", 0) / (1024**3),
}
except Exception as e:
health_status["services"]["vram"] = {
"status": "unavailable",
"error": str(e)
}
health_status["services"]["vram"] = {"status": "unavailable", "error": str(e)}
return health_status
@@ -189,12 +193,28 @@ def initialize_services() -> None:
# Configure Telegram if credentials available
if settings.TELEGRAM_TOKEN and settings.TELEGRAM_CHAT_ID:
try:
telegram_service.configure(settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID)
telegram_service.configure(
settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID
)
telegram_service.send_start_notification()
logger.info("Telegram notifications enabled")
except Exception as e:
logger.error(f"Failed to configure Telegram: {e}")
# Configure Notion if credentials available
if settings.has_notion_config:
try:
from services.notion_service import notion_service
notion_service.configure(
settings.NOTION_API_TOKEN, settings.NOTION_DATABASE_ID
)
logger.info("✅ Notion integration enabled")
except Exception as e:
logger.error(f"Failed to configure Notion: {e}")
else:
logger.info("Notion not configured - upload to Notion disabled")
# Initialize WebDAV if configured
if settings.has_webdav_config:
try:
@@ -233,6 +253,7 @@ def send_error_notification(error_type: str, error_message: str) -> None:
"""Send error notification via Telegram"""
try:
from services.telegram_service import telegram_service
if telegram_service.is_configured:
telegram_service.send_error_notification(error_type, error_message)
except Exception as e:
@@ -243,15 +264,16 @@ def run_dashboard_thread() -> None:
"""Run Flask dashboard in a separate thread"""
try:
from api.routes import create_app
app = create_app()
# Run Flask in production mode with threaded=True
app.run(
host='0.0.0.0',
host="0.0.0.0",
port=5000,
debug=False,
threaded=True,
use_reloader=False # Important: disable reloader in thread
use_reloader=False, # Important: disable reloader in thread
)
except Exception as e:
logger.error(f"Dashboard thread error: {e}")
@@ -260,14 +282,12 @@ def run_dashboard_thread() -> None:
def start_dashboard() -> threading.Thread:
"""Start dashboard in a background daemon thread"""
dashboard_port = int(os.getenv('DASHBOARD_PORT', '5000'))
dashboard_port = int(os.getenv("DASHBOARD_PORT", "5000"))
logger.info(f"Starting dashboard on port {dashboard_port}...")
# Create daemon thread so it doesn't block shutdown
dashboard_thread = threading.Thread(
target=run_dashboard_thread,
name="DashboardThread",
daemon=True
target=run_dashboard_thread, name="DashboardThread", daemon=True
)
dashboard_thread.start()
logger.info(f"Dashboard thread started (Thread-ID: {dashboard_thread.ident})")
@@ -301,9 +321,37 @@ def run_main_loop() -> None:
webdav_service.mkdir(settings.REMOTE_PDF_FOLDER)
pdf_files = webdav_service.list(settings.REMOTE_PDF_FOLDER)
for file_path in pdf_files:
if file_path.lower().endswith('.pdf'):
if file_path.lower().endswith(".pdf"):
if not processed_registry.is_processed(file_path):
pdf_processor.process(file_path)
from pathlib import Path
from urllib.parse import unquote
from services.telegram_service import telegram_service
local_filename = unquote(Path(file_path).name)
base_name = Path(local_filename).stem
local_path = (
settings.LOCAL_DOWNLOADS_PATH / local_filename
)
settings.LOCAL_DOWNLOADS_PATH.mkdir(
parents=True, exist_ok=True
)
# Step 1: Notify and download
telegram_service.send_message(
f"📄 Nuevo PDF detectado: {local_filename}\n"
f"⬇️ Descargando..."
)
logger.info(
f"Downloading PDF: {file_path} -> {local_path}"
)
webdav_service.download(file_path, local_path)
# Step 2: Process PDF
telegram_service.send_message(
f"🔍 Procesando PDF con OCR..."
)
pdf_processor.process(str(local_path))
processed_registry.save(file_path)
except Exception as e:
logger.exception(f"Error processing PDFs: {e}")
@@ -314,7 +362,10 @@ def run_main_loop() -> None:
try:
audio_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
for file_path in audio_files:
if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
if any(
file_path.lower().endswith(ext)
for ext in settings.AUDIO_EXTENSIONS
):
if not processed_registry.is_processed(file_path):
from pathlib import Path
from urllib.parse import unquote
@@ -323,36 +374,55 @@ def run_main_loop() -> None:
local_filename = unquote(Path(file_path).name)
base_name = Path(local_filename).stem
local_path = settings.LOCAL_DOWNLOADS_PATH / local_filename
settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
local_path = (
settings.LOCAL_DOWNLOADS_PATH / local_filename
)
settings.LOCAL_DOWNLOADS_PATH.mkdir(
parents=True, exist_ok=True
)
# Step 1: Notify and download
telegram_service.send_message(
f"🎵 Nuevo audio detectado: {local_filename}\n"
f"⬇️ Descargando..."
)
logger.info(f"Downloading audio: {file_path} -> {local_path}")
logger.info(
f"Downloading audio: {file_path} -> {local_path}"
)
webdav_service.download(file_path, local_path)
# Step 2: Transcribe
telegram_service.send_message(f"📝 Transcribiendo audio con Whisper...")
telegram_service.send_message(
f"📝 Transcribiendo audio con Whisper..."
)
result = audio_processor.process(str(local_path))
if result.get("success") and result.get("transcription_path"):
transcription_file = Path(result["transcription_path"])
if result.get("success") and result.get(
"transcription_path"
):
transcription_file = Path(
result["transcription_path"]
)
transcription_text = result.get("text", "")
# Step 3: Generate AI summary and documents
telegram_service.send_message(f"🤖 Generando resumen con IA...")
telegram_service.send_message(
f"🤖 Generando resumen con IA..."
)
doc_generator = DocumentGenerator()
success, summary, output_files = doc_generator.generate_summary(
success, summary, output_files = (
doc_generator.generate_summary(
transcription_text, base_name
)
)
# Step 4: Upload all files to Nextcloud
if success and output_files:
# Create folders
for folder in [settings.RESUMENES_FOLDER, settings.DOCX_FOLDER]:
for folder in [
settings.RESUMENES_FOLDER,
settings.DOCX_FOLDER,
]:
try:
webdav_service.makedirs(folder)
except Exception:
@@ -361,25 +431,35 @@ def run_main_loop() -> None:
# Upload transcription TXT
if transcription_file.exists():
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
webdav_service.upload(
transcription_file, remote_txt
)
logger.info(f"Uploaded: {remote_txt}")
# Upload DOCX
docx_path = Path(output_files.get('docx_path', ''))
docx_path = Path(
output_files.get("docx_path", "")
)
if docx_path.exists():
remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
webdav_service.upload(docx_path, remote_docx)
webdav_service.upload(
docx_path, remote_docx
)
logger.info(f"Uploaded: {remote_docx}")
# Upload PDF
pdf_path = Path(output_files.get('pdf_path', ''))
pdf_path = Path(
output_files.get("pdf_path", "")
)
if pdf_path.exists():
remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
webdav_service.upload(pdf_path, remote_pdf)
logger.info(f"Uploaded: {remote_pdf}")
# Upload Markdown
md_path = Path(output_files.get('markdown_path', ''))
md_path = Path(
output_files.get("markdown_path", "")
)
if md_path.exists():
remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
webdav_service.upload(md_path, remote_md)
@@ -396,11 +476,15 @@ def run_main_loop() -> None:
# Just upload transcription if summary failed
if transcription_file.exists():
try:
webdav_service.makedirs(settings.RESUMENES_FOLDER)
webdav_service.makedirs(
settings.RESUMENES_FOLDER
)
except Exception:
pass
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
webdav_service.upload(
transcription_file, remote_txt
)
telegram_service.send_message(
f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
)
@@ -415,7 +499,10 @@ def run_main_loop() -> None:
try:
text_files = webdav_service.list(settings.REMOTE_TXT_FOLDER)
for file_path in text_files:
if any(file_path.lower().endswith(ext) for ext in settings.TXT_EXTENSIONS):
if any(
file_path.lower().endswith(ext)
for ext in settings.TXT_EXTENSIONS
):
if not processed_registry.is_processed(file_path):
text_processor.process(file_path)
processed_registry.save(file_path)
@@ -443,7 +530,7 @@ def run_main_loop() -> None:
)
send_error_notification(
"consecutive_errors",
f"Service has failed {consecutive_errors} consecutive times"
f"Service has failed {consecutive_errors} consecutive times",
)
# Don't exit, let the loop continue with backoff
@@ -462,7 +549,9 @@ def main():
try:
logger.info("=== CBCFacil Service Started ===")
logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
logger.info(f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}")
logger.info(
f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}"
)
lock_fd = acquire_lock()
initialize_services()
@@ -491,12 +580,15 @@ if __name__ == "__main__":
command = sys.argv[1]
if command == "whisper" and len(sys.argv) == 4:
from processors.audio_processor import AudioProcessor
AudioProcessor().process(sys.argv[2])
elif command == "pdf" and len(sys.argv) == 4:
from processors.pdf_processor import PDFProcessor
PDFProcessor().process(sys.argv[2])
elif command == "health":
from main import check_service_health
health = check_service_health()
print(json.dumps(health, indent=2))
else:

2447
opus.md Normal file

File diff suppressed because it is too large Load Diff

10
restart_service.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Stop any running instance of the service (matches the "python main.py" command line).
pkill -f "python main.py"
# Give the old process time to shut down before restarting.
sleep 2
# Restart from the project root inside the virtualenv, appending output to main.log.
# NOTE(review): the project path is hard-coded — confirm it matches the deploy host.
cd /home/ren/proyectos/cbc
source .venv/bin/activate
python main.py >> main.log 2>&1 &
echo "Servicio reiniciado. Ver logs con: tail -f main.log"

353
services/notion_service.py Normal file
View File

@@ -0,0 +1,353 @@
"""
Notion integration service with official SDK
"""
import logging
from typing import Optional, Dict, Any, List
from pathlib import Path
from datetime import datetime
import time
try:
from notion_client import Client
from notion_client.errors import APIResponseError
NOTION_AVAILABLE = True
except ImportError:
NOTION_AVAILABLE = False
Client = None
APIResponseError = Exception
from config import settings
class NotionService:
    """Notion integration built on the official ``notion-client`` SDK.

    Creates pages either inside a database or under a parent page, converts
    Markdown summaries into Notion blocks, and retries rate-limited requests
    with exponential backoff. When the SDK is not installed the service stays
    unconfigured and every upload becomes a logged no-op.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Both are set by configure(); until then the service is inert.
        self._client: Optional[Client] = None
        self._database_id: Optional[str] = None

    def configure(self, token: str, database_id: str) -> None:
        """Initialise the SDK client with credentials.

        Args:
            token: Notion internal-integration token.
            database_id: Target database id (or parent page id when pages are
                created with ``metadata["use_as_page"]``).
        """
        if not NOTION_AVAILABLE:
            self.logger.error(
                "notion-client not installed. Install with: pip install notion-client"
            )
            return

        self._client = Client(auth=token)
        self._database_id = database_id
        self.logger.info("Notion service configured with official SDK")

    @property
    def is_configured(self) -> bool:
        """True when the SDK is importable and credentials were provided."""
        return bool(self._client and self._database_id and NOTION_AVAILABLE)

    def _rate_limited_request(self, func, *args, **kwargs):
        """Execute *func* with retry and exponential backoff on rate limits.

        Retries up to 3 times (waiting 1s, 2s, 4s) when the API reports
        ``rate_limited``; any other ``APIResponseError`` is re-raised.

        Raises:
            Exception: when all retries are exhausted.
        """
        max_retries = 3
        base_delay = 1
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except APIResponseError as e:
                if hasattr(e, "code") and e.code == "rate_limited":
                    delay = base_delay * (2**attempt)
                    self.logger.warning(f"Rate limited by Notion, waiting {delay}s")
                    time.sleep(delay)
                else:
                    raise
        raise Exception("Max retries exceeded for Notion API")

    def create_page_with_summary(
        self, title: str, summary: str, metadata: Dict[str, Any]
    ) -> Optional[str]:
        """Create a new page in Notion (database or parent page) with summary content.

        Args:
            title: Page title; truncated to Notion's 100-character limit.
            summary: Markdown text rendered into Notion blocks.
            metadata: Options — ``use_as_page`` (create as child of a parent
                page instead of a database row), ``add_status`` (attach the
                Status select), ``file_type`` (value for the "Tipo" select),
                ``pdf_path`` (referenced in a callout block).

        Returns:
            The created page id, or None when unconfigured or on any error.
        """
        if not self.is_configured:
            self.logger.warning("Notion not configured, skipping upload")
            return None

        try:
            # Decide between a child page of a parent page vs a database row.
            use_as_page = metadata.get("use_as_page", False)

            if use_as_page:
                page = self._rate_limited_request(
                    self._client.pages.create,
                    parent={"page_id": self._database_id},
                    properties={"title": [{"text": {"content": title[:100]}}]},
                )
            else:
                properties = {"Name": {"title": [{"text": {"content": title[:100]}}]}}

                # Optional Status select (enabled by default).
                if metadata.get("add_status", True):
                    properties["Status"] = {"select": {"name": "Procesado"}}

                # Optional "Tipo" select. NOTE(review): the default here is
                # False while Status defaults to True above — confirm whether
                # that asymmetry is intentional.
                if metadata.get("add_status", False) and metadata.get("file_type"):
                    properties["Tipo"] = {
                        # Fixed: the payload key was '" name"' (leading space),
                        # which the Notion API rejects as an unknown field.
                        "select": {"name": metadata["file_type"].upper()}
                    }

                page = self._rate_limited_request(
                    self._client.pages.create,
                    parent={"database_id": self._database_id},
                    properties=properties,
                )

            page_id = page["id"]
            self.logger.info(f"✅ Notion page created: {page_id}")

            # Append the summary body as child blocks of the new page.
            # (An unreachable duplicate of the database branch used to follow
            # the except clause below; it has been removed.)
            self._add_summary_content(page_id, summary, metadata.get("pdf_path"))
            return page_id

        except Exception as e:
            self.logger.error(f"❌ Error creating Notion page: {e}")
            return None

    def _add_summary_content(
        self, page_id: str, summary: str, pdf_path: Optional[Path] = None
    ) -> bool:
        """Append the summary (plus PDF callout and footer) to *page_id*.

        Returns:
            True when every append request succeeds, False otherwise.
        """
        try:
            blocks = []

            # Callout referencing the generated PDF, when it exists locally.
            if pdf_path and pdf_path.exists():
                blocks.append(
                    {
                        "object": "block",
                        "type": "callout",
                        "callout": {
                            "rich_text": [
                                {
                                    "type": "text",
                                    "text": {
                                        "content": f"📄 Documento generado automáticamente: {pdf_path.name}"
                                    },
                                }
                            ],
                            "icon": {"emoji": "📄"},
                        },
                    }
                )

            blocks.extend(self._parse_markdown_to_blocks(summary))

            # Footer: divider followed by the generation timestamp in italics.
            blocks.append({"object": "block", "type": "divider", "divider": {}})
            blocks.append(
                {
                    "object": "block",
                    "type": "paragraph",
                    "paragraph": {
                        "rich_text": [
                            {
                                "type": "text",
                                "text": {
                                    "content": f"Generado por CBCFacil el {datetime.now().strftime('%d/%m/%Y %H:%M')}"
                                },
                                "annotations": {"italic": True, "color": "gray"},
                            }
                        ]
                    },
                }
            )

            # The Notion API caps blocks.children.append at 100 blocks/request.
            if blocks:
                for start in range(0, len(blocks), 100):
                    batch = blocks[start : start + 100]
                    self._rate_limited_request(
                        self._client.blocks.children.append,
                        block_id=page_id,
                        children=batch,
                    )

            self.logger.info(f"✅ Added {len(blocks)} blocks to Notion page")
            return True

        except Exception as e:
            self.logger.error(f"❌ Error adding content blocks: {e}")
            return False

    @staticmethod
    def _text_block(block_type: str, text: str) -> Dict:
        """Build a block of *block_type* holding a single plain rich-text run."""
        return {
            "object": "block",
            "type": block_type,
            block_type: {
                "rich_text": [{"type": "text", "text": {"content": text}}]
            },
        }

    def _parse_markdown_to_blocks(self, markdown: str) -> List[Dict]:
        """Convert a Markdown string into a list of Notion block dicts.

        Supports ``#``/``##``/``###`` headings, ``-``/``*`` bullets, ``---``
        dividers and plain paragraphs. Text is truncated to Notion's
        2000-character rich-text limit; blank lines and footer lines starting
        with "*Generado por" are skipped.
        """
        blocks: List[Dict] = []
        for line in markdown.split("\n"):
            line = line.strip()
            if not line:
                continue

            if line.startswith("# "):
                text = line[2:].strip()[:2000]
                if text:
                    blocks.append(self._text_block("heading_1", text))
            elif line.startswith("## "):
                text = line[3:].strip()[:2000]
                if text:
                    blocks.append(self._text_block("heading_2", text))
            elif line.startswith("### "):
                text = line[4:].strip()[:2000]
                if text:
                    blocks.append(self._text_block("heading_3", text))
            elif line.startswith("- ") or line.startswith("* "):
                text = line[2:].strip()[:2000]
                if text:
                    blocks.append(self._text_block("bulleted_list_item", text))
            elif line == "---":
                blocks.append({"object": "block", "type": "divider", "divider": {}})
            elif not line.startswith("*Generado por"):
                text = line[:2000]
                if text:
                    blocks.append(self._text_block("paragraph", text))

        return blocks

    def upload_pdf_legacy(self, pdf_path: Path, title: str) -> bool:
        """Legacy method — create a simple page for *pdf_path* (backward compatibility)."""
        if not self.is_configured:
            self.logger.warning("Notion not configured, skipping upload")
            return False

        try:
            page_id = self.create_page_with_summary(
                title=title,
                summary=f"Documento procesado: {title}",
                metadata={"file_type": "PDF", "pdf_path": pdf_path},
            )
            return bool(page_id)
        except Exception as e:
            self.logger.error(f"Error uploading PDF to Notion: {e}")
            return False

    def upload_pdf(self, pdf_path: Path, title: str) -> bool:
        """Upload PDF info to Notion (alias for backward compatibility)."""
        return self.upload_pdf_legacy(pdf_path, title)

    def upload_pdf_as_file(self, pdf_path: Path, title: str) -> bool:
        """Upload PDF info as file (alias for backward compatibility)."""
        return self.upload_pdf_legacy(pdf_path, title)
# Module-level singleton shared by the rest of the application.
notion_service = NotionService()


def upload_to_notion(pdf_path: Path, title: str) -> bool:
    """Legacy function for backward compatibility.

    Delegates to ``NotionService.upload_pdf`` on the shared instance.
    """
    return notion_service.upload_pdf(pdf_path, title)

View File

@@ -0,0 +1,203 @@
"""
Notion integration service
"""
import logging
import base64
from typing import Optional
from pathlib import Path
try:
import requests
REQUESTS_AVAILABLE = True
except ImportError:
REQUESTS_AVAILABLE = False
requests = None
from config import settings
class NotionService:
    """Service for Notion API integration using raw ``requests`` calls.

    Legacy REST implementation; the SDK-based service supersedes it, but the
    public interface (configure / is_configured / upload_pdf /
    upload_pdf_as_file) is preserved for existing callers.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Credentials are injected later via configure().
        self._token: Optional[str] = None
        self._database_id: Optional[str] = None
        self._base_url = "https://api.notion.com/v1"

    def configure(self, token: str, database_id: str) -> None:
        """Store the API token and target database id."""
        self._token = token
        self._database_id = database_id
        self.logger.info("Notion service configured")

    @property
    def is_configured(self) -> bool:
        """True once both token and database id have been set."""
        return bool(self._token and self._database_id)

    def _get_headers(self) -> dict:
        """Headers for Notion REST requests (API version pinned)."""
        return {
            "Authorization": f"Bearer {self._token}",
            "Content-Type": "application/json",
            "Notion-Version": "2022-06-28"
        }

    def _can_upload(self, pdf_path: Path) -> bool:
        """Shared upload preconditions; logs the specific reason on failure."""
        if not self.is_configured:
            self.logger.warning("Notion not configured, skipping upload")
            return False
        if not REQUESTS_AVAILABLE:
            self.logger.error("requests library not available for Notion upload")
            return False
        if not pdf_path.exists():
            self.logger.error(f"PDF file not found: {pdf_path}")
            return False
        return True

    def upload_pdf(self, pdf_path: Path, title: str) -> bool:
        """Create a database page referencing *pdf_path*.

        Fix: the previous implementation base64-encoded the entire PDF into a
        ``data:application/pdf;base64,...`` "external" file URL. Notion's file
        object only accepts regular HTTP(S) URLs (data URIs are rejected), and
        the base64 payload also exceeded request-size limits, so the call could
        never succeed. The page now records the document name and local path
        in paragraph blocks instead.

        Returns:
            True when the page was created (HTTP 200), False otherwise.
        """
        if not self._can_upload(pdf_path):
            return False

        try:
            # Page payload: Name/Status properties plus two text blocks.
            page_data = {
                "parent": {"database_id": self._database_id},
                "properties": {
                    "Name": {
                        "title": [{"text": {"content": title}}]
                    },
                    "Status": {
                        "select": {"name": "Procesado"}
                    }
                },
                "children": [
                    {
                        "object": "block",
                        "type": "paragraph",
                        "paragraph": {
                            "rich_text": [
                                {
                                    "type": "text",
                                    "text": {
                                        "content": f"Documento generado automáticamente: {title}"
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "object": "block",
                        "type": "paragraph",
                        "paragraph": {
                            "rich_text": [
                                {
                                    "type": "text",
                                    "text": {"content": str(pdf_path)}
                                }
                            ]
                        }
                    }
                ]
            }

            response = requests.post(
                f"{self._base_url}/pages",
                headers=self._get_headers(),
                json=page_data,
                timeout=30
            )

            # Notion answers 200 with the page object on success.
            if response.status_code == 200:
                self.logger.info(f"PDF uploaded to Notion successfully: {title}")
                return True

            self.logger.error(
                f"Notion API error: {response.status_code} - {response.text}"
            )
            return False

        except Exception as e:
            self.logger.error(f"Error uploading PDF to Notion: {e}")
            return False

    def upload_pdf_as_file(self, pdf_path: Path, title: str) -> bool:
        """Create a page storing the PDF's local path in a "File Path" property.

        NOTE(review): assumes the target database defines a rich_text property
        named "File Path" — Notion rejects payloads with unknown properties;
        confirm the database schema before relying on this method.

        Returns:
            True when the page was created (HTTP 200), False otherwise.
        """
        if not self._can_upload(pdf_path):
            return False

        try:
            page_data = {
                "parent": {"database_id": self._database_id},
                "properties": {
                    "Name": {
                        "title": [{"text": {"content": title}}]
                    },
                    "Status": {
                        "select": {"name": "Procesado"}
                    },
                    "File Path": {
                        "rich_text": [{"text": {"content": str(pdf_path)}}]
                    }
                }
            }

            response = requests.post(
                f"{self._base_url}/pages",
                headers=self._get_headers(),
                json=page_data,
                timeout=30
            )

            if response.status_code == 200:
                self.logger.info(f"PDF uploaded to Notion successfully: {title}")
                return True

            self.logger.error(
                f"Notion API error: {response.status_code} - {response.text}"
            )
            return False

        except Exception as e:
            self.logger.error(f"Error uploading PDF to Notion: {e}")
            return False
# Module-level singleton shared by the rest of the application.
notion_service = NotionService()


def upload_to_notion(pdf_path: Path, title: str) -> bool:
    """Legacy function for backward compatibility.

    Delegates to ``NotionService.upload_pdf`` on the shared instance.
    """
    return notion_service.upload_pdf(pdf_path, title)

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""
Script para verificar y configurar permisos de Notion
"""
import sys
import logging
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from config import settings
from notion_client import Client
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
def main():
    """Interactive diagnostic for Notion permissions.

    Prints step-by-step sharing instructions, then tries to retrieve the
    configured database via the official SDK and reports either its title and
    property schema, or a human-readable hint for the most common failure
    modes (database not shared with the integration, invalid token).
    """
    print("\n" + "=" * 60)
    print("🔧 VERIFICACIÓN DE PERMISOS DE NOTION")
    print("=" * 60 + "\n")

    # Credentials come from config.settings (loaded from .env).
    token = settings.NOTION_API_TOKEN
    database_id = settings.NOTION_DATABASE_ID

    if not token or not database_id:
        print("❌ Falta configuración de Notion en .env")
        print(f"   NOTION_API: {'' if token else ''}")
        print(f"   NOTION_DATABASE_ID: {'' if database_id else ''}")
        return

    # Only a prefix of the token is echoed, to avoid leaking the secret.
    print(f"✅ Token configurado: {token[:20]}...")
    print(f"✅ Database ID: {database_id}\n")

    # Build the official SDK client.
    client = Client(auth=token)

    print("📋 PASOS PARA CONFIGURAR LOS PERMISOS:\n")
    print("1. Abre Notion y ve a tu base de datos 'CBC'")
    print(f"   URL: https://www.notion.so/{database_id}")
    print("\n2. Click en los 3 puntos (⋯) en la esquina superior derecha")
    print("\n3. Selecciona 'Connections' o 'Añadir conexiones'")
    print("\n4. Busca tu integración y actívala")
    print(f"   (Debería aparecer con el nombre que le pusiste)")
    print("\n5. Confirma los permisos\n")
    print("-" * 60)
    print("\n🧪 Intentando conectar con Notion...\n")

    try:
        # Retrieving the database both authenticates the token and verifies
        # that the database is shared with this integration.
        database = client.databases.retrieve(database_id=database_id)

        print("✅ ¡ÉXITO! La integración puede acceder a la base de datos")
        print(f"\n📊 Información de la base de datos:")
        print(
            f"   Título: {database['title'][0]['plain_text'] if database.get('title') else 'Sin título'}"
        )
        print(f"   ID: {database['id']}")
        print(f"\n   Propiedades disponibles:")
        for prop_name, prop_data in database.get("properties", {}).items():
            prop_type = prop_data.get("type", "unknown")
            print(f"      - {prop_name}: {prop_type}")

        print("\n" + "=" * 60)
        print("✅ TODO CONFIGURADO CORRECTAMENTE")
        print("=" * 60 + "\n")
        print("🚀 Ahora ejecuta: python test_notion_integration.py")
        print("   para probar subir un documento\n")

    except Exception as e:
        error_msg = str(e)
        print("❌ ERROR AL CONECTAR CON NOTION\n")
        print(f"Error: {error_msg}\n")

        # Map the raw error message to an actionable hint.
        if "Could not find database" in error_msg:
            print("⚠️ LA BASE DE DATOS NO ESTÁ COMPARTIDA CON TU INTEGRACIÓN")
            print("\nSigue los pasos arriba para compartir la base de datos.")
        elif "Unauthorized" in error_msg or "401" in error_msg:
            print("⚠️ EL TOKEN DE API ES INVÁLIDO")
            print("\nVerifica que el token esté correcto en .env")
        else:
            print("⚠️ ERROR DESCONOCIDO")
            print(f"\nDetalles: {error_msg}")

        print("\n" + "=" * 60 + "\n")
if __name__ == "__main__":
main()