- Instalado notion-client SDK oficial para integración robusta - Refactorizado services/notion_service.py con SDK oficial de Notion - Rate limiting con retry y exponential backoff - Parser Markdown → Notion blocks (headings, bullets, paragraphs) - Soporte para pages y databases - Manejo robusto de errores - Integración automática en document/generators.py - PDFs se suben automáticamente a Notion después de generarse - Contenido completo del resumen formateado con bloques - Metadata rica (tipo de archivo, path, fecha) - Configuración de Notion en main.py - Inicialización automática al arrancar el servicio - Validación de credenciales - Actualizado config/settings.py - Agregado load_dotenv() para cargar variables de .env - Configuración de Notion (NOTION_API, NOTION_DATABASE_ID) - Scripts de utilidad creados: - test_notion_integration.py: Test de subida a Notion - test_pipeline_notion.py: Test del pipeline completo - verify_notion_permissions.py: Verificación de permisos - list_notion_pages.py: Listar páginas accesibles - diagnose_notion.py: Diagnóstico completo - create_notion_database.py: Crear database automáticamente - restart_service.sh: Script de reinicio del servicio - Documentación completa en opus.md: - Análisis exhaustivo del codebase (42 archivos Python) - Bugs críticos identificados y soluciones - Mejoras de seguridad (autenticación, rate limiting, CORS, CSP) - Optimizaciones de rendimiento (Celery, Redis, PostgreSQL, WebSockets) - Plan de testing (estructura, ejemplos, 80% coverage goal) - Roadmap de implementación (6 sprints detallados) - Integración avanzada con Notion documentada Estado: Notion funcionando correctamente, PDFs se suben automáticamente
354 lines
12 KiB
Python
354 lines
12 KiB
Python
"""
|
|
Notion integration service with official SDK
|
|
"""
|
|
|
|
import logging
|
|
from typing import Optional, Dict, Any, List
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
import time
|
|
|
|
try:
|
|
from notion_client import Client
|
|
from notion_client.errors import APIResponseError
|
|
|
|
NOTION_AVAILABLE = True
|
|
except ImportError:
|
|
NOTION_AVAILABLE = False
|
|
Client = None
|
|
APIResponseError = Exception
|
|
|
|
from config import settings
|
|
|
|
|
|
class NotionService:
    """Notion API integration built on the official ``notion-client`` SDK.

    The service is created unconfigured; call :meth:`configure` with an
    integration token and a target id before uploading. Every public upload
    method is best-effort: failures are logged and reported through the
    return value (``None`` / ``False``) instead of being raised.
    """

    # Retry policy applied to rate-limited API calls.
    MAX_RETRIES = 3
    BASE_DELAY_SECONDS = 1

    # Size limits applied before sending data to Notion.
    TITLE_MAX_CHARS = 100  # page titles are truncated to this length
    TEXT_MAX_CHARS = 2000  # per rich-text content string
    MAX_BLOCKS_PER_REQUEST = 100  # children accepted by one append call

    # Markdown line prefix -> Notion block type. The prefixes are mutually
    # exclusive ("## x" does not start with "# "), so order is irrelevant.
    _MARKDOWN_PREFIXES = (
        ("# ", "heading_1"),
        ("## ", "heading_2"),
        ("### ", "heading_3"),
        ("- ", "bulleted_list_item"),
        ("* ", "bulleted_list_item"),
    )

    def __init__(self):
        """Create an unconfigured service (no client, no target id)."""
        self.logger = logging.getLogger(__name__)
        self._client: Optional[Client] = None
        self._database_id: Optional[str] = None

    def configure(self, token: str, database_id: str) -> None:
        """Create the SDK client and remember the target id.

        Args:
            token: Notion integration token passed to ``Client(auth=...)``.
            database_id: Id used as the parent of created pages. It is sent
                as ``database_id`` by default, or as ``page_id`` when the
                caller's metadata sets ``use_as_page``.

        If ``notion-client`` is not installed, an error is logged and the
        service stays unconfigured.
        """
        if not NOTION_AVAILABLE:
            self.logger.error(
                "notion-client not installed. Install with: pip install notion-client"
            )
            return

        self._client = Client(auth=token)
        self._database_id = database_id
        self.logger.info("Notion service configured with official SDK")

    @property
    def is_configured(self) -> bool:
        """Return True when a client and target id are set and the SDK is importable."""
        return bool(self._client and self._database_id and NOTION_AVAILABLE)

    def _rate_limited_request(self, func, *args, **kwargs):
        """Call ``func(*args, **kwargs)``, retrying while Notion rate-limits.

        Only ``APIResponseError`` instances whose ``code`` equals
        ``"rate_limited"`` are retried, with exponential backoff
        (``BASE_DELAY_SECONDS * 2**attempt``). Any other error propagates
        immediately.

        Returns:
            Whatever ``func`` returns.

        Raises:
            RuntimeError: every attempt was rate limited. The last API error
                is attached as ``__cause__``.
        """
        last_error = None

        for attempt in range(self.MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except APIResponseError as e:
                if getattr(e, "code", None) != "rate_limited":
                    raise
                last_error = e
                # Sleeping after the final attempt would only delay the failure.
                if attempt + 1 < self.MAX_RETRIES:
                    delay = self.BASE_DELAY_SECONDS * (2**attempt)
                    self.logger.warning(f"Rate limited by Notion, waiting {delay}s")
                    time.sleep(delay)

        raise RuntimeError("Max retries exceeded for Notion API") from last_error

    def _build_database_properties(
        self, title: str, metadata: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build the ``properties`` payload for a page created inside a database.

        Args:
            title: Page title; truncated to ``TITLE_MAX_CHARS``.
            metadata: Options read here: ``add_status`` and ``file_type``.

        Returns:
            A dict with ``Name`` and, depending on ``metadata``, ``Status``
            and ``Tipo`` select properties.
        """
        properties: Dict[str, Any] = {
            "Name": {"title": [{"text": {"content": title[: self.TITLE_MAX_CHARS]}}]}
        }

        # "Status" is written unless the caller explicitly disables it.
        if metadata.get("add_status", True):
            properties["Status"] = {"select": {"name": "Procesado"}}

        # "Tipo" is opt-in: it needs an explicit truthy add_status AND a file type.
        # NOTE(review): the default differs from the "Status" check above
        # (False vs True); kept as-is - confirm this asymmetry is intended.
        if metadata.get("add_status", False) and metadata.get("file_type"):
            properties["Tipo"] = {"select": {"name": metadata["file_type"].upper()}}

        return properties

    def create_page_with_summary(
        self, title: str, summary: str, metadata: Dict[str, Any]
    ) -> Optional[str]:
        """Create a Notion page and fill it with the summary content.

        Args:
            title: Page title; truncated to ``TITLE_MAX_CHARS``.
            summary: Markdown text converted to blocks and appended to the page.
            metadata: Options. ``use_as_page`` (default False) creates the page
                under a parent page instead of a database; ``add_status`` and
                ``file_type`` control database properties; ``pdf_path`` adds a
                callout naming the generated file.

        Returns:
            The id of the created page, or ``None`` when the service is not
            configured or page creation fails (the error is logged). A failure
            while appending content does not affect the returned id.
        """
        if not self.is_configured:
            self.logger.warning("Notion not configured, skipping upload")
            return None

        try:
            if metadata.get("use_as_page", False):
                # Child of another page: only a bare title property is sent.
                parent = {"page_id": self._database_id}
                properties = {
                    "title": [{"text": {"content": title[: self.TITLE_MAX_CHARS]}}]
                }
            else:
                # Row in a database.
                parent = {"database_id": self._database_id}
                properties = self._build_database_properties(title, metadata)

            page = self._rate_limited_request(
                self._client.pages.create,
                parent=parent,
                properties=properties,
            )

            page_id = page["id"]
            self.logger.info(f"✅ Notion page created: {page_id}")

            # Content is appended in a second step; its failure is logged by
            # _add_summary_content and deliberately does not undo the page.
            self._add_summary_content(page_id, summary, metadata.get("pdf_path"))

            return page_id

        except Exception as e:
            self.logger.error(f"❌ Error creating Notion page: {e}")
            return None

    def _build_content_blocks(
        self, summary: str, pdf_path: Optional[Path] = None
    ) -> List[Dict]:
        """Assemble the page body: optional PDF callout, summary blocks, footer.

        Args:
            summary: Markdown text for the body.
            pdf_path: Path of the generated PDF (``Path`` or ``str``). The
                callout is added only when the file exists.

        Returns:
            The list of Notion block dicts; it always ends with a divider and
            a timestamped footer paragraph.
        """
        blocks: List[Dict] = []

        if pdf_path:
            # Metadata may carry the path as a plain string; normalise it.
            pdf_file = Path(pdf_path)
            if pdf_file.exists():
                blocks.append(
                    {
                        "object": "block",
                        "type": "callout",
                        "callout": {
                            "rich_text": [
                                {
                                    "type": "text",
                                    "text": {
                                        "content": f"📄 Documento generado automáticamente: {pdf_file.name}"
                                    },
                                }
                            ],
                            "icon": {"emoji": "📄"},
                        },
                    }
                )

        blocks.extend(self._parse_markdown_to_blocks(summary))

        # Footer
        blocks.append({"object": "block", "type": "divider", "divider": {}})
        blocks.append(
            {
                "object": "block",
                "type": "paragraph",
                "paragraph": {
                    "rich_text": [
                        {
                            "type": "text",
                            "text": {
                                "content": f"Generado por CBCFacil el {datetime.now().strftime('%d/%m/%Y %H:%M')}"
                            },
                            "annotations": {"italic": True, "color": "gray"},
                        }
                    ]
                },
            }
        )

        return blocks

    def _add_summary_content(
        self, page_id: str, summary: str, pdf_path: Optional[Path] = None
    ) -> bool:
        """Append the summary to an existing page as Notion blocks.

        Args:
            page_id: Id of the page that receives the blocks.
            summary: Markdown text for the body.
            pdf_path: Optional path of the generated PDF (see
                :meth:`_build_content_blocks`).

        Returns:
            True when every batch was appended, False when any step failed
            (the error is logged).
        """
        try:
            blocks = self._build_content_blocks(summary, pdf_path)

            # Children are sent in batches of MAX_BLOCKS_PER_REQUEST.
            for start in range(0, len(blocks), self.MAX_BLOCKS_PER_REQUEST):
                self._rate_limited_request(
                    self._client.blocks.children.append,
                    block_id=page_id,
                    children=blocks[start : start + self.MAX_BLOCKS_PER_REQUEST],
                )

            self.logger.info(f"✅ Added {len(blocks)} blocks to Notion page")
            return True

        except Exception as e:
            self.logger.error(f"❌ Error adding content blocks: {e}")
            return False

    @staticmethod
    def _text_block(block_type: str, text: str) -> Dict:
        """Return a Notion block of ``block_type`` holding one plain-text run."""
        return {
            "object": "block",
            "type": block_type,
            block_type: {"rich_text": [{"type": "text", "text": {"content": text}}]},
        }

    def _parse_markdown_to_blocks(self, markdown: str) -> List[Dict]:
        """Convert a small Markdown subset to Notion blocks, one per line.

        Recognised lines: ``#``/``##``/``###`` headings, ``-``/``*`` bullets
        and ``---`` dividers. Any other non-blank line becomes a paragraph,
        except lines starting with ``*Generado por``, which are dropped
        because the page gets its own footer. Text is truncated to
        ``TEXT_MAX_CHARS`` characters.

        Args:
            markdown: Source text; ``None`` or an empty string yields ``[]``.

        Returns:
            The list of Notion block dicts in source order.
        """
        blocks: List[Dict] = []

        for raw_line in (markdown or "").split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            for prefix, block_type in self._MARKDOWN_PREFIXES:
                if line.startswith(prefix):
                    text = line[len(prefix) :].strip()[: self.TEXT_MAX_CHARS]
                    if text:
                        blocks.append(self._text_block(block_type, text))
                    break
            else:
                if line == "---":
                    blocks.append({"object": "block", "type": "divider", "divider": {}})
                elif not line.startswith("*Generado por"):
                    blocks.append(
                        self._text_block("paragraph", line[: self.TEXT_MAX_CHARS])
                    )

        return blocks

    def upload_pdf_legacy(self, pdf_path: Path, title: str) -> bool:
        """Create a simple page for a PDF (kept for backward compatibility).

        Args:
            pdf_path: Path of the PDF, passed on as ``pdf_path`` metadata.
            title: Page title.

        Returns:
            True when a page was created, False otherwise.
        """
        if not self.is_configured:
            self.logger.warning("Notion not configured, skipping upload")
            return False

        try:
            page_id = self.create_page_with_summary(
                title=title,
                summary=f"Documento procesado: {title}",
                metadata={"file_type": "PDF", "pdf_path": pdf_path},
            )
            return bool(page_id)

        except Exception as e:
            self.logger.error(f"Error uploading PDF to Notion: {e}")
            return False

    # Aliases kept for backward compatibility
    def upload_pdf(self, pdf_path: Path, title: str) -> bool:
        """Alias of :meth:`upload_pdf_legacy`."""
        return self.upload_pdf_legacy(pdf_path, title)

    def upload_pdf_as_file(self, pdf_path: Path, title: str) -> bool:
        """Alias of :meth:`upload_pdf_legacy`."""
        return self.upload_pdf_legacy(pdf_path, title)
|
|
|
|
|
|
# Global instance
|
|
# Shared module-level instance used by upload_to_notion(); it starts
# unconfigured, so uploads are skipped until configure() has been called on it.
notion_service = NotionService()
|
|
|
|
|
|
def upload_to_notion(pdf_path: Path, title: str) -> bool:
    """Upload a PDF record through the module-level ``notion_service``.

    Function-style entry point kept for backward compatibility: both
    arguments are forwarded unchanged to ``notion_service.upload_pdf`` and
    its boolean result is returned.
    """
    uploaded = notion_service.upload_pdf(pdf_path, title)
    return uploaded
|