feat: Integración automática con Notion + análisis completo del código
- Instalado notion-client SDK oficial para integración robusta - Refactorizado services/notion_service.py con SDK oficial de Notion - Rate limiting con retry y exponential backoff - Parser Markdown → Notion blocks (headings, bullets, paragraphs) - Soporte para pages y databases - Manejo robusto de errores - Integración automática en document/generators.py - PDFs se suben automáticamente a Notion después de generarse - Contenido completo del resumen formateado con bloques - Metadata rica (tipo de archivo, path, fecha) - Configuración de Notion en main.py - Inicialización automática al arrancar el servicio - Validación de credenciales - Actualizado config/settings.py - Agregado load_dotenv() para cargar variables de .env - Configuración de Notion (NOTION_API, NOTION_DATABASE_ID) - Scripts de utilidad creados: - test_notion_integration.py: Test de subida a Notion - test_pipeline_notion.py: Test del pipeline completo - verify_notion_permissions.py: Verificación de permisos - list_notion_pages.py: Listar páginas accesibles - diagnose_notion.py: Diagnóstico completo - create_notion_database.py: Crear database automáticamente - restart_service.sh: Script de reinicio del servicio - Documentación completa en opus.md: - Análisis exhaustivo del codebase (42 archivos Python) - Bugs críticos identificados y soluciones - Mejoras de seguridad (autenticación, rate limiting, CORS, CSP) - Optimizaciones de rendimiento (Celery, Redis, PostgreSQL, WebSockets) - Plan de testing (estructura, ejemplos, 80% coverage goal) - Roadmap de implementación (6 sprints detallados) - Integración avanzada con Notion documentada Estado: Notion funcionando correctamente, PDFs se suben automáticamente
This commit is contained in:
@@ -40,6 +40,14 @@ GEMINI_CLI_PATH=/path/to/gemini # or leave empty
|
||||
TELEGRAM_TOKEN=your_telegram_bot_token
|
||||
TELEGRAM_CHAT_ID=your_telegram_chat_id
|
||||
|
||||
# =============================================================================
|
||||
# Notion Integration (Optional - for automatic PDF uploads)
|
||||
# =============================================================================
|
||||
# Get your token from: https://developers.notion.com/docs/create-a-notion-integration
|
||||
NOTION_API=ntn_YOUR_NOTION_INTEGRATION_TOKEN_HERE
|
||||
# Get your database ID from the database URL in Notion
|
||||
NOTION_DATABASE_ID=your_database_id_here
|
||||
|
||||
# =============================================================================
|
||||
# Dashboard Configuration (Required for production)
|
||||
# =============================================================================
|
||||
|
||||
@@ -1,13 +1,19 @@
|
||||
"""
|
||||
Centralized configuration management for CBCFacil
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, Set, Union
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class ConfigurationError(Exception):
|
||||
"""Raised when configuration is invalid"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
@@ -44,7 +50,9 @@ class Settings:
|
||||
POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "5"))
|
||||
HTTP_TIMEOUT: int = int(os.getenv("HTTP_TIMEOUT", "30"))
|
||||
WEBDAV_MAX_RETRIES: int = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
|
||||
DOWNLOAD_CHUNK_SIZE: int = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "65536")) # 64KB for better performance
|
||||
DOWNLOAD_CHUNK_SIZE: int = int(
|
||||
os.getenv("DOWNLOAD_CHUNK_SIZE", "65536")
|
||||
) # 64KB for better performance
|
||||
MAX_FILENAME_LENGTH: int = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
|
||||
MAX_FILENAME_BASE_LENGTH: int = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
|
||||
MAX_FILENAME_TOPICS_LENGTH: int = int(os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20"))
|
||||
@@ -57,7 +65,13 @@ class Settings:
|
||||
# AI Providers
|
||||
ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
|
||||
ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
|
||||
ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv("ZAI_AUTH_TOKEN", "")
|
||||
ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv(
|
||||
"ZAI_AUTH_TOKEN", ""
|
||||
)
|
||||
|
||||
# Notion Integration
|
||||
NOTION_API_TOKEN: Optional[str] = os.getenv("NOTION_API")
|
||||
NOTION_DATABASE_ID: Optional[str] = os.getenv("NOTION_DATABASE_ID")
|
||||
|
||||
# Gemini
|
||||
GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
|
||||
@@ -76,13 +90,25 @@ class Settings:
|
||||
CPU_COUNT: int = os.cpu_count() or 1
|
||||
PDF_MAX_PAGES_PER_CHUNK: int = int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2"))
|
||||
PDF_DPI: int = int(os.getenv("PDF_DPI", "200"))
|
||||
PDF_RENDER_THREAD_COUNT: int = int(os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT))))
|
||||
PDF_RENDER_THREAD_COUNT: int = int(
|
||||
os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT)))
|
||||
)
|
||||
PDF_BATCH_SIZE: int = int(os.getenv("PDF_BATCH_SIZE", "2"))
|
||||
PDF_TROCR_MAX_BATCH: int = int(os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE)))
|
||||
PDF_TESSERACT_THREADS: int = int(os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3))))))
|
||||
PDF_PREPROCESS_THREADS: int = int(os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS)))
|
||||
PDF_TEXT_DETECTION_MIN_RATIO: float = float(os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6"))
|
||||
PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120"))
|
||||
PDF_TROCR_MAX_BATCH: int = int(
|
||||
os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE))
|
||||
)
|
||||
PDF_TESSERACT_THREADS: int = int(
|
||||
os.getenv("PDF_TESSERACT_THREADS", str(max(1, min(2, max(1, CPU_COUNT // 3)))))
|
||||
)
|
||||
PDF_PREPROCESS_THREADS: int = int(
|
||||
os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS))
|
||||
)
|
||||
PDF_TEXT_DETECTION_MIN_RATIO: float = float(
|
||||
os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6")
|
||||
)
|
||||
PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(
|
||||
os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120")
|
||||
)
|
||||
|
||||
# Error handling
|
||||
ERROR_THROTTLE_SECONDS: int = int(os.getenv("ERROR_THROTTLE_SECONDS", "600"))
|
||||
@@ -90,7 +116,9 @@ class Settings:
|
||||
# GPU/VRAM Management
|
||||
MODEL_TIMEOUT_SECONDS: int = int(os.getenv("MODEL_TIMEOUT_SECONDS", "300"))
|
||||
CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
|
||||
PYTORCH_CUDA_ALLOC_CONF: str = os.getenv("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512")
|
||||
PYTORCH_CUDA_ALLOC_CONF: str = os.getenv(
|
||||
"PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512"
|
||||
)
|
||||
|
||||
# GPU Detection (auto, nvidia, amd, cpu)
|
||||
GPU_PREFERENCE: str = os.getenv("GPU_PREFERENCE", "auto")
|
||||
@@ -127,17 +155,29 @@ class Settings:
|
||||
@property
|
||||
def has_ai_config(self) -> bool:
|
||||
"""Check if AI providers are configured"""
|
||||
return any([
|
||||
return any(
|
||||
[
|
||||
self.ZAI_AUTH_TOKEN,
|
||||
self.GEMINI_API_KEY,
|
||||
self.CLAUDE_CLI_PATH,
|
||||
self.GEMINI_CLI_PATH
|
||||
])
|
||||
self.GEMINI_CLI_PATH,
|
||||
]
|
||||
)
|
||||
|
||||
@property
|
||||
def has_notion_config(self) -> bool:
|
||||
"""Check if Notion is configured"""
|
||||
return bool(self.NOTION_API_TOKEN and self.NOTION_DATABASE_ID)
|
||||
|
||||
@property
|
||||
def processed_files_path(self) -> Path:
|
||||
"""Get the path to the processed files registry"""
|
||||
return Path(os.getenv("PROCESSED_FILES_PATH", str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt")))
|
||||
return Path(
|
||||
os.getenv(
|
||||
"PROCESSED_FILES_PATH",
|
||||
str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt"),
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def nextcloud_url(self) -> str:
|
||||
@@ -157,7 +197,9 @@ class Settings:
|
||||
def nextcloud_password(self) -> str:
|
||||
"""Get Nextcloud password with validation"""
|
||||
if not self.NEXTCLOUD_PASSWORD and self.is_production:
|
||||
raise ConfigurationError("NEXTCLOUD_PASSWORD is required in production mode")
|
||||
raise ConfigurationError(
|
||||
"NEXTCLOUD_PASSWORD is required in production mode"
|
||||
)
|
||||
return self.NEXTCLOUD_PASSWORD
|
||||
|
||||
@property
|
||||
@@ -181,6 +223,7 @@ class Settings:
|
||||
"""Check if GPU support is available"""
|
||||
try:
|
||||
import torch
|
||||
|
||||
return torch.cuda.is_available()
|
||||
except ImportError:
|
||||
return False
|
||||
@@ -203,7 +246,7 @@ class Settings:
|
||||
"telegram_configured": self.telegram_configured,
|
||||
"gpu_support": self.has_gpu_support,
|
||||
"cpu_count": self.CPU_COUNT,
|
||||
"poll_interval": self.POLL_INTERVAL
|
||||
"poll_interval": self.POLL_INTERVAL,
|
||||
}
|
||||
|
||||
|
||||
|
||||
126
create_notion_database.py
Normal file
126
create_notion_database.py
Normal file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script para crear una nueva base de datos de Notion y compartirla automáticamente
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from config import settings
|
||||
from notion_client import Client
|
||||
|
||||
|
||||
def main():
|
||||
print("\n" + "=" * 70)
|
||||
print("🛠️ CREAR BASE DE DATOS DE NOTION PARA CBCFACIL")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
token = settings.NOTION_API_TOKEN
|
||||
if not token:
|
||||
print("❌ Token no configurado en .env")
|
||||
return
|
||||
|
||||
client = Client(auth=token)
|
||||
|
||||
# Primero, buscar una página donde crear la database
|
||||
print("🔍 Buscando páginas accesibles...\n")
|
||||
results = client.search(page_size=100)
|
||||
pages = [p for p in results.get("results", []) if p.get("object") == "page"]
|
||||
|
||||
if not pages:
|
||||
print("❌ No tienes páginas accesibles.")
|
||||
print("\n📋 SOLUCIÓN:")
|
||||
print("1. Ve a Notion y crea una nueva página")
|
||||
print("2. En esa página, click en 'Share'")
|
||||
print("3. Busca y agrega tu integración")
|
||||
print("4. Ejecuta este script nuevamente\n")
|
||||
return
|
||||
|
||||
# Mostrar páginas disponibles
|
||||
print(f"✅ Encontradas {len(pages)} página(s) accesibles:\n")
|
||||
for i, page in enumerate(pages[:10], 1):
|
||||
page_id = page.get("id")
|
||||
props = page.get("properties", {})
|
||||
|
||||
# Intentar obtener el título
|
||||
title = "Sin título"
|
||||
for prop_name, prop_data in props.items():
|
||||
if prop_data.get("type") == "title":
|
||||
title_list = prop_data.get("title", [])
|
||||
if title_list:
|
||||
title = title_list[0].get("plain_text", "Sin título")
|
||||
break
|
||||
|
||||
print(f"{i}. {title[:50]}")
|
||||
print(f" ID: {page_id}\n")
|
||||
|
||||
# Usar la primera página accesible
|
||||
parent_page = pages[0]
|
||||
parent_id = parent_page.get("id")
|
||||
|
||||
print("=" * 70)
|
||||
print(f"📄 Voy a crear la base de datos dentro de la primera página")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
try:
|
||||
# Crear la base de datos
|
||||
print("🚀 Creando base de datos 'CBCFacil - Documentos'...\n")
|
||||
|
||||
database = client.databases.create(
|
||||
parent={"page_id": parent_id},
|
||||
title=[
|
||||
{
|
||||
"type": "text",
|
||||
"text": {"content": "CBCFacil - Documentos Procesados"},
|
||||
}
|
||||
],
|
||||
properties={
|
||||
"Name": {"title": {}},
|
||||
"Status": {
|
||||
"select": {
|
||||
"options": [
|
||||
{"name": "Procesado", "color": "green"},
|
||||
{"name": "En Proceso", "color": "yellow"},
|
||||
{"name": "Error", "color": "red"},
|
||||
]
|
||||
}
|
||||
},
|
||||
"Tipo": {
|
||||
"select": {
|
||||
"options": [
|
||||
{"name": "AUDIO", "color": "purple"},
|
||||
{"name": "PDF", "color": "orange"},
|
||||
{"name": "TEXTO", "color": "gray"},
|
||||
]
|
||||
}
|
||||
},
|
||||
"Fecha": {"date": {}},
|
||||
},
|
||||
)
|
||||
|
||||
db_id = database["id"]
|
||||
|
||||
print("✅ ¡Base de datos creada exitosamente!")
|
||||
print("=" * 70)
|
||||
print(f"\n📊 Información de la base de datos:\n")
|
||||
print(f" Nombre: CBCFacil - Documentos Procesados")
|
||||
print(f" ID: {db_id}")
|
||||
print(f" URL: https://notion.so/{db_id.replace('-', '')}")
|
||||
print("\n=" * 70)
|
||||
print("\n🎯 SIGUIENTE PASO:")
|
||||
print("=" * 70)
|
||||
print(f"\nActualiza tu archivo .env con:\n")
|
||||
print(f"NOTION_DATABASE_ID={db_id}\n")
|
||||
print("Luego ejecuta:")
|
||||
print("python test_notion_integration.py\n")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error creando base de datos: {e}")
|
||||
print("\nVerifica que la integración tenga permisos de escritura.\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
116
diagnose_notion.py
Normal file
116
diagnose_notion.py
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script para diagnosticar la integración de Notion
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from config import settings
|
||||
from notion_client import Client
|
||||
|
||||
|
||||
def main():
|
||||
print("\n" + "=" * 70)
|
||||
print("🔍 DIAGNÓSTICO COMPLETO DE NOTION")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
token = settings.NOTION_API_TOKEN
|
||||
database_id = settings.NOTION_DATABASE_ID
|
||||
|
||||
print(f"Token: {token[:30]}..." if token else "❌ Token no configurado")
|
||||
print(f"Database ID: {database_id}\n")
|
||||
|
||||
if not token:
|
||||
print("❌ Configura NOTION_API en .env\n")
|
||||
return
|
||||
|
||||
client = Client(auth=token)
|
||||
|
||||
# Test 1: Verificar que el token sea válido
|
||||
print("📝 Test 1: Verificando token...")
|
||||
try:
|
||||
# Intentar buscar páginas (cualquiera)
|
||||
results = client.search(query="", page_size=1)
|
||||
print("✅ Token válido - la integración está activa\n")
|
||||
|
||||
# Ver si tiene acceso a alguna página
|
||||
pages = results.get("results", [])
|
||||
if pages:
|
||||
print(f"✅ La integración tiene acceso a {len(pages)} página(s)")
|
||||
for page in pages[:3]:
|
||||
page_id = page.get("id", "N/A")
|
||||
page_type = page.get("object", "N/A")
|
||||
print(f" - {page_type}: {page_id}")
|
||||
else:
|
||||
print("⚠️ La integración NO tiene acceso a ninguna página aún")
|
||||
print(" Esto es normal si acabas de crear la integración.\n")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error con el token: {e}\n")
|
||||
return
|
||||
|
||||
# Test 2: Verificar acceso a la base de datos específica
|
||||
print("\n📊 Test 2: Verificando acceso a la base de datos CBC...")
|
||||
try:
|
||||
database = client.databases.retrieve(database_id=database_id)
|
||||
print("✅ ¡ÉXITO! La integración puede acceder a la base de datos\n")
|
||||
|
||||
title = database.get("title", [{}])[0].get("plain_text", "Sin título")
|
||||
print(f" Título: {title}")
|
||||
print(f" ID: {database['id']}")
|
||||
print(f"\n Propiedades:")
|
||||
for prop_name in database.get("properties", {}).keys():
|
||||
print(f" ✓ {prop_name}")
|
||||
|
||||
print("\n" + "=" * 70)
|
||||
print("✅ TODO CONFIGURADO CORRECTAMENTE")
|
||||
print("=" * 70)
|
||||
print("\n🚀 Ejecuta: python test_notion_integration.py\n")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
print(f"❌ No se puede acceder a la base de datos")
|
||||
print(f" Error: {error_msg}\n")
|
||||
|
||||
if "Could not find database" in error_msg:
|
||||
print("=" * 70)
|
||||
print("⚠️ ACCIÓN REQUERIDA: Compartir la base de datos")
|
||||
print("=" * 70)
|
||||
print("\n📋 PASOS DETALLADOS:\n")
|
||||
print("1. Abre Notion en tu navegador")
|
||||
print("\n2. Ve a tu base de datos 'CBC'")
|
||||
print(f" Opción A: Usa este link directo:")
|
||||
print(f" → https://www.notion.so/{database_id.replace('-', '')}")
|
||||
print(f"\n Opción B: Busca 'CBC' en tu workspace")
|
||||
print("\n3. En la página de la base de datos, busca el botón '...' ")
|
||||
print(" (tres puntos) en la esquina SUPERIOR DERECHA")
|
||||
print("\n4. En el menú que se abre, busca:")
|
||||
print(" • 'Connections' (en inglés)")
|
||||
print(" • 'Conexiones' (en español)")
|
||||
print(" • 'Connect to' (puede variar)")
|
||||
print("\n5. Haz click y verás un menú de integraciones")
|
||||
print("\n6. Busca tu integración en la lista")
|
||||
print(" (Debería tener el nombre que le pusiste al crearla)")
|
||||
print("\n7. Haz click en tu integración para activarla")
|
||||
print("\n8. Confirma los permisos cuando te lo pida")
|
||||
print("\n9. Deberías ver un mensaje confirmando la conexión")
|
||||
print("\n10. ¡Listo! Vuelve a ejecutar:")
|
||||
print(" python verify_notion_permissions.py\n")
|
||||
print("=" * 70)
|
||||
|
||||
# Crear una página de prueba simple para verificar
|
||||
print("\n💡 ALTERNATIVA: Crear una nueva página de prueba\n")
|
||||
print("Si no encuentras la opción de conexiones en tu base de datos,")
|
||||
print("puedes crear una página nueva y compartirla con la integración:\n")
|
||||
print("1. Crea una nueva página en Notion")
|
||||
print("2. En esa página, click en 'Share' (Compartir)")
|
||||
print("3. Busca tu integración y agrégala")
|
||||
print("4. Luego convierte esa página en una base de datos")
|
||||
print("5. Usa el ID de esa nueva base de datos\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Document generation utilities
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -17,7 +18,9 @@ class DocumentGenerator:
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.ai_provider = ai_provider_factory.get_best_provider()
|
||||
|
||||
def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
|
||||
def generate_summary(
|
||||
self, text: str, base_name: str
|
||||
) -> Tuple[bool, str, Dict[str, Any]]:
|
||||
"""Generate unified summary"""
|
||||
self.logger.info(f"Generating summary for {base_name}")
|
||||
|
||||
@@ -85,13 +88,16 @@ Instrucciones:
|
||||
|
||||
# Use generic Gemini provider for formatting as requested
|
||||
from services.ai.gemini_provider import GeminiProvider
|
||||
|
||||
formatter = GeminiProvider()
|
||||
|
||||
try:
|
||||
if formatter.is_available():
|
||||
summary = formatter.generate_text(format_prompt)
|
||||
else:
|
||||
self.logger.warning("Gemini formatter not available, using raw summary")
|
||||
self.logger.warning(
|
||||
"Gemini formatter not available, using raw summary"
|
||||
)
|
||||
summary = raw_summary
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Formatting failed ({e}), using raw summary")
|
||||
@@ -105,13 +111,51 @@ Instrucciones:
|
||||
docx_path = self._create_docx(summary, base_name)
|
||||
pdf_path = self._create_pdf(summary, base_name)
|
||||
|
||||
# Upload to Notion if configured
|
||||
from services.notion_service import notion_service
|
||||
|
||||
notion_uploaded = False
|
||||
notion_page_id = None
|
||||
if settings.has_notion_config:
|
||||
try:
|
||||
title = base_name.replace("_", " ").title()
|
||||
|
||||
# Crear página con el contenido completo del resumen
|
||||
notion_metadata = {
|
||||
"file_type": "Audio", # O 'PDF' dependiendo del origen
|
||||
"pdf_path": pdf_path,
|
||||
"add_status": False, # No usar Status/Tipo (no existen en la DB)
|
||||
"use_as_page": False, # Usar como database, no página
|
||||
}
|
||||
|
||||
notion_page_id = notion_service.create_page_with_summary(
|
||||
title=title, summary=summary, metadata=notion_metadata
|
||||
)
|
||||
|
||||
if notion_page_id:
|
||||
notion_uploaded = True
|
||||
self.logger.info(
|
||||
f"✅ Resumen subido a Notion: {title} (ID: {notion_page_id})"
|
||||
)
|
||||
else:
|
||||
self.logger.warning(f"⚠️ No se pudo subir a Notion: {title}")
|
||||
except Exception as e:
|
||||
self.logger.warning(f"❌ Error al subir a Notion: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
else:
|
||||
self.logger.info("Notion not configured - skipping upload")
|
||||
|
||||
metadata = {
|
||||
'markdown_path': str(markdown_path),
|
||||
'docx_path': str(docx_path),
|
||||
'pdf_path': str(pdf_path),
|
||||
'docx_name': Path(docx_path).name,
|
||||
'summary': summary,
|
||||
'filename': filename
|
||||
"markdown_path": str(markdown_path),
|
||||
"docx_path": str(docx_path),
|
||||
"pdf_path": str(pdf_path),
|
||||
"docx_name": Path(docx_path).name,
|
||||
"summary": summary,
|
||||
"filename": filename,
|
||||
"notion_uploaded": notion_uploaded,
|
||||
"notion_page_id": notion_page_id,
|
||||
}
|
||||
|
||||
return True, summary, metadata
|
||||
@@ -129,17 +173,21 @@ Summary: {summary}
|
||||
|
||||
Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
topics_text = self.ai_provider.sanitize_input(prompt) if hasattr(self.ai_provider, 'sanitize_input') else summary[:100]
|
||||
topics_text = (
|
||||
self.ai_provider.sanitize_input(prompt)
|
||||
if hasattr(self.ai_provider, "sanitize_input")
|
||||
else summary[:100]
|
||||
)
|
||||
|
||||
# Simple topic extraction
|
||||
topics = re.findall(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b', topics_text)[:3]
|
||||
topics = re.findall(r"\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b", topics_text)[:3]
|
||||
if not topics:
|
||||
topics = ['documento']
|
||||
topics = ["documento"]
|
||||
|
||||
# Limit topic length
|
||||
topics = [t[: settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
|
||||
|
||||
filename = '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]
|
||||
filename = "_".join(topics)[: settings.MAX_FILENAME_LENGTH]
|
||||
return filename
|
||||
|
||||
except Exception as e:
|
||||
@@ -153,7 +201,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
output_path = output_dir / f"{base_name}_unificado.md"
|
||||
|
||||
content = f"""# {base_name.replace('_', ' ').title()}
|
||||
content = f"""# {base_name.replace("_", " ").title()}
|
||||
|
||||
## Resumen
|
||||
|
||||
@@ -164,7 +212,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
*Generado por CBCFacil*
|
||||
"""
|
||||
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
|
||||
return output_path
|
||||
@@ -183,7 +231,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
output_path = output_dir / f"{base_name}_unificado.docx"
|
||||
|
||||
doc = Document()
|
||||
doc.add_heading(base_name.replace('_', ' ').title(), 0)
|
||||
doc.add_heading(base_name.replace("_", " ").title(), 0)
|
||||
|
||||
# Parse and render Markdown content line by line
|
||||
lines = summary.splitlines()
|
||||
@@ -193,41 +241,43 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3 # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
|
||||
current_paragraph = []
|
||||
continue
|
||||
|
||||
if line.startswith('#'):
|
||||
if line.startswith("#"):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
# Process heading
|
||||
level = len(line) - len(line.lstrip('#'))
|
||||
heading_text = line.lstrip('#').strip()
|
||||
level = len(line) - len(line.lstrip("#"))
|
||||
heading_text = line.lstrip("#").strip()
|
||||
if level <= 6:
|
||||
doc.add_heading(heading_text, level=level)
|
||||
else:
|
||||
current_paragraph.append(heading_text)
|
||||
elif line.startswith('-') or line.startswith('*') or line.startswith('•'):
|
||||
elif line.startswith("-") or line.startswith("*") or line.startswith("•"):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
bullet_text = line.lstrip('-*• ').strip()
|
||||
p = doc.add_paragraph(bullet_text, style='List Bullet')
|
||||
bullet_text = line.lstrip("-*• ").strip()
|
||||
p = doc.add_paragraph(bullet_text, style="List Bullet")
|
||||
# Remove bold markers from bullets if present
|
||||
if '**' in bullet_text:
|
||||
if "**" in bullet_text:
|
||||
# Basic cleanup for bullets
|
||||
pass
|
||||
else:
|
||||
# Clean up excessive bold markers in body text if user requested
|
||||
clean_line = line.replace('**', '') # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||
clean_line = line.replace(
|
||||
"**", ""
|
||||
) # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||
current_paragraph.append(clean_line)
|
||||
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
|
||||
doc.add_page_break()
|
||||
@@ -258,18 +308,20 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
def new_page():
|
||||
nonlocal y_position
|
||||
c.showPage()
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
y_position = height - margin
|
||||
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
|
||||
# Title
|
||||
c.setFont('Helvetica-Bold', 16)
|
||||
c.drawString(margin, y_position, base_name.replace('_', ' ').title()[:100])
|
||||
c.setFont("Helvetica-Bold", 16)
|
||||
c.drawString(margin, y_position, base_name.replace("_", " ").title()[:100])
|
||||
y_position -= 28
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
|
||||
summary_clean = summary.replace('**', '') # Remove asterisks globally for cleaner PDF
|
||||
summary_clean = summary.replace(
|
||||
"**", ""
|
||||
) # Remove asterisks globally for cleaner PDF
|
||||
|
||||
for raw_line in summary_clean.splitlines():
|
||||
line = raw_line.rstrip()
|
||||
@@ -282,24 +334,24 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
stripped = line.lstrip()
|
||||
|
||||
if stripped.startswith('#'):
|
||||
level = len(stripped) - len(stripped.lstrip('#'))
|
||||
heading_text = stripped.lstrip('#').strip()
|
||||
if stripped.startswith("#"):
|
||||
level = len(stripped) - len(stripped.lstrip("#"))
|
||||
heading_text = stripped.lstrip("#").strip()
|
||||
if heading_text:
|
||||
font_size = 16 if level == 1 else 14 if level == 2 else 12
|
||||
c.setFont('Helvetica-Bold', font_size)
|
||||
c.setFont("Helvetica-Bold", font_size)
|
||||
c.drawString(margin, y_position, heading_text[:90])
|
||||
y_position -= font_size + 6
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
continue
|
||||
|
||||
if stripped.startswith(('-', '*', '•')):
|
||||
bullet_text = stripped.lstrip('-*•').strip()
|
||||
wrapped_lines = textwrap.wrap(bullet_text, width=80) or ['']
|
||||
if stripped.startswith(("-", "*", "•")):
|
||||
bullet_text = stripped.lstrip("-*•").strip()
|
||||
wrapped_lines = textwrap.wrap(bullet_text, width=80) or [""]
|
||||
for idx, wrapped in enumerate(wrapped_lines):
|
||||
prefix = '• ' if idx == 0 else ' '
|
||||
prefix = "• " if idx == 0 else " "
|
||||
c.drawString(margin, y_position, f"{prefix}{wrapped}")
|
||||
y_position -= 14
|
||||
if y_position < margin:
|
||||
@@ -307,7 +359,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
continue
|
||||
|
||||
# Body text - Justified approximation (ReportLab native justification requires Paragraph styles, defaulting to wrap)
|
||||
wrapped_lines = textwrap.wrap(stripped, width=90) or ['']
|
||||
wrapped_lines = textwrap.wrap(stripped, width=90) or [""]
|
||||
for wrapped in wrapped_lines:
|
||||
c.drawString(margin, y_position, wrapped)
|
||||
y_position -= 14
|
||||
|
||||
134
list_notion_pages.py
Normal file
134
list_notion_pages.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script para listar todas las páginas y bases de datos accesibles
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from config import settings
|
||||
from notion_client import Client
|
||||
|
||||
|
||||
def main():
|
||||
print("\n" + "=" * 70)
|
||||
print("📚 LISTANDO TODAS LAS PÁGINAS Y BASES DE DATOS")
|
||||
print("=" * 70 + "\n")
|
||||
|
||||
token = settings.NOTION_API_TOKEN
|
||||
client = Client(auth=token)
|
||||
|
||||
try:
|
||||
# Buscar todas las páginas sin filtro
|
||||
print("🔍 Buscando todas las páginas accesibles...\n")
|
||||
results = client.search(page_size=100)
|
||||
|
||||
all_items = results.get("results", [])
|
||||
|
||||
# Separar bases de datos y páginas
|
||||
databases = [item for item in all_items if item.get("object") == "database"]
|
||||
pages = [item for item in all_items if item.get("object") == "page"]
|
||||
|
||||
print(
|
||||
f"✅ Encontrados: {len(databases)} base(s) de datos y {len(pages)} página(s)\n"
|
||||
)
|
||||
|
||||
if databases:
|
||||
print("=" * 70)
|
||||
print("📊 BASES DE DATOS ENCONTRADAS:")
|
||||
print("=" * 70)
|
||||
|
||||
for i, db in enumerate(databases, 1):
|
||||
db_id = db.get("id", "N/A")
|
||||
title_list = db.get("title", [])
|
||||
title = (
|
||||
title_list[0].get("plain_text", "Sin título")
|
||||
if title_list
|
||||
else "Sin título"
|
||||
)
|
||||
|
||||
print(f"\n🔷 {i}. {title}")
|
||||
print(f" ID: {db_id}")
|
||||
print(f" URL: https://notion.so/{db_id.replace('-', '')}")
|
||||
|
||||
# Mostrar propiedades
|
||||
props = db.get("properties", {})
|
||||
if props:
|
||||
print(f" Propiedades:")
|
||||
for prop_name, prop_data in list(props.items())[:5]:
|
||||
prop_type = prop_data.get("type", "unknown")
|
||||
print(f" • {prop_name} ({prop_type})")
|
||||
if len(props) > 5:
|
||||
print(f" ... y {len(props) - 5} más")
|
||||
|
||||
print("-" * 70)
|
||||
|
||||
if pages:
|
||||
print("\n" + "=" * 70)
|
||||
print("📄 PÁGINAS ENCONTRADAS:")
|
||||
print("=" * 70)
|
||||
|
||||
for i, page in enumerate(pages, 1):
|
||||
page_id = page.get("id", "N/A")
|
||||
|
||||
# Intentar obtener el título
|
||||
title = "Sin título"
|
||||
props = page.get("properties", {})
|
||||
|
||||
# Buscar en diferentes ubicaciones del título
|
||||
if "title" in props:
|
||||
title_prop = props["title"]
|
||||
if "title" in title_prop:
|
||||
title_list = title_prop["title"]
|
||||
if title_list:
|
||||
title = title_list[0].get("plain_text", "Sin título")
|
||||
elif "Name" in props:
|
||||
name_prop = props["Name"]
|
||||
if "title" in name_prop:
|
||||
title_list = name_prop["title"]
|
||||
if title_list:
|
||||
title = title_list[0].get("plain_text", "Sin título")
|
||||
|
||||
print(f"\n🔷 {i}. {title}")
|
||||
print(f" ID: {page_id}")
|
||||
print(f" URL: https://notion.so/{page_id.replace('-', '')}")
|
||||
print("-" * 70)
|
||||
|
||||
if databases:
|
||||
print("\n" + "=" * 70)
|
||||
print("💡 SIGUIENTE PASO:")
|
||||
print("=" * 70)
|
||||
print("\nSi 'CBC' aparece arriba como BASE DE DATOS:")
|
||||
print("1. Copia el ID de la base de datos 'CBC'")
|
||||
print("2. Actualiza tu .env:")
|
||||
print(" NOTION_DATABASE_ID=<el_id_completo>")
|
||||
print("\nSi 'CBC' aparece como PÁGINA:")
|
||||
print("1. Abre la página en Notion")
|
||||
print("2. Busca una base de datos dentro de esa página")
|
||||
print("3. Haz click en '...' de la base de datos")
|
||||
print("4. Selecciona 'Copy link to view'")
|
||||
print("5. El ID estará en el URL copiado")
|
||||
print("\n4. Ejecuta: python test_notion_integration.py\n")
|
||||
else:
|
||||
print("\n⚠️ No se encontraron bases de datos accesibles.")
|
||||
print("\n📋 OPCIONES:")
|
||||
print("\n1. Crear una nueva base de datos:")
|
||||
print(" - Abre una de las páginas listadas arriba")
|
||||
print(" - Crea una tabla/database dentro")
|
||||
print(" - Copia el ID de esa base de datos")
|
||||
print("\n2. O comparte una base de datos existente:")
|
||||
print(" - Abre tu base de datos 'CBC' en Notion")
|
||||
print(" - Click en '...' > 'Connections'")
|
||||
print(" - Agrega tu integración\n")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error: {e}\n")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
186
main.py
186
main.py
@@ -3,6 +3,7 @@
|
||||
CBCFacil - Main Service Entry Point
|
||||
Unified AI service for document processing (audio, PDF, text)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import time
|
||||
@@ -16,8 +17,10 @@ from typing import Optional
|
||||
|
||||
# Load environment variables from .env file
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# Configure logging with JSON formatter for production
|
||||
class JSONFormatter(logging.Formatter):
|
||||
"""JSON formatter for structured logging in production"""
|
||||
@@ -29,7 +32,7 @@ class JSONFormatter(logging.Formatter):
|
||||
"message": record.getMessage(),
|
||||
"module": record.module,
|
||||
"function": record.funcName,
|
||||
"line": record.lineno
|
||||
"line": record.lineno,
|
||||
}
|
||||
|
||||
# Add exception info if present
|
||||
@@ -55,9 +58,9 @@ def setup_logging() -> logging.Logger:
|
||||
if settings.is_production:
|
||||
console_handler.setFormatter(JSONFormatter())
|
||||
else:
|
||||
console_handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
|
||||
))
|
||||
console_handler.setFormatter(
|
||||
logging.Formatter("%(asctime)s [%(levelname)s] - %(name)s - %(message)s")
|
||||
)
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
# File handler if configured
|
||||
@@ -74,9 +77,12 @@ logger = setup_logging()
|
||||
|
||||
def acquire_lock() -> int:
|
||||
"""Acquire single instance lock"""
|
||||
lock_file = Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent))) / ".main_service.lock"
|
||||
lock_file = (
|
||||
Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent)))
|
||||
/ ".main_service.lock"
|
||||
)
|
||||
lock_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
lock_fd = open(lock_file, 'w')
|
||||
lock_fd = open(lock_file, "w")
|
||||
fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
lock_fd.write(str(os.getpid()))
|
||||
lock_fd.flush()
|
||||
@@ -100,7 +106,9 @@ def validate_configuration() -> None:
|
||||
try:
|
||||
warnings = validate_environment()
|
||||
if warnings:
|
||||
logger.info(f"Configuration validation completed with {len(warnings)} warnings")
|
||||
logger.info(
|
||||
f"Configuration validation completed with {len(warnings)} warnings"
|
||||
)
|
||||
except ConfigurationError as e:
|
||||
logger.error(f"Configuration validation failed: {e}")
|
||||
raise
|
||||
@@ -117,7 +125,7 @@ def check_service_health() -> dict:
|
||||
health_status = {
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"status": "healthy",
|
||||
"services": {}
|
||||
"services": {},
|
||||
}
|
||||
|
||||
# Check WebDAV
|
||||
@@ -129,15 +137,13 @@ def check_service_health() -> dict:
|
||||
else:
|
||||
health_status["services"]["webdav"] = {"status": "not_configured"}
|
||||
except Exception as e:
|
||||
health_status["services"]["webdav"] = {
|
||||
"status": "unhealthy",
|
||||
"error": str(e)
|
||||
}
|
||||
health_status["services"]["webdav"] = {"status": "unhealthy", "error": str(e)}
|
||||
health_status["status"] = "degraded"
|
||||
|
||||
# Check Telegram
|
||||
try:
|
||||
from services.telegram_service import telegram_service
|
||||
|
||||
if telegram_service.is_configured:
|
||||
health_status["services"]["telegram"] = {"status": "healthy"}
|
||||
else:
|
||||
@@ -145,22 +151,20 @@ def check_service_health() -> dict:
|
||||
except Exception as e:
|
||||
health_status["services"]["telegram"] = {
|
||||
"status": "unavailable",
|
||||
"error": str(e)
|
||||
"error": str(e),
|
||||
}
|
||||
|
||||
# Check VRAM manager
|
||||
try:
|
||||
from services.vram_manager import vram_manager
|
||||
|
||||
vram_info = vram_manager.get_vram_info()
|
||||
health_status["services"]["vram"] = {
|
||||
"status": "healthy",
|
||||
"available_gb": vram_info.get("free", 0) / (1024**3)
|
||||
"available_gb": vram_info.get("free", 0) / (1024**3),
|
||||
}
|
||||
except Exception as e:
|
||||
health_status["services"]["vram"] = {
|
||||
"status": "unavailable",
|
||||
"error": str(e)
|
||||
}
|
||||
health_status["services"]["vram"] = {"status": "unavailable", "error": str(e)}
|
||||
|
||||
return health_status
|
||||
|
||||
@@ -189,12 +193,28 @@ def initialize_services() -> None:
|
||||
# Configure Telegram if credentials available
|
||||
if settings.TELEGRAM_TOKEN and settings.TELEGRAM_CHAT_ID:
|
||||
try:
|
||||
telegram_service.configure(settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID)
|
||||
telegram_service.configure(
|
||||
settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID
|
||||
)
|
||||
telegram_service.send_start_notification()
|
||||
logger.info("Telegram notifications enabled")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to configure Telegram: {e}")
|
||||
|
||||
# Configure Notion if credentials available
|
||||
if settings.has_notion_config:
|
||||
try:
|
||||
from services.notion_service import notion_service
|
||||
|
||||
notion_service.configure(
|
||||
settings.NOTION_API_TOKEN, settings.NOTION_DATABASE_ID
|
||||
)
|
||||
logger.info("✅ Notion integration enabled")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to configure Notion: {e}")
|
||||
else:
|
||||
logger.info("Notion not configured - upload to Notion disabled")
|
||||
|
||||
# Initialize WebDAV if configured
|
||||
if settings.has_webdav_config:
|
||||
try:
|
||||
@@ -233,6 +253,7 @@ def send_error_notification(error_type: str, error_message: str) -> None:
|
||||
"""Send error notification via Telegram"""
|
||||
try:
|
||||
from services.telegram_service import telegram_service
|
||||
|
||||
if telegram_service.is_configured:
|
||||
telegram_service.send_error_notification(error_type, error_message)
|
||||
except Exception as e:
|
||||
@@ -243,15 +264,16 @@ def run_dashboard_thread() -> None:
|
||||
"""Run Flask dashboard in a separate thread"""
|
||||
try:
|
||||
from api.routes import create_app
|
||||
|
||||
app = create_app()
|
||||
|
||||
# Run Flask in production mode with threaded=True
|
||||
app.run(
|
||||
host='0.0.0.0',
|
||||
host="0.0.0.0",
|
||||
port=5000,
|
||||
debug=False,
|
||||
threaded=True,
|
||||
use_reloader=False # Important: disable reloader in thread
|
||||
use_reloader=False, # Important: disable reloader in thread
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Dashboard thread error: {e}")
|
||||
@@ -260,14 +282,12 @@ def run_dashboard_thread() -> None:
|
||||
|
||||
def start_dashboard() -> threading.Thread:
|
||||
"""Start dashboard in a background daemon thread"""
|
||||
dashboard_port = int(os.getenv('DASHBOARD_PORT', '5000'))
|
||||
dashboard_port = int(os.getenv("DASHBOARD_PORT", "5000"))
|
||||
logger.info(f"Starting dashboard on port {dashboard_port}...")
|
||||
|
||||
# Create daemon thread so it doesn't block shutdown
|
||||
dashboard_thread = threading.Thread(
|
||||
target=run_dashboard_thread,
|
||||
name="DashboardThread",
|
||||
daemon=True
|
||||
target=run_dashboard_thread, name="DashboardThread", daemon=True
|
||||
)
|
||||
dashboard_thread.start()
|
||||
logger.info(f"Dashboard thread started (Thread-ID: {dashboard_thread.ident})")
|
||||
@@ -301,9 +321,37 @@ def run_main_loop() -> None:
|
||||
webdav_service.mkdir(settings.REMOTE_PDF_FOLDER)
|
||||
pdf_files = webdav_service.list(settings.REMOTE_PDF_FOLDER)
|
||||
for file_path in pdf_files:
|
||||
if file_path.lower().endswith('.pdf'):
|
||||
if file_path.lower().endswith(".pdf"):
|
||||
if not processed_registry.is_processed(file_path):
|
||||
pdf_processor.process(file_path)
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
from services.telegram_service import telegram_service
|
||||
|
||||
local_filename = unquote(Path(file_path).name)
|
||||
base_name = Path(local_filename).stem
|
||||
local_path = (
|
||||
settings.LOCAL_DOWNLOADS_PATH / local_filename
|
||||
)
|
||||
settings.LOCAL_DOWNLOADS_PATH.mkdir(
|
||||
parents=True, exist_ok=True
|
||||
)
|
||||
|
||||
# Step 1: Notify and download
|
||||
telegram_service.send_message(
|
||||
f"📄 Nuevo PDF detectado: {local_filename}\n"
|
||||
f"⬇️ Descargando..."
|
||||
)
|
||||
logger.info(
|
||||
f"Downloading PDF: {file_path} -> {local_path}"
|
||||
)
|
||||
webdav_service.download(file_path, local_path)
|
||||
|
||||
# Step 2: Process PDF
|
||||
telegram_service.send_message(
|
||||
f"🔍 Procesando PDF con OCR..."
|
||||
)
|
||||
pdf_processor.process(str(local_path))
|
||||
|
||||
processed_registry.save(file_path)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error processing PDFs: {e}")
|
||||
@@ -314,7 +362,10 @@ def run_main_loop() -> None:
|
||||
try:
|
||||
audio_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
|
||||
for file_path in audio_files:
|
||||
if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
|
||||
if any(
|
||||
file_path.lower().endswith(ext)
|
||||
for ext in settings.AUDIO_EXTENSIONS
|
||||
):
|
||||
if not processed_registry.is_processed(file_path):
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
@@ -323,36 +374,55 @@ def run_main_loop() -> None:
|
||||
|
||||
local_filename = unquote(Path(file_path).name)
|
||||
base_name = Path(local_filename).stem
|
||||
local_path = settings.LOCAL_DOWNLOADS_PATH / local_filename
|
||||
settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
|
||||
local_path = (
|
||||
settings.LOCAL_DOWNLOADS_PATH / local_filename
|
||||
)
|
||||
settings.LOCAL_DOWNLOADS_PATH.mkdir(
|
||||
parents=True, exist_ok=True
|
||||
)
|
||||
|
||||
# Step 1: Notify and download
|
||||
telegram_service.send_message(
|
||||
f"🎵 Nuevo audio detectado: {local_filename}\n"
|
||||
f"⬇️ Descargando..."
|
||||
)
|
||||
logger.info(f"Downloading audio: {file_path} -> {local_path}")
|
||||
logger.info(
|
||||
f"Downloading audio: {file_path} -> {local_path}"
|
||||
)
|
||||
webdav_service.download(file_path, local_path)
|
||||
|
||||
# Step 2: Transcribe
|
||||
telegram_service.send_message(f"📝 Transcribiendo audio con Whisper...")
|
||||
telegram_service.send_message(
|
||||
f"📝 Transcribiendo audio con Whisper..."
|
||||
)
|
||||
result = audio_processor.process(str(local_path))
|
||||
|
||||
if result.get("success") and result.get("transcription_path"):
|
||||
transcription_file = Path(result["transcription_path"])
|
||||
if result.get("success") and result.get(
|
||||
"transcription_path"
|
||||
):
|
||||
transcription_file = Path(
|
||||
result["transcription_path"]
|
||||
)
|
||||
transcription_text = result.get("text", "")
|
||||
|
||||
# Step 3: Generate AI summary and documents
|
||||
telegram_service.send_message(f"🤖 Generando resumen con IA...")
|
||||
telegram_service.send_message(
|
||||
f"🤖 Generando resumen con IA..."
|
||||
)
|
||||
doc_generator = DocumentGenerator()
|
||||
success, summary, output_files = doc_generator.generate_summary(
|
||||
success, summary, output_files = (
|
||||
doc_generator.generate_summary(
|
||||
transcription_text, base_name
|
||||
)
|
||||
)
|
||||
|
||||
# Step 4: Upload all files to Nextcloud
|
||||
if success and output_files:
|
||||
# Create folders
|
||||
for folder in [settings.RESUMENES_FOLDER, settings.DOCX_FOLDER]:
|
||||
for folder in [
|
||||
settings.RESUMENES_FOLDER,
|
||||
settings.DOCX_FOLDER,
|
||||
]:
|
||||
try:
|
||||
webdav_service.makedirs(folder)
|
||||
except Exception:
|
||||
@@ -361,25 +431,35 @@ def run_main_loop() -> None:
|
||||
# Upload transcription TXT
|
||||
if transcription_file.exists():
|
||||
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
|
||||
webdav_service.upload(transcription_file, remote_txt)
|
||||
webdav_service.upload(
|
||||
transcription_file, remote_txt
|
||||
)
|
||||
logger.info(f"Uploaded: {remote_txt}")
|
||||
|
||||
# Upload DOCX
|
||||
docx_path = Path(output_files.get('docx_path', ''))
|
||||
docx_path = Path(
|
||||
output_files.get("docx_path", "")
|
||||
)
|
||||
if docx_path.exists():
|
||||
remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
|
||||
webdav_service.upload(docx_path, remote_docx)
|
||||
webdav_service.upload(
|
||||
docx_path, remote_docx
|
||||
)
|
||||
logger.info(f"Uploaded: {remote_docx}")
|
||||
|
||||
# Upload PDF
|
||||
pdf_path = Path(output_files.get('pdf_path', ''))
|
||||
pdf_path = Path(
|
||||
output_files.get("pdf_path", "")
|
||||
)
|
||||
if pdf_path.exists():
|
||||
remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
|
||||
webdav_service.upload(pdf_path, remote_pdf)
|
||||
logger.info(f"Uploaded: {remote_pdf}")
|
||||
|
||||
# Upload Markdown
|
||||
md_path = Path(output_files.get('markdown_path', ''))
|
||||
md_path = Path(
|
||||
output_files.get("markdown_path", "")
|
||||
)
|
||||
if md_path.exists():
|
||||
remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
|
||||
webdav_service.upload(md_path, remote_md)
|
||||
@@ -396,11 +476,15 @@ def run_main_loop() -> None:
|
||||
# Just upload transcription if summary failed
|
||||
if transcription_file.exists():
|
||||
try:
|
||||
webdav_service.makedirs(settings.RESUMENES_FOLDER)
|
||||
webdav_service.makedirs(
|
||||
settings.RESUMENES_FOLDER
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
|
||||
webdav_service.upload(transcription_file, remote_txt)
|
||||
webdav_service.upload(
|
||||
transcription_file, remote_txt
|
||||
)
|
||||
telegram_service.send_message(
|
||||
f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
|
||||
)
|
||||
@@ -415,7 +499,10 @@ def run_main_loop() -> None:
|
||||
try:
|
||||
text_files = webdav_service.list(settings.REMOTE_TXT_FOLDER)
|
||||
for file_path in text_files:
|
||||
if any(file_path.lower().endswith(ext) for ext in settings.TXT_EXTENSIONS):
|
||||
if any(
|
||||
file_path.lower().endswith(ext)
|
||||
for ext in settings.TXT_EXTENSIONS
|
||||
):
|
||||
if not processed_registry.is_processed(file_path):
|
||||
text_processor.process(file_path)
|
||||
processed_registry.save(file_path)
|
||||
@@ -443,7 +530,7 @@ def run_main_loop() -> None:
|
||||
)
|
||||
send_error_notification(
|
||||
"consecutive_errors",
|
||||
f"Service has failed {consecutive_errors} consecutive times"
|
||||
f"Service has failed {consecutive_errors} consecutive times",
|
||||
)
|
||||
|
||||
# Don't exit, let the loop continue with backoff
|
||||
@@ -462,7 +549,9 @@ def main():
|
||||
try:
|
||||
logger.info("=== CBCFacil Service Started ===")
|
||||
logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
|
||||
logger.info(f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}")
|
||||
logger.info(
|
||||
f"Environment: {'production' if os.getenv('DEBUG', 'false').lower() != 'true' else 'development'}"
|
||||
)
|
||||
|
||||
lock_fd = acquire_lock()
|
||||
initialize_services()
|
||||
@@ -491,12 +580,15 @@ if __name__ == "__main__":
|
||||
command = sys.argv[1]
|
||||
if command == "whisper" and len(sys.argv) == 4:
|
||||
from processors.audio_processor import AudioProcessor
|
||||
|
||||
AudioProcessor().process(sys.argv[2])
|
||||
elif command == "pdf" and len(sys.argv) == 4:
|
||||
from processors.pdf_processor import PDFProcessor
|
||||
|
||||
PDFProcessor().process(sys.argv[2])
|
||||
elif command == "health":
|
||||
from main import check_service_health
|
||||
|
||||
health = check_service_health()
|
||||
print(json.dumps(health, indent=2))
|
||||
else:
|
||||
|
||||
10
restart_service.sh
Executable file
10
restart_service.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
# Detener servicio existente
|
||||
pkill -f "python main.py"
|
||||
sleep 2
|
||||
|
||||
# Reiniciar con log visible
|
||||
cd /home/ren/proyectos/cbc
|
||||
source .venv/bin/activate
|
||||
python main.py >> main.log 2>&1 &
|
||||
echo "Servicio reiniciado. Ver logs con: tail -f main.log"
|
||||
353
services/notion_service.py
Normal file
353
services/notion_service.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
Notion integration service with official SDK
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, Dict, Any, List
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
try:
|
||||
from notion_client import Client
|
||||
from notion_client.errors import APIResponseError
|
||||
|
||||
NOTION_AVAILABLE = True
|
||||
except ImportError:
|
||||
NOTION_AVAILABLE = False
|
||||
Client = None
|
||||
APIResponseError = Exception
|
||||
|
||||
from config import settings
|
||||
|
||||
|
||||
class NotionService:
|
||||
"""Enhanced Notion API integration service"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._client: Optional[Client] = None
|
||||
self._database_id: Optional[str] = None
|
||||
|
||||
def configure(self, token: str, database_id: str) -> None:
|
||||
"""Configure Notion with official SDK"""
|
||||
if not NOTION_AVAILABLE:
|
||||
self.logger.error(
|
||||
"notion-client not installed. Install with: pip install notion-client"
|
||||
)
|
||||
return
|
||||
|
||||
self._client = Client(auth=token)
|
||||
self._database_id = database_id
|
||||
self.logger.info("Notion service configured with official SDK")
|
||||
|
||||
@property
|
||||
def is_configured(self) -> bool:
|
||||
"""Check if Notion is configured"""
|
||||
return bool(self._client and self._database_id and NOTION_AVAILABLE)
|
||||
|
||||
def _rate_limited_request(self, func, *args, **kwargs):
|
||||
"""Execute request with rate limiting and retry"""
|
||||
max_retries = 3
|
||||
base_delay = 1
|
||||
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except APIResponseError as e:
|
||||
if hasattr(e, "code") and e.code == "rate_limited":
|
||||
delay = base_delay * (2**attempt)
|
||||
self.logger.warning(f"Rate limited by Notion, waiting {delay}s")
|
||||
time.sleep(delay)
|
||||
else:
|
||||
raise
|
||||
|
||||
raise Exception("Max retries exceeded for Notion API")
|
||||
|
||||
def create_page_with_summary(
|
||||
self, title: str, summary: str, metadata: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
"""Create a new page in Notion (database or parent page) with summary content"""
|
||||
if not self.is_configured:
|
||||
self.logger.warning("Notion not configured, skipping upload")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Determinar si es database o página padre
|
||||
use_as_page = metadata.get("use_as_page", False)
|
||||
|
||||
if use_as_page:
|
||||
# Crear página dentro de otra página
|
||||
page = self._rate_limited_request(
|
||||
self._client.pages.create,
|
||||
parent={"page_id": self._database_id},
|
||||
properties={"title": [{"text": {"content": title[:100]}}]},
|
||||
)
|
||||
else:
|
||||
# Crear página en database (método original)
|
||||
properties = {"Name": {"title": [{"text": {"content": title[:100]}}]}}
|
||||
|
||||
# Agregar status si la DB lo soporta
|
||||
if metadata.get("add_status", True):
|
||||
properties["Status"] = {"select": {"name": "Procesado"}}
|
||||
|
||||
# Agregar tipo de archivo si está disponible Y add_status está habilitado
|
||||
if metadata.get("add_status", False) and metadata.get("file_type"):
|
||||
properties["Tipo"] = {
|
||||
"select": {" name": metadata["file_type"].upper()}
|
||||
}
|
||||
|
||||
page = self._rate_limited_request(
|
||||
self._client.pages.create,
|
||||
parent={"database_id": self._database_id},
|
||||
properties=properties,
|
||||
)
|
||||
|
||||
page_id = page["id"]
|
||||
self.logger.info(f"✅ Notion page created: {page_id}")
|
||||
|
||||
# Agregar contenido del resumen como bloques
|
||||
self._add_summary_content(page_id, summary, metadata.get("pdf_path"))
|
||||
|
||||
return page_id
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Error creating Notion page: {e}")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Preparar properties de la página
|
||||
properties = {
|
||||
"Name": {
|
||||
"title": [
|
||||
{
|
||||
"text": {
|
||||
"content": title[:100] # Notion limit
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
# Agregar status si la DB lo soporta
|
||||
if metadata.get("add_status", True):
|
||||
properties["Status"] = {"select": {"name": "Procesado"}}
|
||||
|
||||
# Agregar tipo de archivo si está disponible
|
||||
if metadata.get("file_type"):
|
||||
properties["Tipo"] = {"select": {"name": metadata["file_type"].upper()}}
|
||||
|
||||
# Crear página
|
||||
page = self._rate_limited_request(
|
||||
self._client.pages.create,
|
||||
parent={"database_id": self._database_id},
|
||||
properties=properties,
|
||||
)
|
||||
|
||||
page_id = page["id"]
|
||||
self.logger.info(f"✅ Notion page created: {page_id}")
|
||||
|
||||
# Agregar contenido del resumen como bloques
|
||||
self._add_summary_content(page_id, summary, metadata.get("pdf_path"))
|
||||
|
||||
return page_id
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Error creating Notion page: {e}")
|
||||
return None
|
||||
|
||||
def _add_summary_content(
|
||||
self, page_id: str, summary: str, pdf_path: Optional[Path] = None
|
||||
) -> bool:
|
||||
"""Add summary content as Notion blocks"""
|
||||
try:
|
||||
blocks = []
|
||||
|
||||
# Agregar nota sobre el PDF si existe
|
||||
if pdf_path and pdf_path.exists():
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "callout",
|
||||
"callout": {
|
||||
"rich_text": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": {
|
||||
"content": f"📄 Documento generado automáticamente: {pdf_path.name}"
|
||||
},
|
||||
}
|
||||
],
|
||||
"icon": {"emoji": "📄"},
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Agregar bloques del resumen
|
||||
summary_blocks = self._parse_markdown_to_blocks(summary)
|
||||
blocks.extend(summary_blocks)
|
||||
|
||||
# Agregar footer
|
||||
blocks.append({"object": "block", "type": "divider", "divider": {}})
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "paragraph",
|
||||
"paragraph": {
|
||||
"rich_text": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": {
|
||||
"content": f"Generado por CBCFacil el {datetime.now().strftime('%d/%m/%Y %H:%M')}"
|
||||
},
|
||||
"annotations": {"italic": True, "color": "gray"},
|
||||
}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
# Notion API limita a 100 bloques por request
|
||||
if blocks:
|
||||
for i in range(0, len(blocks), 100):
|
||||
batch = blocks[i : i + 100]
|
||||
self._rate_limited_request(
|
||||
self._client.blocks.children.append,
|
||||
block_id=page_id,
|
||||
children=batch,
|
||||
)
|
||||
self.logger.info(f"✅ Added {len(blocks)} blocks to Notion page")
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"❌ Error adding content blocks: {e}")
|
||||
return False
|
||||
|
||||
def _parse_markdown_to_blocks(self, markdown: str) -> List[Dict]:
|
||||
"""Convert markdown to Notion blocks"""
|
||||
blocks = []
|
||||
lines = markdown.split("\n")
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Headings
|
||||
if line.startswith("# "):
|
||||
text = line[2:].strip()[:2000]
|
||||
if text:
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "heading_1",
|
||||
"heading_1": {
|
||||
"rich_text": [
|
||||
{"type": "text", "text": {"content": text}}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
elif line.startswith("## "):
|
||||
text = line[3:].strip()[:2000]
|
||||
if text:
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "heading_2",
|
||||
"heading_2": {
|
||||
"rich_text": [
|
||||
{"type": "text", "text": {"content": text}}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
elif line.startswith("### "):
|
||||
text = line[4:].strip()[:2000]
|
||||
if text:
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "heading_3",
|
||||
"heading_3": {
|
||||
"rich_text": [
|
||||
{"type": "text", "text": {"content": text}}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
# Bullet points
|
||||
elif line.startswith("- ") or line.startswith("* "):
|
||||
text = line[2:].strip()[:2000]
|
||||
if text:
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "bulleted_list_item",
|
||||
"bulleted_list_item": {
|
||||
"rich_text": [
|
||||
{"type": "text", "text": {"content": text}}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
# Divider
|
||||
elif line.strip() == "---":
|
||||
blocks.append({"object": "block", "type": "divider", "divider": {}})
|
||||
# Paragraph (skip footer lines)
|
||||
elif not line.startswith("*Generado por"):
|
||||
text = line[:2000]
|
||||
if text:
|
||||
blocks.append(
|
||||
{
|
||||
"object": "block",
|
||||
"type": "paragraph",
|
||||
"paragraph": {
|
||||
"rich_text": [
|
||||
{"type": "text", "text": {"content": text}}
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
return blocks
|
||||
|
||||
def upload_pdf_legacy(self, pdf_path: Path, title: str) -> bool:
|
||||
"""Legacy method - creates simple page (backward compatibility)"""
|
||||
if not self.is_configured:
|
||||
self.logger.warning("Notion not configured, skipping upload")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Crear página simple
|
||||
page_id = self.create_page_with_summary(
|
||||
title=title,
|
||||
summary=f"Documento procesado: {title}",
|
||||
metadata={"file_type": "PDF", "pdf_path": pdf_path},
|
||||
)
|
||||
|
||||
return bool(page_id)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error uploading PDF to Notion: {e}")
|
||||
return False
|
||||
|
||||
# Alias para backward compatibility
|
||||
def upload_pdf(self, pdf_path: Path, title: str) -> bool:
|
||||
"""Upload PDF info to Notion (alias for backward compatibility)"""
|
||||
return self.upload_pdf_legacy(pdf_path, title)
|
||||
|
||||
def upload_pdf_as_file(self, pdf_path: Path, title: str) -> bool:
|
||||
"""Upload PDF info as file (alias for backward compatibility)"""
|
||||
return self.upload_pdf_legacy(pdf_path, title)
|
||||
|
||||
|
||||
# Global instance
|
||||
notion_service = NotionService()
|
||||
|
||||
|
||||
def upload_to_notion(pdf_path: Path, title: str) -> bool:
|
||||
"""Legacy function for backward compatibility"""
|
||||
return notion_service.upload_pdf(pdf_path, title)
|
||||
203
services/notion_service_old.py
Normal file
203
services/notion_service_old.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""
|
||||
Notion integration service
|
||||
"""
|
||||
import logging
|
||||
import base64
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import requests
|
||||
REQUESTS_AVAILABLE = True
|
||||
except ImportError:
|
||||
REQUESTS_AVAILABLE = False
|
||||
requests = None
|
||||
|
||||
from config import settings
|
||||
|
||||
|
||||
class NotionService:
|
||||
"""Service for Notion API integration"""
|
||||
|
||||
def __init__(self):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._token: Optional[str] = None
|
||||
self._database_id: Optional[str] = None
|
||||
self._base_url = "https://api.notion.com/v1"
|
||||
|
||||
def configure(self, token: str, database_id: str) -> None:
|
||||
"""Configure Notion credentials"""
|
||||
self._token = token
|
||||
self._database_id = database_id
|
||||
self.logger.info("Notion service configured")
|
||||
|
||||
@property
|
||||
def is_configured(self) -> bool:
|
||||
"""Check if Notion is configured"""
|
||||
return bool(self._token and self._database_id)
|
||||
|
||||
def _get_headers(self) -> dict:
|
||||
"""Get headers for Notion API requests"""
|
||||
return {
|
||||
"Authorization": f"Bearer {self._token}",
|
||||
"Content-Type": "application/json",
|
||||
"Notion-Version": "2022-06-28"
|
||||
}
|
||||
|
||||
def upload_pdf(self, pdf_path: Path, title: str) -> bool:
|
||||
"""Upload PDF to Notion database"""
|
||||
if not self.is_configured:
|
||||
self.logger.warning("Notion not configured, skipping upload")
|
||||
return False
|
||||
|
||||
if not REQUESTS_AVAILABLE:
|
||||
self.logger.error("requests library not available for Notion upload")
|
||||
return False
|
||||
|
||||
if not pdf_path.exists():
|
||||
self.logger.error(f"PDF file not found: {pdf_path}")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Read and encode PDF
|
||||
with open(pdf_path, 'rb') as f:
|
||||
pdf_data = base64.b64encode(f.read()).decode('utf-8')
|
||||
|
||||
# Prepare the page data
|
||||
page_data = {
|
||||
"parent": {"database_id": self._database_id},
|
||||
"properties": {
|
||||
"Name": {
|
||||
"title": [
|
||||
{
|
||||
"text": {
|
||||
"content": title
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"Status": {
|
||||
"select": {
|
||||
"name": "Procesado"
|
||||
}
|
||||
}
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"object": "block",
|
||||
"type": "paragraph",
|
||||
"paragraph": {
|
||||
"rich_text": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": {
|
||||
"content": f"Documento generado automáticamente: {title}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"object": "block",
|
||||
"type": "file",
|
||||
"file": {
|
||||
"type": "external",
|
||||
"external": {
|
||||
"url": f"data:application/pdf;base64,{pdf_data}"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Create page in database
|
||||
response = requests.post(
|
||||
f"{self._base_url}/pages",
|
||||
headers=self._get_headers(),
|
||||
json=page_data,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
self.logger.info(f"PDF uploaded to Notion successfully: {title}")
|
||||
return True
|
||||
else:
|
||||
self.logger.error(f"Notion API error: {response.status_code} - {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error uploading PDF to Notion: {e}")
|
||||
return False
|
||||
|
||||
def upload_pdf_as_file(self, pdf_path: Path, title: str) -> bool:
|
||||
"""Upload PDF as a file block (alternative method)"""
|
||||
if not self.is_configured:
|
||||
self.logger.warning("Notion not configured, skipping upload")
|
||||
return False
|
||||
|
||||
if not REQUESTS_AVAILABLE:
|
||||
self.logger.error("requests library not available for Notion upload")
|
||||
return False
|
||||
|
||||
if not pdf_path.exists():
|
||||
self.logger.error(f"PDF file not found: {pdf_path}")
|
||||
return False
|
||||
|
||||
try:
|
||||
# For simplicity, we'll create a page with just the title and a link placeholder
|
||||
# In a real implementation, you'd need to upload the file to Notion's file storage
|
||||
page_data = {
|
||||
"parent": {"database_id": self._database_id},
|
||||
"properties": {
|
||||
"Name": {
|
||||
"title": [
|
||||
{
|
||||
"text": {
|
||||
"content": title
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"Status": {
|
||||
"select": {
|
||||
"name": "Procesado"
|
||||
}
|
||||
},
|
||||
"File Path": {
|
||||
"rich_text": [
|
||||
{
|
||||
"text": {
|
||||
"content": str(pdf_path)
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
f"{self._base_url}/pages",
|
||||
headers=self._get_headers(),
|
||||
json=page_data,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
self.logger.info(f"PDF uploaded to Notion successfully: {title}")
|
||||
return True
|
||||
else:
|
||||
self.logger.error(f"Notion API error: {response.status_code} - {response.text}")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error uploading PDF to Notion: {e}")
|
||||
return False
|
||||
|
||||
|
||||
# Global instance
|
||||
notion_service = NotionService()
|
||||
|
||||
|
||||
def upload_to_notion(pdf_path: Path, title: str) -> bool:
|
||||
"""Legacy function for backward compatibility"""
|
||||
return notion_service.upload_pdf(pdf_path, title)
|
||||
95
verify_notion_permissions.py
Normal file
95
verify_notion_permissions.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script para verificar y configurar permisos de Notion
|
||||
"""
|
||||
|
||||
import sys
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
from config import settings
|
||||
from notion_client import Client
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main():
    """Interactively verify that the Notion integration can reach the database.

    Reads the token and database id from ``config.settings`` (loaded from
    .env), prints the manual steps needed to share the database with the
    integration, then performs the minimal API call that proves access
    (``databases.retrieve``) and reports either the database schema or a
    classified error (database not shared vs. invalid token).

    Console output is intentionally kept in Spanish for the end user.
    """
    print("\n" + "=" * 60)
    print("🔧 VERIFICACIÓN DE PERMISOS DE NOTION")
    print("=" * 60 + "\n")

    # Configuration comes from config.settings, which loads the .env file.
    token = settings.NOTION_API_TOKEN
    database_id = settings.NOTION_DATABASE_ID

    if not token or not database_id:
        print("❌ Falta configuración de Notion en .env")
        print(f" NOTION_API: {'✅' if token else '❌'}")
        print(f" NOTION_DATABASE_ID: {'✅' if database_id else '❌'}")
        return

    # Only a prefix of the token is echoed, to avoid leaking the secret.
    print(f"✅ Token configurado: {token[:20]}...")
    print(f"✅ Database ID: {database_id}\n")

    # Official Notion SDK client; construction does no network I/O.
    client = Client(auth=token)

    print("📋 PASOS PARA CONFIGURAR LOS PERMISOS:\n")
    print("1. Abre Notion y ve a tu base de datos 'CBC'")
    print(f" URL: https://www.notion.so/{database_id}")
    print("\n2. Click en los 3 puntos (⋯) en la esquina superior derecha")
    print("\n3. Selecciona 'Connections' o 'Añadir conexiones'")
    print("\n4. Busca tu integración y actívala")
    # Fixed: extraneous f-prefix on a string with no placeholders (F541).
    print(" (Debería aparecer con el nombre que le pusiste)")
    print("\n5. Confirma los permisos\n")

    print("-" * 60)
    print("\n🧪 Intentando conectar con Notion...\n")

    try:
        # Retrieving the database is the smallest call that confirms the
        # integration has been granted access to it.
        database = client.databases.retrieve(database_id=database_id)

        print("✅ ¡ÉXITO! La integración puede acceder a la base de datos")
        # Fixed: extraneous f-prefix (no placeholders).
        print("\n📊 Información de la base de datos:")
        print(
            f" Título: {database['title'][0]['plain_text'] if database.get('title') else 'Sin título'}"
        )
        print(f" ID: {database['id']}")
        # Fixed: extraneous f-prefix (no placeholders).
        print("\n Propiedades disponibles:")

        for prop_name, prop_data in database.get("properties", {}).items():
            prop_type = prop_data.get("type", "unknown")
            print(f" - {prop_name}: {prop_type}")

        print("\n" + "=" * 60)
        print("✅ TODO CONFIGURADO CORRECTAMENTE")
        print("=" * 60 + "\n")

        print("🚀 Ahora ejecuta: python test_notion_integration.py")
        print(" para probar subir un documento\n")

    except Exception as e:
        # Broad catch is deliberate: this is a top-level diagnostic boundary
        # whose job is to classify and explain the failure, not to crash.
        error_msg = str(e)

        print("❌ ERROR AL CONECTAR CON NOTION\n")
        print(f"Error: {error_msg}\n")

        if "Could not find database" in error_msg:
            print("⚠️ LA BASE DE DATOS NO ESTÁ COMPARTIDA CON TU INTEGRACIÓN")
            print("\nSigue los pasos arriba para compartir la base de datos.")
        elif "Unauthorized" in error_msg or "401" in error_msg:
            print("⚠️ EL TOKEN DE API ES INVÁLIDO")
            print("\nVerifica que el token esté correcto en .env")
        else:
            print("⚠️ ERROR DESCONOCIDO")
            print(f"\nDetalles: {error_msg}")

        print("\n" + "=" * 60 + "\n")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user