Files
cbc2027/config/settings.py
renato97 6058dc642e feat: Integración automática con Notion + análisis completo del código
- Instalado notion-client SDK oficial para integración robusta
- Refactorizado services/notion_service.py con SDK oficial de Notion
  - Rate limiting con retry y exponential backoff
  - Parser Markdown → Notion blocks (headings, bullets, paragraphs)
  - Soporte para pages y databases
  - Manejo robusto de errores

- Integración automática en document/generators.py
  - PDFs se suben automáticamente a Notion después de generarse
  - Contenido completo del resumen formateado con bloques
  - Metadata rica (tipo de archivo, path, fecha)

- Configuración de Notion en main.py
  - Inicialización automática al arrancar el servicio
  - Validación de credenciales

- Actualizado config/settings.py
  - Agregado load_dotenv() para cargar variables de .env
  - Configuración de Notion (NOTION_API, NOTION_DATABASE_ID)

- Scripts de utilidad creados:
  - test_notion_integration.py: Test de subida a Notion
  - test_pipeline_notion.py: Test del pipeline completo
  - verify_notion_permissions.py: Verificación de permisos
  - list_notion_pages.py: Listar páginas accesibles
  - diagnose_notion.py: Diagnóstico completo
  - create_notion_database.py: Crear database automáticamente
  - restart_service.sh: Script de reinicio del servicio

- Documentación completa en opus.md:
  - Análisis exhaustivo del codebase (42 archivos Python)
  - Bugs críticos identificados y soluciones
  - Mejoras de seguridad (autenticación, rate limiting, CORS, CSP)
  - Optimizaciones de rendimiento (Celery, Redis, PostgreSQL, WebSockets)
  - Plan de testing (estructura, ejemplos, 80% coverage goal)
  - Roadmap de implementación (6 sprints detallados)
  - Integración avanzada con Notion documentada

Estado: Notion funcionando correctamente, PDFs se suben automáticamente
2026-01-26 17:31:17 +00:00

255 lines
8.8 KiB
Python

"""
Centralized configuration management for CBCFacil.

Loads variables from a ``.env`` file into the process environment and exposes
them through the ``Settings`` class and the module-level ``settings`` instance.
"""
import os
from pathlib import Path
from typing import Optional, Set, Union
from dotenv import load_dotenv
# Load environment variables from the .env file. This has to run before the
# Settings class body below is evaluated, because its class-level attributes
# call os.getenv() at class-definition (import) time.
load_dotenv()
class ConfigurationError(Exception):
    """Raised when a required configuration value is missing or invalid."""
class Settings:
    """Application settings for CBCFacil, read from environment variables.

    Every class-level attribute is evaluated once, when the class body runs
    (i.e. when this module is imported). Later changes to ``os.environ`` are
    not picked up, except by ``processed_files_path``, which re-reads the
    environment on each access. Numeric options are parsed with ``int()`` /
    ``float()``, so a malformed value raises ``ValueError`` at import time.
    """

    # Application
    APP_NAME: str = "CBCFacil"
    APP_VERSION: str = "8.0"
    # Only the literal string "true" (case-insensitive) enables debug mode.
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"

    # Nextcloud/WebDAV configuration
    NEXTCLOUD_URL: str = os.getenv("NEXTCLOUD_URL", "")
    NEXTCLOUD_USER: str = os.getenv("NEXTCLOUD_USER", "")
    NEXTCLOUD_PASSWORD: str = os.getenv("NEXTCLOUD_PASSWORD", "")
    WEBDAV_ENDPOINT: str = NEXTCLOUD_URL

    # Remote folders
    REMOTE_AUDIOS_FOLDER: str = "Audios"
    REMOTE_DOCX_AUDIO_FOLDER: str = "Documentos"
    REMOTE_PDF_FOLDER: str = "Pdf"
    REMOTE_TXT_FOLDER: str = "Textos"
    RESUMENES_FOLDER: str = "Resumenes"
    DOCX_FOLDER: str = "Documentos"

    # Local paths (BASE_DIR is the parent of the directory holding this file)
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    LOCAL_STATE_DIR: str = os.getenv("LOCAL_STATE_DIR", str(BASE_DIR))
    LOCAL_DOWNLOADS_PATH: Path = BASE_DIR / "downloads"
    LOCAL_RESUMENES: Path = LOCAL_DOWNLOADS_PATH
    LOCAL_DOCX: Path = BASE_DIR / "resumenes_docx"

    # Processing
    POLL_INTERVAL: int = int(os.getenv("POLL_INTERVAL", "5"))
    HTTP_TIMEOUT: int = int(os.getenv("HTTP_TIMEOUT", "30"))
    WEBDAV_MAX_RETRIES: int = int(os.getenv("WEBDAV_MAX_RETRIES", "3"))
    # Default is 64 KiB.
    DOWNLOAD_CHUNK_SIZE: int = int(os.getenv("DOWNLOAD_CHUNK_SIZE", "65536"))
    MAX_FILENAME_LENGTH: int = int(os.getenv("MAX_FILENAME_LENGTH", "80"))
    MAX_FILENAME_BASE_LENGTH: int = int(os.getenv("MAX_FILENAME_BASE_LENGTH", "40"))
    MAX_FILENAME_TOPICS_LENGTH: int = int(
        os.getenv("MAX_FILENAME_TOPICS_LENGTH", "20")
    )

    # File extensions
    AUDIO_EXTENSIONS: Set[str] = {".mp3", ".wav", ".m4a", ".ogg", ".aac"}
    PDF_EXTENSIONS: Set[str] = {".pdf"}
    TXT_EXTENSIONS: Set[str] = {".txt"}

    # AI providers
    ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
    ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
    # ANTHROPIC_AUTH_TOKEN wins when set and non-empty; otherwise fall back to
    # ZAI_AUTH_TOKEN, and finally to an empty string.
    ZAI_AUTH_TOKEN: Optional[str] = os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv(
        "ZAI_AUTH_TOKEN", ""
    )

    # Notion integration (note: the token is read from the NOTION_API variable)
    NOTION_API_TOKEN: Optional[str] = os.getenv("NOTION_API")
    NOTION_DATABASE_ID: Optional[str] = os.getenv("NOTION_DATABASE_ID")

    # Gemini
    GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
    GEMINI_FLASH_MODEL: str = os.getenv("GEMINI_FLASH_MODEL", "gemini-2.5-flash")
    GEMINI_PRO_MODEL: str = os.getenv("GEMINI_PRO_MODEL", "gemini-1.5-pro")

    # CLI paths
    GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")
    CLAUDE_CLI_PATH: Optional[str] = os.getenv("CLAUDE_CLI_PATH")

    # Telegram
    TELEGRAM_TOKEN: Optional[str] = os.getenv("TELEGRAM_TOKEN")
    TELEGRAM_CHAT_ID: Optional[str] = os.getenv("TELEGRAM_CHAT_ID")

    # PDF processing configuration
    CPU_COUNT: int = os.cpu_count() or 1
    PDF_MAX_PAGES_PER_CHUNK: int = int(os.getenv("PDF_MAX_PAGES_PER_CHUNK", "2"))
    PDF_DPI: int = int(os.getenv("PDF_DPI", "200"))
    # Default: one render thread per CPU, capped at 4.
    PDF_RENDER_THREAD_COUNT: int = int(
        os.getenv("PDF_RENDER_THREAD_COUNT", str(min(4, CPU_COUNT)))
    )
    PDF_BATCH_SIZE: int = int(os.getenv("PDF_BATCH_SIZE", "2"))
    PDF_TROCR_MAX_BATCH: int = int(
        os.getenv("PDF_TROCR_MAX_BATCH", str(PDF_BATCH_SIZE))
    )
    # Default: a third of the CPUs, clamped to the range 1..2.
    PDF_TESSERACT_THREADS: int = int(
        os.getenv("PDF_TESSERACT_THREADS", str(min(2, max(1, CPU_COUNT // 3))))
    )
    PDF_PREPROCESS_THREADS: int = int(
        os.getenv("PDF_PREPROCESS_THREADS", str(PDF_TESSERACT_THREADS))
    )
    PDF_TEXT_DETECTION_MIN_RATIO: float = float(
        os.getenv("PDF_TEXT_DETECTION_MIN_RATIO", "0.6")
    )
    PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = int(
        os.getenv("PDF_TEXT_DETECTION_MIN_AVG_CHARS", "120")
    )

    # Error handling
    ERROR_THROTTLE_SECONDS: int = int(os.getenv("ERROR_THROTTLE_SECONDS", "600"))

    # GPU/VRAM management
    MODEL_TIMEOUT_SECONDS: int = int(os.getenv("MODEL_TIMEOUT_SECONDS", "300"))
    CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
    PYTORCH_CUDA_ALLOC_CONF: str = os.getenv(
        "PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512"
    )
    # GPU detection (auto, nvidia, amd, cpu)
    GPU_PREFERENCE: str = os.getenv("GPU_PREFERENCE", "auto")
    # AMD ROCm HSA override for RX 6000 series (gfx1030)
    HSA_OVERRIDE_GFX_VERSION: str = os.getenv("HSA_OVERRIDE_GFX_VERSION", "10.3.0")

    # Dashboard
    DASHBOARD_SECRET_KEY: str = os.getenv("DASHBOARD_SECRET_KEY", "")
    DASHBOARD_PORT: int = int(os.getenv("DASHBOARD_PORT", "5000"))
    DASHBOARD_HOST: str = os.getenv("DASHBOARD_HOST", "0.0.0.0")

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: Optional[str] = os.getenv("LOG_FILE")

    # Threading optimization
    OMP_NUM_THREADS: int = int(os.getenv("OMP_NUM_THREADS", "4"))
    MKL_NUM_THREADS: int = int(os.getenv("MKL_NUM_THREADS", "4"))

    # ========================================================================
    # PROPERTIES WITH VALIDATION
    # ========================================================================
    @property
    def is_production(self) -> bool:
        """Return True unless DEBUG is enabled."""
        return not self.DEBUG

    @property
    def has_webdav_config(self) -> bool:
        """Return True when the Nextcloud URL, user and password are all set."""
        return all([self.NEXTCLOUD_URL, self.NEXTCLOUD_USER, self.NEXTCLOUD_PASSWORD])

    @property
    def has_ai_config(self) -> bool:
        """Return True when at least one AI provider token or CLI path is set."""
        return any(
            [
                self.ZAI_AUTH_TOKEN,
                self.GEMINI_API_KEY,
                self.CLAUDE_CLI_PATH,
                self.GEMINI_CLI_PATH,
            ]
        )

    @property
    def has_notion_config(self) -> bool:
        """Return True when both the Notion token and database id are set."""
        return bool(self.NOTION_API_TOKEN and self.NOTION_DATABASE_ID)

    @property
    def processed_files_path(self) -> Path:
        """Return the path of the processed-files registry.

        PROCESSED_FILES_PATH is read from the environment on every access;
        when unset, the path is ``LOCAL_STATE_DIR / "processed_files.txt"``.
        """
        return Path(
            os.getenv(
                "PROCESSED_FILES_PATH",
                str(Path(self.LOCAL_STATE_DIR) / "processed_files.txt"),
            )
        )

    @property
    def nextcloud_url(self) -> str:
        """Return NEXTCLOUD_URL.

        Raises:
            ConfigurationError: if the value is empty in production mode.
        """
        if not self.NEXTCLOUD_URL and self.is_production:
            raise ConfigurationError("NEXTCLOUD_URL is required in production mode")
        return self.NEXTCLOUD_URL

    @property
    def nextcloud_user(self) -> str:
        """Return NEXTCLOUD_USER.

        Raises:
            ConfigurationError: if the value is empty in production mode.
        """
        if not self.NEXTCLOUD_USER and self.is_production:
            raise ConfigurationError("NEXTCLOUD_USER is required in production mode")
        return self.NEXTCLOUD_USER

    @property
    def nextcloud_password(self) -> str:
        """Return NEXTCLOUD_PASSWORD.

        Raises:
            ConfigurationError: if the value is empty in production mode.
        """
        if not self.NEXTCLOUD_PASSWORD and self.is_production:
            raise ConfigurationError(
                "NEXTCLOUD_PASSWORD is required in production mode"
            )
        return self.NEXTCLOUD_PASSWORD

    @property
    def valid_webdav_config(self) -> bool:
        """Return True when the validated Nextcloud accessors do not raise.

        NOTE(review): the accessors only raise in production mode, so with
        DEBUG enabled this is always True, even when the credentials are
        empty. Use ``has_webdav_config`` to test for actual completeness.
        """
        try:
            _ = self.nextcloud_url
            _ = self.nextcloud_user
            _ = self.nextcloud_password
        except ConfigurationError:
            return False
        return True

    @property
    def telegram_configured(self) -> bool:
        """Return True when both the Telegram token and chat id are set."""
        return bool(self.TELEGRAM_TOKEN and self.TELEGRAM_CHAT_ID)

    @property
    def has_gpu_support(self) -> bool:
        """Return ``torch.cuda.is_available()``, or False if torch is missing."""
        try:
            # Imported here rather than at module level so that torch stays an
            # optional dependency of the configuration module.
            import torch
        except ImportError:
            return False
        return torch.cuda.is_available()

    @property
    def environment_type(self) -> str:
        """Return "production" or "development" depending on ``is_production``."""
        return "production" if self.is_production else "development"

    @property
    def config_summary(self) -> dict:
        """Return a dict of non-secret configuration facts, suitable for logging.

        Only booleans, counts and identifiers are included; no tokens or
        passwords are placed in the result.
        """
        return {
            "app_name": self.APP_NAME,
            "version": self.APP_VERSION,
            "environment": self.environment_type,
            "debug": self.DEBUG,
            "webdav_configured": self.has_webdav_config,
            "ai_configured": self.has_ai_config,
            "telegram_configured": self.telegram_configured,
            "notion_configured": self.has_notion_config,
            "gpu_support": self.has_gpu_support,
            "cpu_count": self.CPU_COUNT,
            "poll_interval": self.POLL_INTERVAL,
        }
# Module-level Settings instance, created once when this module is imported.
settings = Settings()