feat: Integración automática con Notion + análisis completo del código

- Instalado notion-client SDK oficial para integración robusta
- Refactorizado services/notion_service.py con SDK oficial de Notion
  - Rate limiting con retry y exponential backoff
  - Parser Markdown → Notion blocks (headings, bullets, paragraphs)
  - Soporte para pages y databases
  - Manejo robusto de errores
- Integración automática en document/generators.py
  - PDFs se suben automáticamente a Notion después de generarse
  - Contenido completo del resumen formateado con bloques
  - Metadata rica (tipo de archivo, path, fecha)
- Configuración de Notion en main.py
  - Inicialización automática al arrancar el servicio
  - Validación de credenciales
- Actualizado config/settings.py
  - Agregado load_dotenv() para cargar variables de .env
  - Configuración de Notion (NOTION_API, NOTION_DATABASE_ID)
- Scripts de utilidad creados:
  - test_notion_integration.py: Test de subida a Notion
  - test_pipeline_notion.py: Test del pipeline completo
  - verify_notion_permissions.py: Verificación de permisos
  - list_notion_pages.py: Listar páginas accesibles
  - diagnose_notion.py: Diagnóstico completo
  - create_notion_database.py: Crear database automáticamente
  - restart_service.sh: Script de reinicio del servicio
- Documentación completa en opus.md:
  - Análisis exhaustivo del codebase (42 archivos Python)
  - Bugs críticos identificados y soluciones
  - Mejoras de seguridad (autenticación, rate limiting, CORS, CSP)
  - Optimizaciones de rendimiento (Celery, Redis, PostgreSQL, WebSockets)
  - Plan de testing (estructura, ejemplos, 80% coverage goal)
  - Roadmap de implementación (6 sprints detallados)
  - Integración avanzada con Notion documentada

Estado: Notion funcionando correctamente, PDFs se suben automáticamente
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""
|
||||
Document generation utilities
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
@@ -17,7 +18,9 @@ class DocumentGenerator:
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.ai_provider = ai_provider_factory.get_best_provider()
|
||||
|
||||
def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
|
||||
def generate_summary(
|
||||
self, text: str, base_name: str
|
||||
) -> Tuple[bool, str, Dict[str, Any]]:
|
||||
"""Generate unified summary"""
|
||||
self.logger.info(f"Generating summary for {base_name}")
|
||||
|
||||
@@ -36,7 +39,7 @@ REGLAS ESTRICTAS:
|
||||
|
||||
Texto:
|
||||
{text[:15000]}""" # Truncate to avoid context limits if necessary, though providers handle it differently
|
||||
|
||||
|
||||
try:
|
||||
bullet_points = self.ai_provider.generate_text(bullet_prompt)
|
||||
self.logger.info(f"Bullet points generated: {len(bullet_points)}")
|
||||
@@ -85,13 +88,16 @@ Instrucciones:
|
||||
|
||||
# Use generic Gemini provider for formatting as requested
|
||||
from services.ai.gemini_provider import GeminiProvider
|
||||
|
||||
formatter = GeminiProvider()
|
||||
|
||||
|
||||
try:
|
||||
if formatter.is_available():
|
||||
summary = formatter.generate_text(format_prompt)
|
||||
else:
|
||||
self.logger.warning("Gemini formatter not available, using raw summary")
|
||||
self.logger.warning(
|
||||
"Gemini formatter not available, using raw summary"
|
||||
)
|
||||
summary = raw_summary
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Formatting failed ({e}), using raw summary")
|
||||
@@ -105,13 +111,51 @@ Instrucciones:
|
||||
docx_path = self._create_docx(summary, base_name)
|
||||
pdf_path = self._create_pdf(summary, base_name)
|
||||
|
||||
# Upload to Notion if configured
|
||||
from services.notion_service import notion_service
|
||||
|
||||
notion_uploaded = False
|
||||
notion_page_id = None
|
||||
if settings.has_notion_config:
|
||||
try:
|
||||
title = base_name.replace("_", " ").title()
|
||||
|
||||
# Crear página con el contenido completo del resumen
|
||||
notion_metadata = {
|
||||
"file_type": "Audio", # O 'PDF' dependiendo del origen
|
||||
"pdf_path": pdf_path,
|
||||
"add_status": False, # No usar Status/Tipo (no existen en la DB)
|
||||
"use_as_page": False, # Usar como database, no página
|
||||
}
|
||||
|
||||
notion_page_id = notion_service.create_page_with_summary(
|
||||
title=title, summary=summary, metadata=notion_metadata
|
||||
)
|
||||
|
||||
if notion_page_id:
|
||||
notion_uploaded = True
|
||||
self.logger.info(
|
||||
f"✅ Resumen subido a Notion: {title} (ID: {notion_page_id})"
|
||||
)
|
||||
else:
|
||||
self.logger.warning(f"⚠️ No se pudo subir a Notion: {title}")
|
||||
except Exception as e:
|
||||
self.logger.warning(f"❌ Error al subir a Notion: {e}")
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
else:
|
||||
self.logger.info("Notion not configured - skipping upload")
|
||||
|
||||
metadata = {
|
||||
'markdown_path': str(markdown_path),
|
||||
'docx_path': str(docx_path),
|
||||
'pdf_path': str(pdf_path),
|
||||
'docx_name': Path(docx_path).name,
|
||||
'summary': summary,
|
||||
'filename': filename
|
||||
"markdown_path": str(markdown_path),
|
||||
"docx_path": str(docx_path),
|
||||
"pdf_path": str(pdf_path),
|
||||
"docx_name": Path(docx_path).name,
|
||||
"summary": summary,
|
||||
"filename": filename,
|
||||
"notion_uploaded": notion_uploaded,
|
||||
"notion_page_id": notion_page_id,
|
||||
}
|
||||
|
||||
return True, summary, metadata
|
||||
@@ -129,22 +173,26 @@ Summary: {summary}
|
||||
|
||||
Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
topics_text = self.ai_provider.sanitize_input(prompt) if hasattr(self.ai_provider, 'sanitize_input') else summary[:100]
|
||||
topics_text = (
|
||||
self.ai_provider.sanitize_input(prompt)
|
||||
if hasattr(self.ai_provider, "sanitize_input")
|
||||
else summary[:100]
|
||||
)
|
||||
|
||||
# Simple topic extraction
|
||||
topics = re.findall(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b', topics_text)[:3]
|
||||
topics = re.findall(r"\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b", topics_text)[:3]
|
||||
if not topics:
|
||||
topics = ['documento']
|
||||
topics = ["documento"]
|
||||
|
||||
# Limit topic length
|
||||
topics = [t[:settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
|
||||
topics = [t[: settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
|
||||
|
||||
filename = '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]
|
||||
filename = "_".join(topics)[: settings.MAX_FILENAME_LENGTH]
|
||||
return filename
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Filename generation failed: {e}")
|
||||
return base_name[:settings.MAX_FILENAME_BASE_LENGTH]
|
||||
return base_name[: settings.MAX_FILENAME_BASE_LENGTH]
|
||||
|
||||
def _create_markdown(self, summary: str, base_name: str) -> Path:
|
||||
"""Create Markdown document"""
|
||||
@@ -153,7 +201,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
output_path = output_dir / f"{base_name}_unificado.md"
|
||||
|
||||
content = f"""# {base_name.replace('_', ' ').title()}
|
||||
content = f"""# {base_name.replace("_", " ").title()}
|
||||
|
||||
## Resumen
|
||||
|
||||
@@ -164,7 +212,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
*Generado por CBCFacil*
|
||||
"""
|
||||
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
with open(output_path, "w", encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
|
||||
return output_path
|
||||
@@ -183,51 +231,53 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
output_path = output_dir / f"{base_name}_unificado.docx"
|
||||
|
||||
doc = Document()
|
||||
doc.add_heading(base_name.replace('_', ' ').title(), 0)
|
||||
doc.add_heading(base_name.replace("_", " ").title(), 0)
|
||||
|
||||
# Parse and render Markdown content line by line
|
||||
lines = summary.splitlines()
|
||||
current_paragraph = []
|
||||
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3 # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
|
||||
current_paragraph = []
|
||||
continue
|
||||
|
||||
if line.startswith('#'):
|
||||
|
||||
if line.startswith("#"):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
# Process heading
|
||||
level = len(line) - len(line.lstrip('#'))
|
||||
heading_text = line.lstrip('#').strip()
|
||||
level = len(line) - len(line.lstrip("#"))
|
||||
heading_text = line.lstrip("#").strip()
|
||||
if level <= 6:
|
||||
doc.add_heading(heading_text, level=level)
|
||||
else:
|
||||
current_paragraph.append(heading_text)
|
||||
elif line.startswith('-') or line.startswith('*') or line.startswith('•'):
|
||||
elif line.startswith("-") or line.startswith("*") or line.startswith("•"):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
bullet_text = line.lstrip('-*• ').strip()
|
||||
p = doc.add_paragraph(bullet_text, style='List Bullet')
|
||||
bullet_text = line.lstrip("-*• ").strip()
|
||||
p = doc.add_paragraph(bullet_text, style="List Bullet")
|
||||
# Remove bold markers from bullets if present
|
||||
if '**' in bullet_text:
|
||||
if "**" in bullet_text:
|
||||
# Basic cleanup for bullets
|
||||
pass
|
||||
pass
|
||||
else:
|
||||
# Clean up excessive bold markers in body text if user requested
|
||||
clean_line = line.replace('**', '') # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||
clean_line = line.replace(
|
||||
"**", ""
|
||||
) # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||
current_paragraph.append(clean_line)
|
||||
|
||||
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p = doc.add_paragraph(" ".join(current_paragraph))
|
||||
p.alignment = 3
|
||||
|
||||
doc.add_page_break()
|
||||
@@ -258,18 +308,20 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
def new_page():
|
||||
nonlocal y_position
|
||||
c.showPage()
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
y_position = height - margin
|
||||
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
|
||||
# Title
|
||||
c.setFont('Helvetica-Bold', 16)
|
||||
c.drawString(margin, y_position, base_name.replace('_', ' ').title()[:100])
|
||||
c.setFont("Helvetica-Bold", 16)
|
||||
c.drawString(margin, y_position, base_name.replace("_", " ").title()[:100])
|
||||
y_position -= 28
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
|
||||
summary_clean = summary.replace('**', '') # Remove asterisks globally for cleaner PDF
|
||||
summary_clean = summary.replace(
|
||||
"**", ""
|
||||
) # Remove asterisks globally for cleaner PDF
|
||||
|
||||
for raw_line in summary_clean.splitlines():
|
||||
line = raw_line.rstrip()
|
||||
@@ -282,24 +334,24 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
stripped = line.lstrip()
|
||||
|
||||
if stripped.startswith('#'):
|
||||
level = len(stripped) - len(stripped.lstrip('#'))
|
||||
heading_text = stripped.lstrip('#').strip()
|
||||
if stripped.startswith("#"):
|
||||
level = len(stripped) - len(stripped.lstrip("#"))
|
||||
heading_text = stripped.lstrip("#").strip()
|
||||
if heading_text:
|
||||
font_size = 16 if level == 1 else 14 if level == 2 else 12
|
||||
c.setFont('Helvetica-Bold', font_size)
|
||||
c.setFont("Helvetica-Bold", font_size)
|
||||
c.drawString(margin, y_position, heading_text[:90])
|
||||
y_position -= font_size + 6
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
c.setFont('Helvetica', 11)
|
||||
c.setFont("Helvetica", 11)
|
||||
continue
|
||||
|
||||
if stripped.startswith(('-', '*', '•')):
|
||||
bullet_text = stripped.lstrip('-*•').strip()
|
||||
wrapped_lines = textwrap.wrap(bullet_text, width=80) or ['']
|
||||
if stripped.startswith(("-", "*", "•")):
|
||||
bullet_text = stripped.lstrip("-*•").strip()
|
||||
wrapped_lines = textwrap.wrap(bullet_text, width=80) or [""]
|
||||
for idx, wrapped in enumerate(wrapped_lines):
|
||||
prefix = '• ' if idx == 0 else ' '
|
||||
prefix = "• " if idx == 0 else " "
|
||||
c.drawString(margin, y_position, f"{prefix}{wrapped}")
|
||||
y_position -= 14
|
||||
if y_position < margin:
|
||||
@@ -307,7 +359,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
continue
|
||||
|
||||
# Body text - Justified approximation (ReportLab native justification requires Paragraph styles, defaulting to wrap)
|
||||
wrapped_lines = textwrap.wrap(stripped, width=90) or ['']
|
||||
wrapped_lines = textwrap.wrap(stripped, width=90) or [""]
|
||||
for wrapped in wrapped_lines:
|
||||
c.drawString(margin, y_position, wrapped)
|
||||
y_position -= 14
|
||||
|
||||
Reference in New Issue
Block a user