Files
cbc2027/document/generators.py

165 lines
5.0 KiB
Python

"""
Document generation utilities
"""
import logging
import re
import textwrap
from pathlib import Path
from typing import Dict, Any, List, Tuple

from ..config import settings
from ..core import FileProcessingError
from ..services.ai import ai_provider_factory
class DocumentGenerator:
    """Generate Markdown, DOCX and PDF documents from processed text.

    The provider returned by ``ai_provider_factory.get_best_provider()``
    produces the summary; the ``_create_*`` helpers then write it out under
    the directories configured in ``settings``.
    """

    # Capitalised words (Spanish accented letters included) used as
    # filename topics.
    _TOPIC_PATTERN = re.compile(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b')

    # Character budget per PDF body line. This is an approximation:
    # Helvetica is a proportional font, so the rendered width varies.
    _PDF_WRAP_WIDTH = 75

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.ai_provider = ai_provider_factory.get_best_provider()

    def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
        """Summarise *text* and write the summary as Markdown, DOCX and PDF.

        Args:
            text: Source text handed to the AI provider's ``summarize``.
            base_name: Stem used for the output file names and titles.

        Returns:
            ``(True, summary, metadata)`` on success, where ``metadata``
            holds the three output paths, the DOCX file name, the summary
            and the suggested filename. ``(False, "", {})`` if any step
            raises.
        """
        self.logger.info("Generating summary for %s", base_name)
        try:
            summary = self.ai_provider.summarize(text)

            # The suggested filename is only reported in the metadata; the
            # output files themselves are named after ``base_name``.
            filename = self._generate_filename(text, summary, base_name)

            markdown_path = self._create_markdown(summary, base_name)
            docx_path = self._create_docx(summary, base_name)
            pdf_path = self._create_pdf(summary, base_name)

            metadata = {
                'markdown_path': str(markdown_path),
                'docx_path': str(docx_path),
                'pdf_path': str(pdf_path),
                'docx_name': Path(docx_path).name,
                'summary': summary,
                'filename': filename,
            }
            return True, summary, metadata
        except Exception as e:
            # Top-level boundary: report failure through the return value,
            # but keep the traceback in the log.
            self.logger.exception("Summary generation failed: %s", e)
            return False, "", {}

    @classmethod
    def _extract_topics(cls, text: str, max_topics: int = 3) -> List[str]:
        """Return up to *max_topics* capitalised words found in *text*."""
        return cls._TOPIC_PATTERN.findall(text)[:max_topics]

    @staticmethod
    def _wrap_lines(text: str, width: int) -> List[str]:
        """Split *text* on newlines and wrap each line to *width* characters.

        Empty lines are kept as empty strings so paragraph spacing survives.
        """
        wrapped: List[str] = []
        for raw_line in text.split('\n'):
            # textwrap.wrap returns [] for empty/blank input; keep the line.
            wrapped.extend(textwrap.wrap(raw_line, width) or [''])
        return wrapped

    def _generate_filename(self, text: str, summary: str, base_name: str = 'documento') -> str:
        """Build a filename from the first capitalised words of *summary*.

        Args:
            text: Original text (currently unused; kept for the interface).
            summary: Summary the topics are extracted from.
            base_name: Fallback stem used if topic extraction fails.

        Returns:
            Up to three topics joined by underscores and truncated according
            to ``settings``; ``'documento'`` when no topic is found.
        """
        try:
            # Extract topics from the summary itself. Scanning an instruction
            # prompt instead would pick up the prompt's own capitalised words.
            sanitize = getattr(self.ai_provider, 'sanitize_input', None)
            topics_text = sanitize(summary) if callable(sanitize) else summary[:100]

            topics = self._extract_topics(topics_text) or ['documento']
            topics = [t[:settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
            return '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]
        except Exception as e:
            self.logger.error("Filename generation failed: %s", e)
            return base_name[:settings.MAX_FILENAME_BASE_LENGTH]

    def _create_markdown(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.md`` and return its path."""
        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.md"

        title = base_name.replace('_', ' ').title()
        # Blank lines separate the blocks: a '---' directly under the summary
        # would be read as a setext heading underline instead of a rule.
        content = "\n".join([
            f"# {title}",
            "",
            "## Resumen",
            "",
            summary,
            "",
            "---",
            "",
            "*Generado por CBCFacil*",
            "",
        ])
        output_path.write_text(content, encoding='utf-8')
        return output_path

    def _create_docx(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.docx`` and return its path.

        Raises:
            FileProcessingError: If python-docx is not installed.
        """
        try:
            from docx import Document
        except ImportError as exc:
            raise FileProcessingError("python-docx not installed") from exc

        output_dir = settings.LOCAL_DOCX
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.docx"

        doc = Document()
        doc.add_heading(base_name.replace('_', ' ').title(), 0)
        doc.add_heading('Resumen', level=1)
        doc.add_paragraph(summary)
        doc.add_page_break()
        doc.add_paragraph("*Generado por CBCFacil*")
        doc.save(str(output_path))
        return output_path

    def _create_pdf(self, summary: str, base_name: str) -> Path:
        """Write the summary to ``<base_name>_unificado.pdf`` and return its path.

        Raises:
            FileProcessingError: If reportlab is not installed.
        """
        try:
            from reportlab.lib.pagesizes import letter
            from reportlab.pdfgen import canvas
        except ImportError as exc:
            raise FileProcessingError("reportlab not installed") from exc

        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.pdf"

        c = canvas.Canvas(str(output_path), pagesize=letter)
        _, height = letter

        # Title
        c.setFont("Helvetica-Bold", 16)
        c.drawString(100, height - 100, base_name.replace('_', ' ').title())

        # Body: drawString does not wrap, so wrap the text before drawing.
        c.setFont("Helvetica", 12)
        y_position = height - 140
        for line in self._wrap_lines(summary, self._PDF_WRAP_WIDTH):
            if y_position < 100:
                # Bottom margin reached: start a new page and re-select the
                # body font before continuing.
                c.showPage()
                y_position = height - 100
                c.setFont("Helvetica", 12)
            c.drawString(100, y_position, line)
            y_position -= 20

        c.showPage()
        c.save()
        return output_path