""" Document generation utilities """ import logging import re from pathlib import Path from typing import Dict, Any, List, Tuple from core import FileProcessingError from config import settings from services.ai import ai_provider_factory class DocumentGenerator: """Generate documents from processed text""" def __init__(self): self.logger = logging.getLogger(__name__) self.ai_provider = ai_provider_factory.get_best_provider() def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]: """Generate unified summary""" self.logger.info(f"Generating summary for {base_name}") try: # Step 1: Generate Bullet Points (Chunking handled by provider or single prompt for now) # Note: We use the main provider (Claude/Zai) for content generation self.logger.info("Generating bullet points...") bullet_prompt = f"""Analiza el siguiente texto y extrae entre 5 y 8 bullet points clave en español. REGLAS ESTRICTAS: 1. Devuelve ÚNICAMENTE bullet points, cada línea iniciando con "- " 2. Cada bullet debe ser conciso (12-20 palabras) y resaltar datos, fechas, conceptos o conclusiones importantes 3. NO agregues introducciones, conclusiones ni texto explicativo 4. Concéntrate en los puntos más importantes del texto 5. Incluye fechas, datos específicos y nombres relevantes si los hay Texto: {text[:15000]}""" # Truncate to avoid context limits if necessary, though providers handle it differently try: bullet_points = self.ai_provider.generate_text(bullet_prompt) self.logger.info(f"Bullet points generated: {len(bullet_points)}") except Exception as e: self.logger.warning(f"Bullet point generation failed: {e}") bullet_points = "- Puntos clave no disponibles por error en IA" # Step 2: Generate Unified Summary self.logger.info("Generating unified summary...") summary_prompt = f"""Eres un profesor universitario experto en historia del siglo XX. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos. REQUISITOS ESTRICTOS: - Extensión entre 500-700 palabras - Usa encabezados Markdown con jerarquía clara (##, ###) - Desarrolla los puntos clave con profundidad y contexto histórico - Mantén un tono académico y analítico - Incluye conclusiones significativas - NO agregues texto fuera del resumen - Devuelve únicamente el resumen en formato Markdown Contenido a resumir: {text[:20000]} Puntos clave a incluir obligatoriamente: {bullet_points}""" try: raw_summary = self.ai_provider.generate_text(summary_prompt) except Exception as e: self.logger.error(f"Raw summary generation failed: {e}") raise e # Step 3: Format with Gemini (using GeminiProvider explicitly) self.logger.info("Formatting summary with Gemini...") format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible: {raw_summary} Instrucciones: - Corrige cualquier error de formato - Asegúrate de que los encabezados estén bien espaciados - Verifica que las viñetas usen "- " correctamente - Mantén exactamente el contenido existente - Devuelve únicamente el resumen formateado sin texto adicional""" # Use generic Gemini provider for formatting as requested from services.ai.gemini_provider import GeminiProvider formatter = GeminiProvider() try: if formatter.is_available(): summary = formatter.generate_text(format_prompt) else: self.logger.warning("Gemini formatter not available, using raw summary") summary = raw_summary except Exception as e: self.logger.warning(f"Formatting failed ({e}), using raw summary") summary = raw_summary # Generate filename filename = self._generate_filename(text, summary) # Create document markdown_path = self._create_markdown(summary, base_name) docx_path = self._create_docx(summary, base_name) pdf_path = self._create_pdf(summary, base_name) metadata = { 'markdown_path': str(markdown_path), 'docx_path': str(docx_path), 'pdf_path': str(pdf_path), 'docx_name': Path(docx_path).name, 'summary': summary, 'filename': filename } return True, summary, metadata except Exception as e: self.logger.error(f"Document generation process failed: {e}") return False, "", {} def _generate_filename(self, text: str, summary: str) -> str: """Generate intelligent filename""" try: # Use AI to extract key topics prompt = f"""Extract 2-3 key topics from this summary to create a filename. Summary: {summary} Return only the topics separated by hyphens, max 20 chars each, in Spanish:""" topics_text = self.ai_provider.sanitize_input(prompt) if hasattr(self.ai_provider, 'sanitize_input') else summary[:100] # Simple topic extraction topics = re.findall(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b', topics_text)[:3] if not topics: topics = ['documento'] # Limit topic length topics = [t[:settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics] filename = '_'.join(topics)[:settings.MAX_FILENAME_LENGTH] return filename except Exception as e: self.logger.error(f"Filename generation failed: {e}") return base_name[:settings.MAX_FILENAME_BASE_LENGTH] def _create_markdown(self, summary: str, base_name: str) -> Path: """Create Markdown document""" output_dir = settings.LOCAL_DOWNLOADS_PATH output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / f"{base_name}_unificado.md" content = f"""# {base_name.replace('_', ' ').title()} ## Resumen {summary} --- *Generado por CBCFacil* """ with open(output_path, 'w', encoding='utf-8') as f: f.write(content) return output_path def _create_docx(self, summary: str, base_name: str) -> Path: """Create DOCX document""" try: from docx import Document from docx.shared import Inches except ImportError: raise FileProcessingError("python-docx not installed") output_dir = settings.LOCAL_DOCX output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / f"{base_name}_unificado.docx" doc = Document() doc.add_heading(base_name.replace('_', ' ').title(), 0) doc.add_heading('Resumen', level=1) doc.add_paragraph(summary) doc.add_page_break() doc.add_paragraph(f"*Generado por CBCFacil*") doc.save(output_path) return output_path def _create_pdf(self, summary: str, base_name: str) -> Path: """Create PDF document""" try: from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas except ImportError: raise FileProcessingError("reportlab not installed") output_dir = settings.LOCAL_DOWNLOADS_PATH output_dir.mkdir(parents=True, exist_ok=True) output_path = output_dir / f"{base_name}_unificado.pdf" c = canvas.Canvas(str(output_path), pagesize=letter) width, height = letter # Add title c.setFont("Helvetica-Bold", 16) title = base_name.replace('_', ' ').title() c.drawString(100, height - 100, title) # Add summary c.setFont("Helvetica", 12) y_position = height - 140 # Simple text wrapping lines = summary.split('\n') for line in lines: if y_position < 100: c.showPage() y_position = height - 100 c.setFont("Helvetica", 12) c.drawString(100, y_position, line) y_position -= 20 c.showPage() c.save() return output_path