CBCFacil v8.0 - Refactored with AMD GPU support
This commit is contained in:
164
document/generators.py
Normal file
164
document/generators.py
Normal file
@@ -0,0 +1,164 @@
|
||||
"""
|
||||
Document generation utilities
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List, Tuple
|
||||
from ..core import FileProcessingError
|
||||
from ..config import settings
|
||||
from ..services.ai import ai_provider_factory
|
||||
|
||||
|
||||
class DocumentGenerator:
    """Generate summary documents (Markdown, DOCX and PDF) from processed text.

    The summary itself is produced by the provider returned from
    ``ai_provider_factory.get_best_provider()``; this class only orchestrates
    the call and renders the result into the three output formats.
    """

    # A capitalised word (Spanish accented letters included). This is the
    # heuristic used to pick "topic" words for the suggested filename.
    _TOPIC_PATTERN = re.compile(r'\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b')

    # PDF layout constants, in points.
    _PDF_MARGIN = 100
    _PDF_LINE_HEIGHT = 20
    _PDF_BODY_FONT = "Helvetica"
    _PDF_BODY_SIZE = 12

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.ai_provider = ai_provider_factory.get_best_provider()

    def generate_summary(self, text: str, base_name: str) -> Tuple[bool, str, Dict[str, Any]]:
        """Summarise ``text`` and write it out as Markdown, DOCX and PDF.

        Args:
            text: Source text handed to the AI provider's ``summarize``.
            base_name: Stem used for the output file names and the title.

        Returns:
            ``(True, summary, metadata)`` on success, where ``metadata`` holds
            the three output paths (as strings), the DOCX file name, the
            summary and the suggested filename. ``(False, "", {})`` if any
            step raised; the error is logged with its traceback.
        """
        self.logger.info(f"Generating summary for {base_name}")

        try:
            summary = self.ai_provider.summarize(text)

            # The suggested filename is only reported in the metadata; the
            # output files below are always named after ``base_name``.
            filename = self._generate_filename(text, summary, base_name)

            markdown_path = self._create_markdown(summary, base_name)
            docx_path = self._create_docx(summary, base_name)
            pdf_path = self._create_pdf(summary, base_name)

            metadata = {
                'markdown_path': str(markdown_path),
                'docx_path': str(docx_path),
                'pdf_path': str(pdf_path),
                'docx_name': Path(docx_path).name,
                'summary': summary,
                'filename': filename,
            }
            return True, summary, metadata

        except Exception as e:
            # Deliberate catch-all: callers rely on the (False, "", {}) result
            # instead of an exception. Keep the traceback in the log.
            self.logger.exception("Summary generation failed: %s", e)
            return False, "", {}

    @staticmethod
    def _display_title(base_name: str) -> str:
        """Turn a file stem such as ``acta_de_reunion`` into ``Acta De Reunion``."""
        return base_name.replace('_', ' ').title()

    @staticmethod
    def _extract_topics(text: str, max_topic_length: int, max_topics: int = 3) -> List[str]:
        """Return up to ``max_topics`` capitalised words found in ``text``.

        Each word is truncated to ``max_topic_length`` characters. When no
        capitalised word is found the single placeholder ``'documento'`` is
        returned (also truncated) so the caller always gets a non-empty list.
        """
        topics = DocumentGenerator._TOPIC_PATTERN.findall(text)[:max_topics]
        if not topics:
            topics = ['documento']
        return [topic[:max_topic_length] for topic in topics]

    def _generate_filename(self, text: str, summary: str, base_name: str = 'documento') -> str:
        """Build a suggested filename from capitalised words in ``summary``.

        Args:
            text: Unused; kept so existing callers keep working.
            summary: Text the topic words are extracted from.
            base_name: Fallback stem used if extraction fails. Optional so
                that two-argument calls remain valid.

        Returns:
            Topic words joined with ``_`` and cut to
            ``settings.MAX_FILENAME_LENGTH``, or ``base_name`` cut to
            ``settings.MAX_FILENAME_BASE_LENGTH`` if extraction raised.
        """
        try:
            # Topics must come from the summary itself. Running the regex over
            # an instruction prompt would pick up the prompt's own words.
            # NOTE(review): sanitize_input is assumed to return a str, as the
            # previous code already fed its result to re.findall - confirm.
            sanitize = getattr(self.ai_provider, 'sanitize_input', None)
            topics_text = sanitize(summary) if callable(sanitize) else summary[:100]

            topics = self._extract_topics(topics_text, settings.MAX_FILENAME_TOPICS_LENGTH)
            return '_'.join(topics)[:settings.MAX_FILENAME_LENGTH]

        except Exception as e:
            self.logger.error(f"Filename generation failed: {e}")
            return base_name[:settings.MAX_FILENAME_BASE_LENGTH]

    def _create_markdown(self, summary: str, base_name: str) -> Path:
        """Write ``<base_name>_unificado.md`` under ``settings.LOCAL_DOWNLOADS_PATH``.

        The directory is created if missing. Returns the path of the file.
        """
        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.md"

        content = (
            f"# {self._display_title(base_name)}\n"
            "\n"
            "## Resumen\n"
            "\n"
            f"{summary}\n"
            "\n"
            "---\n"
            "\n"
            "*Generado por CBCFacil*\n"
        )

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)

        return output_path

    def _create_docx(self, summary: str, base_name: str) -> Path:
        """Write ``<base_name>_unificado.docx`` under ``settings.LOCAL_DOCX``.

        Raises:
            FileProcessingError: If ``python-docx`` is not installed.
        """
        try:
            from docx import Document
        except ImportError as exc:
            raise FileProcessingError("python-docx not installed") from exc

        output_dir = settings.LOCAL_DOCX
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.docx"

        doc = Document()
        doc.add_heading(self._display_title(base_name), 0)

        doc.add_heading('Resumen', level=1)
        doc.add_paragraph(summary)

        doc.add_page_break()
        doc.add_paragraph("*Generado por CBCFacil*")

        # Pass a plain string so this works regardless of whether the
        # installed python-docx accepts path objects.
        doc.save(str(output_path))
        return output_path

    def _create_pdf(self, summary: str, base_name: str) -> Path:
        """Write ``<base_name>_unificado.pdf`` under ``settings.LOCAL_DOWNLOADS_PATH``.

        Each line of ``summary`` is word-wrapped to the printable width
        (page width minus both margins) and a new page is started whenever
        the cursor drops below the bottom margin.

        Raises:
            FileProcessingError: If ``reportlab`` is not installed.
        """
        try:
            from reportlab.lib.pagesizes import letter
            from reportlab.lib.utils import simpleSplit
            from reportlab.pdfgen import canvas
        except ImportError as exc:
            raise FileProcessingError("reportlab not installed") from exc

        output_dir = settings.LOCAL_DOWNLOADS_PATH
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / f"{base_name}_unificado.pdf"

        margin = self._PDF_MARGIN
        font_name = self._PDF_BODY_FONT
        font_size = self._PDF_BODY_SIZE

        c = canvas.Canvas(str(output_path), pagesize=letter)
        width, height = letter
        max_text_width = width - 2 * margin

        # Title
        c.setFont("Helvetica-Bold", 16)
        c.drawString(margin, height - 100, self._display_title(base_name))

        # Body
        c.setFont(font_name, font_size)
        y_position = height - 140

        for paragraph in summary.split('\n'):
            # simpleSplit returns [] for an empty line; keep the blank line so
            # paragraph spacing from the summary is preserved.
            wrapped = simpleSplit(paragraph, font_name, font_size, max_text_width) or ['']
            for line in wrapped:
                if y_position < margin:
                    c.showPage()
                    y_position = height - 100
                    # Font state is reset on a new page, so set it again.
                    c.setFont(font_name, font_size)

                c.drawString(margin, y_position, line)
                y_position -= self._PDF_LINE_HEIGHT

        c.showPage()
        c.save()

        return output_path
|
||||
Reference in New Issue
Block a user