Refine formatting: Justified text, robust PDF/DOCX generation, clean markdown style
This commit is contained in:
@@ -80,6 +80,7 @@ Instrucciones:
|
||||
- Asegúrate de que los encabezados estén bien espaciados
|
||||
- Verifica que las viñetas usen "- " correctamente
|
||||
- Mantén exactamente el contenido existente
|
||||
- EVITA el uso excesivo de negritas (asteriscos), úsalas solo para conceptos clave
|
||||
- Devuelve únicamente el resumen formateado sin texto adicional"""
|
||||
|
||||
# Use generic Gemini provider for formatting as requested
|
||||
@@ -169,7 +170,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
return output_path
|
||||
|
||||
def _create_docx(self, summary: str, base_name: str) -> Path:
|
||||
"""Create DOCX document"""
|
||||
"""Create DOCX document with Markdown parsing (Legacy method ported)"""
|
||||
try:
|
||||
from docx import Document
|
||||
from docx.shared import Inches
|
||||
@@ -184,8 +185,50 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
doc = Document()
|
||||
doc.add_heading(base_name.replace('_', ' ').title(), 0)
|
||||
|
||||
doc.add_heading('Resumen', level=1)
|
||||
doc.add_paragraph(summary)
|
||||
# Parse and render Markdown content line by line
|
||||
lines = summary.splitlines()
|
||||
current_paragraph = []
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p.alignment = 3 # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
|
||||
current_paragraph = []
|
||||
continue
|
||||
|
||||
if line.startswith('#'):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
# Process heading
|
||||
level = len(line) - len(line.lstrip('#'))
|
||||
heading_text = line.lstrip('#').strip()
|
||||
if level <= 6:
|
||||
doc.add_heading(heading_text, level=level)
|
||||
else:
|
||||
current_paragraph.append(heading_text)
|
||||
elif line.startswith('-') or line.startswith('*') or line.startswith('•'):
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p.alignment = 3
|
||||
current_paragraph = []
|
||||
bullet_text = line.lstrip('-*• ').strip()
|
||||
p = doc.add_paragraph(bullet_text, style='List Bullet')
|
||||
# NOTE: bold markers ('**') are NOT stripped from bullets; the check below is a no-op placeholder
|
||||
if '**' in bullet_text:
|
||||
# Basic cleanup for bullets
|
||||
pass
|
||||
else:
|
||||
# Clean up excessive bold markers in body text if user requested
|
||||
clean_line = line.replace('**', '') # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||
current_paragraph.append(clean_line)
|
||||
|
||||
if current_paragraph:
|
||||
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||
p.alignment = 3
|
||||
|
||||
doc.add_page_break()
|
||||
doc.add_paragraph(f"*Generado por CBCFacil*")
|
||||
@@ -194,10 +237,11 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
return output_path
|
||||
|
||||
def _create_pdf(self, summary: str, base_name: str) -> Path:
|
||||
"""Create PDF document"""
|
||||
"""Create PDF document with Markdown parsing (Legacy method ported)"""
|
||||
try:
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfgen import canvas
|
||||
import textwrap
|
||||
except ImportError:
|
||||
raise FileProcessingError("reportlab not installed")
|
||||
|
||||
@@ -208,28 +252,67 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
||||
|
||||
c = canvas.Canvas(str(output_path), pagesize=letter)
|
||||
width, height = letter
|
||||
margin = 72
|
||||
y_position = height - margin
|
||||
|
||||
# Add title
|
||||
c.setFont("Helvetica-Bold", 16)
|
||||
title = base_name.replace('_', ' ').title()
|
||||
c.drawString(100, height - 100, title)
|
||||
def new_page():
|
||||
nonlocal y_position
|
||||
c.showPage()
|
||||
c.setFont('Helvetica', 11)
|
||||
y_position = height - margin
|
||||
|
||||
# Add summary
|
||||
c.setFont("Helvetica", 12)
|
||||
y_position = height - 140
|
||||
c.setFont('Helvetica', 11)
|
||||
|
||||
# Simple text wrapping
|
||||
lines = summary.split('\n')
|
||||
for line in lines:
|
||||
if y_position < 100:
|
||||
c.showPage()
|
||||
y_position = height - 100
|
||||
c.setFont("Helvetica", 12)
|
||||
# Title
|
||||
c.setFont('Helvetica-Bold', 16)
|
||||
c.drawString(margin, y_position, base_name.replace('_', ' ').title()[:100])
|
||||
y_position -= 28
|
||||
c.setFont('Helvetica', 11)
|
||||
|
||||
c.drawString(100, y_position, line)
|
||||
y_position -= 20
|
||||
summary_clean = summary.replace('**', '') # Remove asterisks globally for cleaner PDF
|
||||
|
||||
for raw_line in summary_clean.splitlines():
|
||||
line = raw_line.rstrip()
|
||||
|
||||
if not line.strip():
|
||||
y_position -= 14
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
continue
|
||||
|
||||
stripped = line.lstrip()
|
||||
|
||||
if stripped.startswith('#'):
|
||||
level = len(stripped) - len(stripped.lstrip('#'))
|
||||
heading_text = stripped.lstrip('#').strip()
|
||||
if heading_text:
|
||||
font_size = 16 if level == 1 else 14 if level == 2 else 12
|
||||
c.setFont('Helvetica-Bold', font_size)
|
||||
c.drawString(margin, y_position, heading_text[:90])
|
||||
y_position -= font_size + 6
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
c.setFont('Helvetica', 11)
|
||||
continue
|
||||
|
||||
if stripped.startswith(('-', '*', '•')):
|
||||
bullet_text = stripped.lstrip('-*•').strip()
|
||||
wrapped_lines = textwrap.wrap(bullet_text, width=80) or ['']
|
||||
for idx, wrapped in enumerate(wrapped_lines):
|
||||
prefix = '• ' if idx == 0 else ' '
|
||||
c.drawString(margin, y_position, f"{prefix}{wrapped}")
|
||||
y_position -= 14
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
continue
|
||||
|
||||
# Body text - wrapped at 90 characters and drawn left-aligned, not justified (ReportLab native justification requires Paragraph styles)
|
||||
wrapped_lines = textwrap.wrap(stripped, width=90) or ['']
|
||||
for wrapped in wrapped_lines:
|
||||
c.drawString(margin, y_position, wrapped)
|
||||
y_position -= 14
|
||||
if y_position < margin:
|
||||
new_page()
|
||||
|
||||
c.showPage()
|
||||
c.save()
|
||||
|
||||
return output_path
|
||||
|
||||
Reference in New Issue
Block a user