Refine formatting: Justified text, robust PDF/DOCX generation, clean markdown style
This commit is contained in:
@@ -80,6 +80,7 @@ Instrucciones:
|
|||||||
- Asegúrate de que los encabezados estén bien espaciados
|
- Asegúrate de que los encabezados estén bien espaciados
|
||||||
- Verifica que las viñetas usen "- " correctamente
|
- Verifica que las viñetas usen "- " correctamente
|
||||||
- Mantén exactamente el contenido existente
|
- Mantén exactamente el contenido existente
|
||||||
|
- EVITA el uso excesivo de negritas (asteriscos), úsalas solo para conceptos clave
|
||||||
- Devuelve únicamente el resumen formateado sin texto adicional"""
|
- Devuelve únicamente el resumen formateado sin texto adicional"""
|
||||||
|
|
||||||
# Use generic Gemini provider for formatting as requested
|
# Use generic Gemini provider for formatting as requested
|
||||||
@@ -169,7 +170,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
|||||||
return output_path
|
return output_path
|
||||||
|
|
||||||
def _create_docx(self, summary: str, base_name: str) -> Path:
|
def _create_docx(self, summary: str, base_name: str) -> Path:
|
||||||
"""Create DOCX document"""
|
"""Create DOCX document with Markdown parsing (Legacy method ported)"""
|
||||||
try:
|
try:
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from docx.shared import Inches
|
from docx.shared import Inches
|
||||||
@@ -184,8 +185,50 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
|||||||
doc = Document()
|
doc = Document()
|
||||||
doc.add_heading(base_name.replace('_', ' ').title(), 0)
|
doc.add_heading(base_name.replace('_', ' ').title(), 0)
|
||||||
|
|
||||||
doc.add_heading('Resumen', level=1)
|
# Parse and render Markdown content line by line
|
||||||
doc.add_paragraph(summary)
|
lines = summary.splitlines()
|
||||||
|
current_paragraph = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
if current_paragraph:
|
||||||
|
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||||
|
p.alignment = 3 # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
|
||||||
|
current_paragraph = []
|
||||||
|
continue
|
||||||
|
|
||||||
|
if line.startswith('#'):
|
||||||
|
if current_paragraph:
|
||||||
|
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||||
|
p.alignment = 3
|
||||||
|
current_paragraph = []
|
||||||
|
# Process heading
|
||||||
|
level = len(line) - len(line.lstrip('#'))
|
||||||
|
heading_text = line.lstrip('#').strip()
|
||||||
|
if level <= 6:
|
||||||
|
doc.add_heading(heading_text, level=level)
|
||||||
|
else:
|
||||||
|
current_paragraph.append(heading_text)
|
||||||
|
elif line.startswith('-') or line.startswith('*') or line.startswith('•'):
|
||||||
|
if current_paragraph:
|
||||||
|
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||||
|
p.alignment = 3
|
||||||
|
current_paragraph = []
|
||||||
|
bullet_text = line.lstrip('-*• ').strip()
|
||||||
|
p = doc.add_paragraph(bullet_text, style='List Bullet')
|
||||||
|
# Remove bold markers from bullets if present
|
||||||
|
if '**' in bullet_text:
|
||||||
|
# Basic cleanup for bullets
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# Clean up excessive bold markers in body text if user requested
|
||||||
|
clean_line = line.replace('**', '') # Removing asterisks as per user complaint "se abusa de los asteriscos"
|
||||||
|
current_paragraph.append(clean_line)
|
||||||
|
|
||||||
|
if current_paragraph:
|
||||||
|
p = doc.add_paragraph(' '.join(current_paragraph))
|
||||||
|
p.alignment = 3
|
||||||
|
|
||||||
doc.add_page_break()
|
doc.add_page_break()
|
||||||
doc.add_paragraph(f"*Generado por CBCFacil*")
|
doc.add_paragraph(f"*Generado por CBCFacil*")
|
||||||
@@ -194,10 +237,11 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
|||||||
return output_path
|
return output_path
|
||||||
|
|
||||||
def _create_pdf(self, summary: str, base_name: str) -> Path:
|
def _create_pdf(self, summary: str, base_name: str) -> Path:
|
||||||
"""Create PDF document"""
|
"""Create PDF document with Markdown parsing (Legacy method ported)"""
|
||||||
try:
|
try:
|
||||||
from reportlab.lib.pagesizes import letter
|
from reportlab.lib.pagesizes import letter
|
||||||
from reportlab.pdfgen import canvas
|
from reportlab.pdfgen import canvas
|
||||||
|
import textwrap
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise FileProcessingError("reportlab not installed")
|
raise FileProcessingError("reportlab not installed")
|
||||||
|
|
||||||
@@ -208,28 +252,67 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
|
|||||||
|
|
||||||
c = canvas.Canvas(str(output_path), pagesize=letter)
|
c = canvas.Canvas(str(output_path), pagesize=letter)
|
||||||
width, height = letter
|
width, height = letter
|
||||||
|
margin = 72
|
||||||
|
y_position = height - margin
|
||||||
|
|
||||||
# Add title
|
def new_page():
|
||||||
c.setFont("Helvetica-Bold", 16)
|
nonlocal y_position
|
||||||
title = base_name.replace('_', ' ').title()
|
|
||||||
c.drawString(100, height - 100, title)
|
|
||||||
|
|
||||||
# Add summary
|
|
||||||
c.setFont("Helvetica", 12)
|
|
||||||
y_position = height - 140
|
|
||||||
|
|
||||||
# Simple text wrapping
|
|
||||||
lines = summary.split('\n')
|
|
||||||
for line in lines:
|
|
||||||
if y_position < 100:
|
|
||||||
c.showPage()
|
c.showPage()
|
||||||
y_position = height - 100
|
c.setFont('Helvetica', 11)
|
||||||
c.setFont("Helvetica", 12)
|
y_position = height - margin
|
||||||
|
|
||||||
c.drawString(100, y_position, line)
|
c.setFont('Helvetica', 11)
|
||||||
y_position -= 20
|
|
||||||
|
# Title
|
||||||
|
c.setFont('Helvetica-Bold', 16)
|
||||||
|
c.drawString(margin, y_position, base_name.replace('_', ' ').title()[:100])
|
||||||
|
y_position -= 28
|
||||||
|
c.setFont('Helvetica', 11)
|
||||||
|
|
||||||
|
summary_clean = summary.replace('**', '') # Remove asterisks globally for cleaner PDF
|
||||||
|
|
||||||
|
for raw_line in summary_clean.splitlines():
|
||||||
|
line = raw_line.rstrip()
|
||||||
|
|
||||||
|
if not line.strip():
|
||||||
|
y_position -= 14
|
||||||
|
if y_position < margin:
|
||||||
|
new_page()
|
||||||
|
continue
|
||||||
|
|
||||||
|
stripped = line.lstrip()
|
||||||
|
|
||||||
|
if stripped.startswith('#'):
|
||||||
|
level = len(stripped) - len(stripped.lstrip('#'))
|
||||||
|
heading_text = stripped.lstrip('#').strip()
|
||||||
|
if heading_text:
|
||||||
|
font_size = 16 if level == 1 else 14 if level == 2 else 12
|
||||||
|
c.setFont('Helvetica-Bold', font_size)
|
||||||
|
c.drawString(margin, y_position, heading_text[:90])
|
||||||
|
y_position -= font_size + 6
|
||||||
|
if y_position < margin:
|
||||||
|
new_page()
|
||||||
|
c.setFont('Helvetica', 11)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if stripped.startswith(('-', '*', '•')):
|
||||||
|
bullet_text = stripped.lstrip('-*•').strip()
|
||||||
|
wrapped_lines = textwrap.wrap(bullet_text, width=80) or ['']
|
||||||
|
for idx, wrapped in enumerate(wrapped_lines):
|
||||||
|
prefix = '• ' if idx == 0 else ' '
|
||||||
|
c.drawString(margin, y_position, f"{prefix}{wrapped}")
|
||||||
|
y_position -= 14
|
||||||
|
if y_position < margin:
|
||||||
|
new_page()
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Body text - Justified approximation (ReportLab native justification requires Paragraph styles, defaulting to wrap)
|
||||||
|
wrapped_lines = textwrap.wrap(stripped, width=90) or ['']
|
||||||
|
for wrapped in wrapped_lines:
|
||||||
|
c.drawString(margin, y_position, wrapped)
|
||||||
|
y_position -= 14
|
||||||
|
if y_position < margin:
|
||||||
|
new_page()
|
||||||
|
|
||||||
c.showPage()
|
|
||||||
c.save()
|
c.save()
|
||||||
|
|
||||||
return output_path
|
return output_path
|
||||||
|
|||||||
Reference in New Issue
Block a user