fix: Mejoras en generación de PDFs y resúmenes

- Corrige PDFGenerator para pasar contenido (no ruta)
- Agrega prompt siguiendo código.md (español, estructura académica)
- Limpia thinking tokens de respuesta AI
- Agrega skip de archivos ya procesados en watcher
- Implementa tablas LaTeX en PDFs (reportlab Table)
- Agrega load_dotenv() en main.py
- Actualiza .env con MiniMax config
- Agrega transcriptions/ a .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 17:12:00 +00:00
parent ee8fc183be
commit 1f6bfa771b
5 changed files with 207 additions and 11 deletions
--- a/services/pdf_generator.py
+++ b/services/pdf_generator.py
@@ -5,13 +5,13 @@ Utiliza reportlab para la generación de PDFs con soporte UTF-8.
 """
 import logging
 from pathlib import Path
-from typing import Union
+from typing import Optional, Union

 from reportlab.lib import colors
 from reportlab.lib.pagesizes import A4
 from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
 from reportlab.lib.units import cm
-from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
+from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle

 logger = logging.getLogger(__name__)

@@ -64,6 +64,92 @@ class PDFGenerator:
            .replace("\n", "<br/>")
        )

+    def _parse_latex_table(self, lines: list[str], start_idx: int) -> tuple[Optional[Table], int]:
+        """
+        Parse a LaTeX ``tabular`` environment into a reportlab Table.
+
+        Scanning starts at ``start_idx`` and stops at the first line containing
+        ``end{tabular}`` (or at the end of ``lines``). Every line in between is
+        split on unescaped ``&`` into cells. Rule commands (hline and the
+        booktabs rules), the double-backslash row terminator and the
+        textbf/textit/emph wrappers are removed. Empty cells are kept and short
+        rows are padded so that columns stay aligned.
+
+        Example:
+            The body lines ``A & B`` and ``1 &  & 3`` yield the rows
+            ``['A', 'B', '']`` and ``['1', '', '3']``.
+
+        Args:
+            lines: Lines of the document being rendered.
+            start_idx: Index from which to look for ``begin{tabular}``.
+
+        Returns:
+            ``(table, end_index)``, where ``end_index`` is the index of the
+            ``end{tabular}`` line, or ``len(lines)`` when it is missing.
+            ``(None, start_idx)`` when no row was found or the table could not
+            be built.
+        """
+        row_markup = ("\\hline", "\\toprule", "\\midrule", "\\bottomrule", "\\\\")
+        wrappers = ("\\textbf{", "\\textit{", "\\emph{", "}")
+        amp = "\x00"  # Stands in for an escaped ampersand while splitting on "&".
+
+        rows: list[list[str]] = []
+        in_table = False
+        i = start_idx
+
+        while i < len(lines):
+            line = lines[i].strip()
+            if "begin{tabular}" in line:
+                # The column spec is not needed: every column is left-aligned below.
+                in_table = True
+            elif "end{tabular}" in line:
+                break
+            elif in_table:
+                # Strip commands while their backslash is still present; removing
+                # all backslashes first would leave "textbf"/"hline" in the text.
+                for token in row_markup:
+                    line = line.replace(token, "")
+                cells = []
+                for cell in line.replace("\\&", amp).split("&"):
+                    for token in wrappers:
+                        cell = cell.replace(token, "")
+                    cell = cell.replace(amp, "&").replace("\\", "")
+                    cells.append(cell.strip())
+                # Keep empty cells (they hold column positions) but skip lines
+                # that carry no text at all, such as a lone rule command.
+                if any(cells):
+                    rows.append(cells)
+
+            i += 1
+
+        if not rows:
+            return None, start_idx
+
+        # Pad short rows so every row has the same number of columns.
+        width = max(len(row) for row in rows)
+        data = [row + [""] * (width - len(row)) for row in rows]
+
+        # Grey header row, beige body, full grid.
+        try:
+            table = Table(data)
+            table.setStyle(TableStyle([
+                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
+                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
+                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                # Font size applies to every cell, not only the first column.
+                ('FONTSIZE', (0, 0), (-1, -1), 10),
+                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
+                ('GRID', (0, 0), (-1, -1), 1, colors.black),
+                ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+            ]))
+        except Exception as e:
+            # Best effort: a malformed table must not abort PDF generation.
+            logger.warning("Error parsing LaTeX table: %s", e)
+            return None, start_idx
+        return table, i
+
    def _parse_markdown_basic(self, markdown: str) -> list[Paragraph]:
        """
        Convierte markdown básico a una lista de Paragraphs de reportlab.
@@ -101,6 +187,14 @@ class PDFGenerator:
            # Línea horizontal
            elif line == "---" or line == "***":
                elements.append(Spacer(1, 0.2 * cm))
+            # Tabla LaTeX
+            elif "begin{tabular}" in line or "begin{tabular" in line:
+                latex_table, end_idx = self._parse_latex_table(lines, idx)
+                if latex_table:
+                    elements.append(Spacer(1, 0.3 * cm))
+                    elements.append(latex_table)
+                    elements.append(Spacer(1, 0.3 * cm))
+                    idx = end_idx - 1  # Saltar las líneas de la tabla
            # Lista con guiones
            elif line.startswith("- ") or line.startswith("* "):
                text = self._escape_xml(line[2:])