fix: Mejoras en generación de PDFs y resúmenes

- Corrige PDFGenerator para pasar contenido (no ruta)
- Agrega prompt siguiendo código.md (español, estructura académica)
- Limpia thinking tokens de respuesta AI
- Agrega skip de archivos ya procesados en watcher
- Implementa tablas LaTeX en PDFs (reportlab Table)
- Agrega load_dotenv() en main.py
- Actualiza .env con MiniMax config
- Agrega transcriptions/ a .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renato97
2026-02-25 17:12:00 +00:00
parent ee8fc183be
commit 1f6bfa771b
5 changed files with 207 additions and 11 deletions

View File

@@ -5,13 +5,13 @@ Utiliza reportlab para la generación de PDFs con soporte UTF-8.
"""
import logging
from pathlib import Path
from typing import Union
from typing import Optional, Union
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
logger = logging.getLogger(__name__)
@@ -64,6 +64,92 @@ class PDFGenerator:
.replace("\n", "<br/>")
)
def _parse_latex_table(self, lines: list[str], start_idx: int) -> tuple[Optional[Table], int]:
    """
    Parse a LaTeX ``tabular`` environment into a reportlab ``Table``.

    Scanning starts at ``lines[start_idx]`` and stops at the first line that
    contains ``end{tabular}``, or at the end of ``lines`` when the
    environment is never closed. Rows are split on unescaped ``&``; empty
    cells are kept so columns stay aligned, and short rows are padded with
    empty strings to the width of the widest row.

    Args:
        lines: All lines of the document being rendered.
        start_idx: Index at which scanning starts; normally the line that
            holds ``begin{tabular}``.

    Returns:
        ``(table, end_index)`` on success, where ``end_index`` is the index
        of the ``end{tabular}`` line (``len(lines)`` if it is missing).
        ``(None, start_idx)`` when no usable row was found or reportlab
        failed to build the table.

    NOTE(review): on failure the caller gets ``start_idx`` back unchanged;
    confirm the calling loop still advances past the table lines.
    """
    # Stand-in for an escaped ampersand (\&) so it survives the split on "&".
    amp_placeholder = "\x00"
    # Inline formatting commands whose wrapper is dropped, keeping the text.
    inline_commands = ("\\textbf{", "\\textit{", "\\emph{")

    rows: list[list[str]] = []
    i = start_idx
    in_table = False

    while i < len(lines):
        line = lines[i].strip()

        if "begin{tabular}" in line:
            # The column spec is ignored: every column is left-aligned below.
            in_table = True
            i += 1
            continue

        if "end{tabular}" in line:
            break

        if in_table:
            # Remove rule commands and the row terminator (\\) first, while
            # the backslashes that identify them are still present.
            text = line.replace("\\hline", "").replace("\\\\", "")
            text = text.replace("\\&", amp_placeholder)

            cells = []
            for raw_cell in text.split("&"):
                cell = raw_cell
                for command in inline_commands:
                    cell = cell.replace(command, "")
                # Drop closing braces and any leftover backslashes
                # (e.g. "\%" -> "%"), then restore literal ampersands.
                cell = cell.replace("}", "").replace("\\", "")
                cells.append(cell.replace(amp_placeholder, "&").strip())

            # A line holding only rules/terminators yields no content.
            if any(cells):
                rows.append(cells)

        i += 1

    if not rows:
        return None, start_idx

    # Pad ragged rows so every row has the same number of columns.
    num_cols = max(len(row) for row in rows)
    data = [row + [""] * (num_cols - len(row)) for row in rows]

    try:
        table = Table(data)
        table.setStyle(TableStyle([
            # Header row.
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            # Body rows.
            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
            # Whole table. FONTSIZE previously ended at (-0, -1), i.e. the
            # first column only.
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('FONTSIZE', (0, 0), (-1, -1), 10),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
        ]))
        return table, i
    except Exception as e:
        # Best effort: a malformed table must not abort PDF generation.
        logger.warning(f"Error parsing LaTeX table: {e}")
        return None, start_idx
def _parse_markdown_basic(self, markdown: str) -> list[Paragraph]:
"""
Convierte markdown básico a una lista de Paragraphs de reportlab.
@@ -101,6 +187,14 @@ class PDFGenerator:
# Línea horizontal
elif line == "---" or line == "***":
elements.append(Spacer(1, 0.2 * cm))
# Tabla LaTeX
elif "begin{tabular}" in line or "begin{tabular" in line:
latex_table, end_idx = self._parse_latex_table(lines, idx)
if latex_table:
elements.append(Spacer(1, 0.3 * cm))
elements.append(latex_table)
elements.append(Spacer(1, 0.3 * cm))
idx = end_idx - 1 # Saltar las líneas de la tabla
# Lista con guiones
elif line.startswith("- ") or line.startswith("* "):
text = self._escape_xml(line[2:])