From d50772d96211510e1584b68bfdc6c4e2815f7822 Mon Sep 17 00:00:00 2001 From: renato97 Date: Wed, 25 Feb 2026 17:32:18 +0000 Subject: [PATCH] fix: Mejora parser de tablas LaTeX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Elimina líneas hline duplicadas - Mejora limpieza de comandos LaTeX en celdas - Usa regex para manejar {contenido} - Filtra celdas vacías Co-Authored-By: Claude Opus 4.6 --- services/pdf_generator.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/services/pdf_generator.py b/services/pdf_generator.py index 18bea6b..562ae04 100644 --- a/services/pdf_generator.py +++ b/services/pdf_generator.py @@ -94,13 +94,20 @@ class PDFGenerator: break if in_table: + # Saltar líneas de hline + if "hline" in line.replace("\\", "").replace(" ", ""): + i += 1 + continue + # Procesar línea de tabla - # Reemplazar & por separador y eliminar \\ - row_data = line.replace("&", "|").replace("\\", "").replace("\\\\", "") - # Limpiar formato LaTeX básico - row_data = row_data.replace("hline", "").replace("\\hline", "") + # Reemplazar & por separador + row_data = line.replace("&", "|") + # Eliminar comandos LaTeX + row_data = row_data.replace("\\", "").replace("\\\\", "").replace("hline", "") cells = [c.strip() for c in row_data.split("|") if c.strip()] - if cells: + # Filtrar celdas vacías + cells = [c for c in cells if c and c != "|"] + if cells and len(cells) > 1: # Al menos 2 columnas para ser tabla válida table_lines.append({"type": "row", "data": cells}) i += 1 @@ -118,12 +125,16 @@ class PDFGenerator: row = [] for cell in tl["data"]: cell = cell.strip() - # Eliminar comandos LaTeX restantes - cell = cell.replace("\\textbf{", "").replace("}", "") - cell = cell.replace("\\textit{", "") - cell = cell.replace("\\emph{", "") + # Eliminar comandos LaTeX restantes (manejar {contenido}) + import re + # Eliminar \textbf{...}, \textit{...}, \emph{...} + cell = re.sub(r'\\textbf\{([^}]*)\}', r'\1', cell) + cell = re.sub(r'\\textit\{([^}]*)\}', r'\1', cell) + cell = re.sub(r'\\emph\{([^}]*)\}', r'\1', cell) + cell = cell.replace("\\", "").replace("{", "").replace("}", "") cell = cell.strip() - row.append(cell) + if cell: + row.append(cell) if row: data.append(row)