fix: Mejora parser de tablas LaTeX
- Elimina líneas hline duplicadas
- Mejora limpieza de comandos LaTeX en celdas
- Usa regex para eliminar comandos como \textbf{contenido}, \textit{contenido} y \emph{contenido} conservando el contenido
- Filtra celdas vacías
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -94,13 +94,20 @@ class PDFGenerator:
|
|||||||
break
|
break
|
||||||
|
|
||||||
if in_table:
|
if in_table:
|
||||||
|
# Saltar líneas de hline
|
||||||
|
if "hline" in line.replace("\\", "").replace(" ", ""):
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
|
||||||
# Procesar línea de tabla
|
# Procesar línea de tabla
|
||||||
# Reemplazar & por separador y eliminar \\
|
# Reemplazar & por separador
|
||||||
row_data = line.replace("&", "|").replace("\\", "").replace("\\\\", "")
|
row_data = line.replace("&", "|")
|
||||||
# Limpiar formato LaTeX básico
|
# Eliminar comandos LaTeX
|
||||||
row_data = row_data.replace("hline", "").replace("\\hline", "")
|
row_data = row_data.replace("\\", "").replace("\\\\", "").replace("hline", "")
|
||||||
cells = [c.strip() for c in row_data.split("|") if c.strip()]
|
cells = [c.strip() for c in row_data.split("|") if c.strip()]
|
||||||
if cells:
|
# Filtrar celdas vacías
|
||||||
|
cells = [c for c in cells if c and c != "|"]
|
||||||
|
if cells and len(cells) > 1: # Al menos 2 columnas para ser tabla válida
|
||||||
table_lines.append({"type": "row", "data": cells})
|
table_lines.append({"type": "row", "data": cells})
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
@@ -118,12 +125,16 @@ class PDFGenerator:
|
|||||||
row = []
|
row = []
|
||||||
for cell in tl["data"]:
|
for cell in tl["data"]:
|
||||||
cell = cell.strip()
|
cell = cell.strip()
|
||||||
# Eliminar comandos LaTeX restantes
|
# Eliminar comandos LaTeX restantes (manejar {contenido})
|
||||||
cell = cell.replace("\\textbf{", "").replace("}", "")
|
import re
|
||||||
cell = cell.replace("\\textit{", "")
|
# Eliminar \textbf{...}, \textit{...}, \emph{...}
|
||||||
cell = cell.replace("\\emph{", "")
|
cell = re.sub(r'\\textbf\{([^}]*)\}', r'\1', cell)
|
||||||
|
cell = re.sub(r'\\textit\{([^}]*)\}', r'\1', cell)
|
||||||
|
cell = re.sub(r'\\emph\{([^}]*)\}', r'\1', cell)
|
||||||
|
cell = cell.replace("\\", "").replace("{", "").replace("}", "")
|
||||||
cell = cell.strip()
|
cell = cell.strip()
|
||||||
row.append(cell)
|
if cell:
|
||||||
|
row.append(cell)
|
||||||
if row:
|
if row:
|
||||||
data.append(row)
|
data.append(row)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user