"""PDF generation from plain text and basic markdown.

Uses reportlab for PDF generation with UTF-8 support.
"""

import logging
import re
from pathlib import Path
from typing import Optional, Union

from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle

logger = logging.getLogger(__name__)

# Anything the parsers may put into the document story.
_Flowable = Union[Paragraph, Spacer, Table]

# (name, parent style, fontSize, leading, spaceAfter) for each custom style.
_CUSTOM_STYLES = (
    ("CustomNormal", "Normal", 11, 14, 6),
    ("CustomHeading1", "Heading1", 18, 22, 12),
    ("CustomHeading2", "Heading2", 14, 18, 10),
)

# Markdown heading prefix -> style name. "#" and "##" intentionally share
# CustomHeading1 because only two heading styles are defined.
_HEADING_STYLES = (
    ("### ", "CustomHeading2"),
    ("## ", "CustomHeading1"),
    ("# ", "CustomHeading1"),
)

# "12. item text" -> ("12", "item text")
_NUMBERED_ITEM = re.compile(r"^(\d+)\.\s+(.*)$")

# \hline, with the backslash optional so already-mangled input is tolerated.
_LATEX_HLINE = re.compile(r"\\?hline")
# Column separator: an ampersand that is not escaped as \&.
_LATEX_CELL_SEPARATOR = re.compile(r"(?<!\\)&")
# \textbf{...}, \textit{...}, \emph{...} -> keep only the braced content.
_LATEX_INLINE_COMMAND = re.compile(r"\\(?:textbf|textit|emph)\{([^}]*)\}")


def _clean_latex_cell(cell: str) -> str:
    """Return the plain text of one LaTeX table cell."""
    # Unwrap formatting commands BEFORE stripping backslashes; otherwise the
    # command name would leak into the text ("\textbf{X}" -> "textbfX").
    # Repeat so nested commands such as \textbf{\textit{x}} fully unwrap.
    previous = None
    while previous != cell:
        previous = cell
        cell = _LATEX_INLINE_COMMAND.sub(r"\1", cell)
    # Whatever is left (row terminator "\\", escapes, stray braces) is noise.
    return cell.replace("\\", "").replace("{", "").replace("}", "").strip()


def _split_latex_row(line: str) -> list[str]:
    """Split one tabular body line into cleaned cells.

    Returns an empty list when the line is not a usable row: fewer than two
    columns, or every cell empty (for example a line holding only ``\\hline``).
    Empty cells inside a valid row are kept so columns stay aligned.
    """
    line = _LATEX_HLINE.sub("", line)
    cells = [_clean_latex_cell(part) for part in _LATEX_CELL_SEPARATOR.split(line)]
    if len(cells) < 2 or not any(cells):
        return []
    return cells


class PDFGenerator:
    """Generate PDF files from plain text or basic markdown."""

    def __init__(self) -> None:
        """Load reportlab's sample stylesheet and register the custom styles."""
        self._styles = getSampleStyleSheet()
        self._setup_styles()
        logger.info("PDFGenerator inicializado")

    def _setup_styles(self) -> None:
        """Register the custom paragraph styles used by the document."""
        for name, parent, font_size, leading, space_after in _CUSTOM_STYLES:
            self._styles.add(
                ParagraphStyle(
                    name=name,
                    parent=self._styles[parent],
                    fontSize=font_size,
                    leading=leading,
                    spaceAfter=space_after,
                )
            )

    def _escape_xml(self, text: str) -> str:
        """Escape text for use inside a reportlab ``Paragraph``.

        ``&``, ``<`` and ``>`` become XML entities and newlines become
        ``<br/>`` tags.
        """
        # "&" must be replaced first so the entities added below are not
        # escaped a second time.
        return (
            text.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace("\n", "<br/>")
        )

    def _parse_latex_table(
        self, lines: list[str], start_idx: int
    ) -> tuple[Optional[Table], int]:
        """Parse a LaTeX ``tabular`` environment into a reportlab ``Table``.

        Scanning starts at ``start_idx``. Lines before ``begin{tabular}`` are
        ignored, and anything on the ``begin{tabular}`` line itself is not
        treated as row data.

        Args:
            lines: All lines of the document.
            start_idx: Index in ``lines`` where scanning starts.

        Returns:
            ``(table, end_index)`` where ``end_index`` is the index of the
            line containing ``end{tabular}``, or ``len(lines)`` if the
            environment is never closed. ``(None, start_idx)`` when no usable
            row was found or the table could not be built.
        """
        rows: list[list[str]] = []
        i = start_idx
        in_table = False

        while i < len(lines):
            line = lines[i].strip()
            if "begin{tabular}" in line:
                in_table = True
            elif "end{tabular}" in line:
                break
            elif in_table:
                cells = _split_latex_row(line)
                if cells:
                    rows.append(cells)
            i += 1

        if not rows:
            return None, start_idx

        try:
            table = Table(rows)
            table.setStyle(
                TableStyle(
                    [
                        ("BACKGROUND", (0, 0), (-1, 0), colors.grey),
                        ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
                        ("ALIGN", (0, 0), (-1, -1), "LEFT"),
                        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                        # Whole table; (-0, -1) would only cover column 0.
                        ("FONTSIZE", (0, 0), (-1, -1), 10),
                        ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
                        ("BACKGROUND", (0, 1), (-1, -1), colors.beige),
                        ("GRID", (0, 0), (-1, -1), 1, colors.black),
                        ("VALIGN", (0, 0), (-1, -1), "TOP"),
                    ]
                )
            )
        except Exception as e:
            # Best effort: a broken table must not abort the whole document.
            logger.warning("Error parsing LaTeX table: %s", e)
            return None, start_idx
        return table, i

    def _heading_paragraph(self, line: str) -> Optional[Paragraph]:
        """Return a heading ``Paragraph`` if ``line`` is a markdown heading."""
        for prefix, style_name in _HEADING_STYLES:
            if line.startswith(prefix):
                text = self._escape_xml(line[len(prefix):])
                return Paragraph(text, self._styles[style_name])
        return None

    def _parse_markdown_basic(self, markdown: str) -> list[_Flowable]:
        """Convert basic markdown into a list of reportlab flowables.

        Handles headings, bold, italics, horizontal rules (rendered as
        vertical space), bullet and numbered lists, LaTeX ``tabular``
        environments and blank lines.
        """
        elements: list[_Flowable] = []
        lines = markdown.split("\n")
        normal = self._styles["CustomNormal"]

        # Index-driven loop so a parsed table can jump past its own lines.
        idx = 0
        while idx < len(lines):
            line = lines[idx].strip()
            idx += 1  # from here on idx is the index of the NEXT line

            if not line:
                elements.append(Spacer(1, 0.3 * cm))
                continue

            heading = self._heading_paragraph(line)
            if heading is not None:
                elements.append(heading)
            # Horizontal rule
            elif line in ("---", "***"):
                elements.append(Spacer(1, 0.2 * cm))
            # LaTeX table
            elif "begin{tabular" in line:
                latex_table, end_idx = self._parse_latex_table(lines, idx - 1)
                if latex_table is not None:
                    elements.append(Spacer(1, 0.3 * cm))
                    elements.append(latex_table)
                    elements.append(Spacer(1, 0.3 * cm))
                    # Resume after the end{tabular} line.
                    idx = end_idx + 1
            # Bullet list item
            elif line.startswith(("- ", "* ")):
                text = self._format_inline_markdown(self._escape_xml(line[2:]))
                elements.append(Paragraph(f"• {text}", normal))
            # Numbered list item (the number is kept in the output)
            elif (numbered := _NUMBERED_ITEM.match(line)) is not None:
                number, body = numbered.groups()
                text = self._format_inline_markdown(self._escape_xml(body))
                elements.append(Paragraph(f"{number}. {text}", normal))
            # Plain paragraph
            else:
                text = self._format_inline_markdown(self._escape_xml(line))
                elements.append(Paragraph(text, normal))

        return elements

    @staticmethod
    def _wrap_marker_pairs(text: str, marker: str, tag: str) -> str:
        """Replace each ``marker...marker`` pair with ``<tag>...</tag>``.

        An unmatched trailing marker is left in the text untouched.
        """
        size = len(marker)
        while True:
            start = text.find(marker)
            if start == -1:
                break
            end = text.find(marker, start + size)
            if end == -1:
                break
            inner = text[start + size:end]
            text = f"{text[:start]}<{tag}>{inner}</{tag}>{text[end + size:]}"
        return text

    def _format_inline_markdown(self, text: str) -> str:
        """Convert inline markdown emphasis to reportlab paragraph markup.

        ``**text**`` becomes ``<b>text</b>`` and ``*text*`` becomes
        ``<i>text</i>``. Bold is processed first so its double asterisks are
        not consumed as two italic markers.
        """
        text = self._wrap_marker_pairs(text, "**", "b")
        return self._wrap_marker_pairs(text, "*", "i")

    def _build_document(self, elements: list, output_path: Path) -> None:
        """Write ``elements`` to an A4 PDF with 2 cm margins at ``output_path``."""
        doc = SimpleDocTemplate(
            str(output_path),
            pagesize=A4,
            leftMargin=2 * cm,
            rightMargin=2 * cm,
            topMargin=2 * cm,
            bottomMargin=2 * cm,
        )
        doc.build(elements)

    def markdown_to_pdf(self, markdown_text: str, output_path: Path) -> Path:
        """Convert markdown to a PDF file.

        Args:
            markdown_text: Content in markdown format.
            output_path: Path where the PDF will be written.

        Returns:
            Path: ``output_path``, once the PDF has been generated.

        Raises:
            ValueError: If the content is empty or whitespace only.
            IOError: If parsing or writing the PDF fails for any reason.
        """
        if not markdown_text or not markdown_text.strip():
            logger.warning("markdown_to_pdf llamado con contenido vacío")
            raise ValueError("El contenido markdown no puede estar vacío")

        logger.info(
            "Convirtiendo markdown a PDF",
            extra={
                "content_length": len(markdown_text),
                "output_path": str(output_path),
            },
        )

        try:
            elements = self._parse_markdown_basic(markdown_text)
            self._build_document(elements, output_path)
        except Exception as e:
            # Boundary of the public API: log with traceback, then surface a
            # single documented exception type to callers.
            logger.exception("Error al generar PDF desde markdown: %s", e)
            raise IOError(f"Error al generar PDF: {e}") from e

        logger.info(
            "PDF generado exitosamente",
            extra={"output_path": str(output_path), "pages": "unknown"},
        )
        return output_path

    def text_to_pdf(self, text: str, output_path: Path) -> Path:
        """Convert plain text to a PDF file, one paragraph per input line.

        Args:
            text: Plain-text content.
            output_path: Path where the PDF will be written.

        Returns:
            Path: ``output_path``, once the PDF has been generated.

        Raises:
            ValueError: If the content is empty or whitespace only.
            IOError: If building or writing the PDF fails for any reason.
        """
        if not text or not text.strip():
            logger.warning("text_to_pdf llamado con contenido vacío")
            raise ValueError("El contenido de texto no puede estar vacío")

        logger.info(
            "Convirtiendo texto a PDF",
            extra={
                "content_length": len(text),
                "output_path": str(output_path),
            },
        )

        try:
            normal = self._styles["CustomNormal"]
            elements: list[Union[Paragraph, Spacer]] = []
            for raw_line in text.split("\n"):
                line = raw_line.strip()
                if line:
                    elements.append(Paragraph(self._escape_xml(line), normal))
                else:
                    # Blank input lines become vertical space.
                    elements.append(Spacer(1, 0.3 * cm))
            self._build_document(elements, output_path)
        except Exception as e:
            # Same boundary handling as markdown_to_pdf.
            logger.exception("Error al generar PDF desde texto: %s", e)
            raise IOError(f"Error al generar PDF: {e}") from e

        logger.info(
            "PDF generado exitosamente",
            extra={"output_path": str(output_path), "pages": "unknown"},
        )
        return output_path


# Global generator instance
pdf_generator = PDFGenerator()