- Implementa ProcessingMonitor singleton para procesamiento secuencial de archivos - Agrega AI summary service con soporte para MiniMax API - Agrega PDF generator para resúmenes - Agrega watchers para monitoreo de carpeta remota - Mejora sistema de notificaciones Telegram - Implementa gestión de VRAM para GPU - Configuración mediante variables de entorno (sin hardcoded secrets) - .env y transcriptions/ agregados a .gitignore Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
271 lines
8.5 KiB
Python
271 lines
8.5 KiB
Python
"""
|
|
Generador de PDFs desde texto y markdown.
|
|
|
|
Utiliza reportlab para la generación de PDFs con soporte UTF-8.
|
|
"""
|
|
import html
import logging
import re
from pathlib import Path
from typing import Union

from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PDFGenerator:
    """Generate PDF files from plain text or a basic subset of Markdown.

    Every input line is turned into a reportlab flowable (a ``Paragraph``,
    or a ``Spacer`` for blank lines and horizontal rules) and laid out on
    A4 pages with 2 cm margins.
    """

    # Inline emphasis, tried in this order: ***bold+italic***, **bold**,
    # *italic*.  A marker only counts when it hugs non-space text, so stray
    # asterisks ("2 * 3 * 4", a dangling "**") are left untouched instead of
    # being turned into empty or mis-nested tags.
    _INLINE_EMPHASIS_RE = re.compile(
        r"\*\*\*(?=\S)(?P<both>.+?)(?<=\S)\*\*\*"
        r"|\*\*(?=\S)(?P<bold>.+?)(?<=\S)\*\*"
        r"|(?<!\*)\*(?=[^\s*])(?P<italic>.+?)(?<=[^\s*])\*(?!\*)"
    )

    # (markdown prefix, markup template, style name).  Longest prefix first
    # so "### " is not mistaken for "# ".
    _HEADING_RULES = (
        ("### ", "<b>{}</b>", "CustomHeading2"),
        ("## ", "<b>{}</b>", "CustomHeading1"),
        ("# ", "<b><i>{}</i></b>", "CustomHeading1"),
    )

    def __init__(self) -> None:
        """Load reportlab's sample stylesheet and register the custom styles."""
        self._styles = getSampleStyleSheet()
        self._setup_styles()
        logger.info("PDFGenerator inicializado")

    def _setup_styles(self) -> None:
        """Register the ``Custom*`` paragraph styles used by the generator."""
        # (name, parent style, font size, leading, space after)
        custom_styles = (
            ("CustomNormal", "Normal", 11, 14, 6),
            ("CustomHeading1", "Heading1", 18, 22, 12),
            ("CustomHeading2", "Heading2", 14, 18, 10),
        )
        for name, parent, font_size, leading, space_after in custom_styles:
            self._styles.add(
                ParagraphStyle(
                    name=name,
                    parent=self._styles[parent],
                    fontSize=font_size,
                    leading=leading,
                    spaceAfter=space_after,
                )
            )

    def _escape_xml(self, text: str) -> str:
        """Escape *text* for use inside reportlab paragraph markup.

        The ampersand, less-than and greater-than characters are replaced by
        their XML entities (ampersand first, so the entities themselves are
        not escaped twice); newlines become ``<br/>`` tags.

        Args:
            text: Raw user text.

        Returns:
            The escaped text, safe to embed in a ``Paragraph``.
        """
        # quote=False: quotes are harmless in element content and must stay
        # readable in the rendered PDF.
        return html.escape(text, quote=False).replace("\n", "<br/>")

    def _format_inline_markdown(self, text: str) -> str:
        """Convert inline Markdown emphasis to reportlab markup tags.

        Handles ``***x***`` (bold italic), ``**x**`` (bold) and ``*x*``
        (italic).  Emphasis may nest; the inner content of every match is
        formatted recursively, so the produced tags are always well nested.
        Unpaired or space-surrounded asterisks are kept verbatim.

        Args:
            text: Text that has already been XML-escaped.

        Returns:
            The text with emphasis markers replaced by ``<b>`` / ``<i>`` tags.
        """

        def render(match: re.Match) -> str:
            if match["both"] is not None:
                inner = self._format_inline_markdown(match["both"])
                return f"<b><i>{inner}</i></b>"
            if match["bold"] is not None:
                inner = self._format_inline_markdown(match["bold"])
                return f"<b>{inner}</b>"
            inner = self._format_inline_markdown(match["italic"])
            return f"<i>{inner}</i>"

        return self._INLINE_EMPHASIS_RE.sub(render, text)

    @staticmethod
    def _split_numbered_item(line: str) -> Union[tuple[str, str], None]:
        """Split a ``"12. text"`` list item into ``("12", "text")``.

        Returns ``None`` when *line* is not a numbered list item, i.e. when
        the part before the first ``". "`` is not made only of digits.  This
        keeps ordinary sentences that merely start with a digit intact.
        """
        number, separator, item = line.partition(". ")
        if separator and number.isdigit():
            return number, item
        return None

    def _to_markup(self, raw: str) -> str:
        """Escape *raw* and then apply inline Markdown formatting to it."""
        return self._format_inline_markdown(self._escape_xml(raw))

    def _line_to_flowable(self, line: str) -> Union[Paragraph, Spacer]:
        """Translate one non-empty, stripped Markdown line into a flowable."""
        for prefix, template, style_name in self._HEADING_RULES:
            if line.startswith(prefix):
                markup = template.format(self._to_markup(line[len(prefix):]))
                return Paragraph(markup, self._styles[style_name])

        # Horizontal rules are rendered as a small vertical gap.
        if line in ("---", "***"):
            return Spacer(1, 0.2 * cm)

        if line.startswith(("- ", "* ")):
            markup = f"• {self._to_markup(line[2:])}"
        else:
            numbered = self._split_numbered_item(line)
            if numbered is not None:
                number, item = numbered
                # Keep the item number so ordered lists stay ordered.
                markup = f"{number}. {self._to_markup(item)}"
            else:
                markup = self._to_markup(line)
        return Paragraph(markup, self._styles["CustomNormal"])

    def _parse_markdown_basic(
        self, markdown: str
    ) -> list[Union[Paragraph, Spacer]]:
        """Convert basic Markdown into a list of reportlab flowables.

        Supported constructs: ``#`` / ``##`` / ``###`` headings, ``-`` / ``*``
        bullets, ``N.`` numbered items, ``---`` / ``***`` horizontal rules,
        inline bold/italic, and blank lines (rendered as vertical space).
        Anything else becomes a normal paragraph.

        Args:
            markdown: Markdown source text.

        Returns:
            One flowable per input line, in document order.
        """
        flowables: list[Union[Paragraph, Spacer]] = []
        for raw_line in markdown.split("\n"):
            line = raw_line.strip()
            if line:
                flowables.append(self._line_to_flowable(line))
            else:
                flowables.append(Spacer(1, 0.3 * cm))
        return flowables

    def _write_pdf(
        self, flowables: list[Union[Paragraph, Spacer]], output_path: Path
    ) -> Path:
        """Lay out *flowables* on A4 pages and write them to *output_path*."""
        margin = 2 * cm
        doc = SimpleDocTemplate(
            str(output_path),
            pagesize=A4,
            leftMargin=margin,
            rightMargin=margin,
            topMargin=margin,
            bottomMargin=margin,
        )
        doc.build(flowables)

        logger.info(
            "PDF generado exitosamente",
            extra={"output_path": str(output_path), "pages": "unknown"},
        )
        return output_path

    def markdown_to_pdf(self, markdown_text: str, output_path: Path) -> Path:
        """Render Markdown text to a PDF file.

        Args:
            markdown_text: Content in Markdown format.
            output_path: Destination path of the PDF.

        Returns:
            The path of the generated PDF (``output_path``).

        Raises:
            ValueError: If the content is empty or whitespace only.
            IOError: If parsing, layout or writing the file fails.
        """
        if not markdown_text or not markdown_text.strip():
            logger.warning("markdown_to_pdf llamado con contenido vacío")
            raise ValueError("El contenido markdown no puede estar vacío")

        logger.info(
            "Convirtiendo markdown a PDF",
            extra={
                "content_length": len(markdown_text),
                "output_path": str(output_path),
            },
        )

        try:
            flowables = self._parse_markdown_basic(markdown_text)
            return self._write_pdf(flowables, output_path)
        except Exception as e:
            # Boundary handler: callers only expect IOError from this method.
            logger.exception("Error al generar PDF desde markdown: %s", e)
            raise IOError(f"Error al generar PDF: {e}") from e

    def text_to_pdf(self, text: str, output_path: Path) -> Path:
        """Render plain text to a PDF file, one paragraph per input line.

        Args:
            text: Plain-text content.
            output_path: Destination path of the PDF.

        Returns:
            The path of the generated PDF (``output_path``).

        Raises:
            ValueError: If the content is empty or whitespace only.
            IOError: If layout or writing the file fails.
        """
        if not text or not text.strip():
            logger.warning("text_to_pdf llamado con contenido vacío")
            raise ValueError("El contenido de texto no puede estar vacío")

        logger.info(
            "Convirtiendo texto a PDF",
            extra={
                "content_length": len(text),
                "output_path": str(output_path),
            },
        )

        try:
            flowables: list[Union[Paragraph, Spacer]] = []
            for raw_line in text.split("\n"):
                line = raw_line.strip()
                if line:
                    flowables.append(
                        Paragraph(
                            self._escape_xml(line),
                            self._styles["CustomNormal"],
                        )
                    )
                else:
                    # Blank lines become vertical space between paragraphs.
                    flowables.append(Spacer(1, 0.3 * cm))
            return self._write_pdf(flowables, output_path)
        except Exception as e:
            # Boundary handler: callers only expect IOError from this method.
            logger.exception("Error al generar PDF desde texto: %s", e)
            raise IOError(f"Error al generar PDF: {e}") from e
|
|
|
|
|
|
# Instancia global del generador
|
|
# Built when this module is imported: PDFGenerator.__init__ loads reportlab's
# sample stylesheet, registers the custom styles and logs an INFO message.
pdf_generator = PDFGenerator()
|