feat: Sistema LaTeX mejorado con sanitización automática y corrección de TikZ

Cambios principales:

## Nuevos archivos
- services/ai/parallel_provider.py: Ejecución paralela de múltiples proveedores AI
- services/ai/prompt_manager.py: Gestión centralizada de prompts (resumen.md como fuente)
- latex/resumen.md: Template del prompt para resúmenes académicos LaTeX

## Mejoras en generación LaTeX (document/generators.py)
- Nueva función _sanitize_latex(): Corrige automáticamente errores comunes de AI
  - Agrega align=center a nodos TikZ con saltos de línea (\\)
  - Previene errores 'Not allowed in LR mode' antes de compilar
- Soporte para procesamiento paralelo de proveedores AI
- Conversión DOCX en paralelo con generación PDF
- Uploads a Notion en background (non-blocking)
- Callbacks de notificación para progreso en Telegram

## Mejoras en proveedores AI
- claude_provider.py: fix_latex() con instrucciones específicas para errores TikZ
- gemini_provider.py: fix_latex() mejorado + rate limiting + circuit breaker
- provider_factory.py: Soporte para parallel provider

## Otros cambios
- config/settings.py: Nuevas configuraciones para Gemini models
- services/webdav_service.py: Mejoras en manejo de conexión
- .gitignore: Ignora archivos LaTeX auxiliares (.aux, .toc, .out, .pdf)

## Archivos de ejemplo
- latex/imperio_romano.tex, latex/clase_revolucion_rusa_crisis_30.tex
- resumen_curiosidades.tex (corregido y compilado exitosamente)
This commit is contained in:
renato97
2026-02-07 20:50:27 +00:00
parent 915f827305
commit dcf887c510
15 changed files with 4309 additions and 409 deletions

View File

@@ -1,352 +1,669 @@
"""
Document generation utilities
Document generation utilities - LaTeX Academic Summary System
This module generates comprehensive academic summaries in LaTeX format
following the specifications in latex/resumen.md (the SINGLE SOURCE OF TRUTH).
Parallel Processing: Uses multiple agents for accelerated summary generation:
- AI Provider Racing: Multiple AI providers generate in parallel
- Parallel Format Conversion: PDF + DOCX generated simultaneously
- Background Notion Uploads: Non-blocking uploads to Notion
"""
import logging
import subprocess
import shutil
import re
import threading
from pathlib import Path
from typing import Dict, Any, List, Tuple
from typing import Dict, Any, Optional, Tuple, Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from core import FileProcessingError
from config import settings
from services.ai import ai_provider_factory
from services.ai.prompt_manager import prompt_manager
def _sanitize_latex(latex_code: str) -> str:
"""
Pre-process LaTeX code to fix common errors before compilation.
This function applies automated fixes for known issues that AI models
frequently generate, reducing the need for fix_latex() iterations.
Currently handles:
- TikZ nodes with line breaks (\\\\) missing align=center
- Unbalanced environments (best effort)
"""
if not latex_code:
return latex_code
result = latex_code
# Fix TikZ nodes with \\\\ but missing align=center
# Pattern: \node[...] (name) {Text\\More};
# This is a common AI error - TikZ requires align=center for \\\\ in nodes
# We need to find \node commands and add align=center if they have \\\\ in content
# but don't already have align= in their options
def fix_tikz_node(match):
"""Fix a single TikZ node by adding align=center if needed"""
full_match = match.group(0)
options = match.group(1) # Content inside [...]
rest = match.group(2) # Everything after options
# Check if this node has \\\\ in its content (text between { })
# and doesn't already have align=
if "\\\\" in rest and "align=" not in options:
# Add align=center to the options
if options.strip():
new_options = options.rstrip() + ", align=center"
else:
new_options = "align=center"
return f"\\node[{new_options}]{rest}"
return full_match
# Match \node[options] followed by rest of the line
# Capture options and the rest separately
tikz_node_pattern = r"\\node\[([^\]]*)\]([^;]*;)"
result = re.sub(tikz_node_pattern, fix_tikz_node, result)
return result
class DocumentGenerator:
    """
    Generates academic summary documents in LaTeX format.

    The system follows these principles:
    1. latex/resumen.md is the SINGLE SOURCE OF TRUTH for prompt structure
    2. Generates full LaTeX documents (not Markdown)
    3. Compiles to PDF using pdflatex
    4. Supports iterative fixing with AI if compilation fails
    5. Supports progress notifications via callback
    """

    def __init__(self, notification_callback: Optional[Callable[[str], None]] = None):
        """
        Initialize DocumentGenerator.

        Args:
            notification_callback: Optional callback function for progress
                notifications. Takes a single string argument (message to send).
        """
        self.logger = logging.getLogger(__name__)
        self.ai_provider = ai_provider_factory.get_best_provider()
        self.notification_callback = notification_callback
        self.use_parallel = ai_provider_factory.use_parallel()
        # Shared pool for parallel format conversion (see _convert_formats_parallel)
        self.executor = ThreadPoolExecutor(max_workers=4)

        # Ensure output directories exist
        settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
        settings.LOCAL_DOCX.mkdir(parents=True, exist_ok=True)

        if self.use_parallel:
            self.logger.info(
                "🚀 Parallel processing enabled: Multiple AI providers available"
            )
def _notify(self, message: str) -> None:
"""Send notification if callback is configured"""
if self.notification_callback:
try:
self.notification_callback(message)
except Exception as e:
self.logger.warning(f"Failed to send notification: {e}")
def _generate_with_parallel_provider(self, prompt: str, **kwargs) -> str:
"""
Generate content using multiple AI providers in parallel.
Races multiple providers and returns the first successful response,
or the best quality response if using consensus strategy.
"""
try:
parallel_provider = ai_provider_factory.get_parallel_provider(max_workers=4)
self.logger.info("🚀 Using parallel AI provider (race mode)")
result = parallel_provider.generate_parallel(
prompt=prompt,
strategy="race", # Use first successful response
timeout_ms=300000, # 5 minutes
**kwargs,
)
self.logger.info(
f"✅ Parallel generation complete: {result.selected_provider} selected, "
f"{result.total_duration_ms}ms"
)
return result.content
except Exception as e:
self.logger.warning(
f"⚠️ Parallel generation failed: {e}, falling back to single provider"
)
return self.ai_provider.generate_text(prompt, **kwargs)
def _convert_formats_parallel(
self, tex_path: Path, pdf_path: Optional[Path], base_name: str
) -> Optional[Path]:
"""
Convert to multiple formats in parallel (DOCX, optionally PDF).
If PDF is already compiled, only DOCX is generated.
Otherwise, both PDF and DOCX are generated in parallel.
"""
futures = {}
# Generate DOCX
if shutil.which("pandoc"):
futures["docx"] = self.executor.submit(
self._convert_tex_to_docx, tex_path, base_name
)
# Wait for DOCX completion
docx_path = None
if "docx" in futures:
try:
docx_path = futures["docx"].result(timeout=60)
if docx_path:
self.logger.info(f"✅ Parallel DOCX generated: {docx_path}")
except Exception as e:
self.logger.warning(f"⚠️ DOCX generation failed: {e}")
return docx_path
def _upload_to_notion_background(
self,
base_name: str,
summary: str,
pdf_path: Optional[Path],
metadata: Dict[str, Any],
):
"""Upload to Notion in background thread (non-blocking)."""
def upload_worker():
try:
from services.notion_service import notion_service
title = base_name.replace("_", " ").title()
notion_metadata = {
"file_type": "Audio",
"pdf_path": pdf_path or Path(""),
"add_status": False,
"use_as_page": False,
}
page_id = notion_service.create_page_with_summary(
title=title, summary=summary, metadata=notion_metadata
)
if page_id:
metadata["notion_uploaded"] = True
metadata["notion_page_id"] = page_id
self.logger.info(
f"✅ Background upload to Notion complete: {title}"
)
else:
self.logger.warning(f"⚠️ Background Notion upload failed: {title}")
except Exception as e:
self.logger.warning(f"❌ Background Notion upload error: {e}")
# Start background thread
thread = threading.Thread(target=upload_worker, daemon=True)
thread.start()
self.logger.info("🔄 Notion upload started in background")
def generate_summary(
self, text: str, base_name: str
self,
text: str,
base_name: str,
materia: str = "Economía",
bibliographic_text: Optional[str] = None,
class_number: Optional[int] = None,
) -> Tuple[bool, str, Dict[str, Any]]:
"""Generate unified summary"""
self.logger.info(f"Generating summary for {base_name}")
"""
Generate comprehensive academic summary in LaTeX format.
Args:
text: The class transcription text
base_name: Base filename for output files
materia: Subject name (default: "Economía")
bibliographic_text: Optional supporting material from books/notes
class_number: Optional class number for header
Returns:
Tuple of (success, summary_text, metadata)
"""
self.logger.info(
f"🚀 Starting LaTeX academic summary generation for: {base_name}"
)
metadata = {
"filename": base_name,
"tex_path": "",
"pdf_path": "",
"markdown_path": "",
"docx_path": "",
"summary_snippet": "",
"notion_uploaded": False,
"notion_page_id": None,
"materia": materia,
}
try:
# Step 1: Generate Bullet Points (Chunking handled by provider or single prompt for now)
# Note: We use the main provider (Claude/Zai) for content generation
self.logger.info("Generating bullet points...")
bullet_prompt = f"""Analiza el siguiente texto y extrae entre 5 y 8 bullet points clave en español.
# === STEP 1: Generate LaTeX content using AI ===
self.logger.info(
"🧠 Sending request to AI Provider for LaTeX generation..."
)
self._notify("📝 Preparando prompt de resumen académico...")
REGLAS ESTRICTAS:
1. Devuelve ÚNICAMENTE bullet points, cada línea iniciando con "- "
2. Cada bullet debe ser conciso (12-20 palabras) y resaltar datos, fechas, conceptos o conclusiones importantes
3. NO agregues introducciones, conclusiones ni texto explicativo
4. Concéntrate en los puntos más importantes del texto
5. Incluye fechas, datos específicos y nombres relevantes si los hay
prompt = prompt_manager.get_latex_summary_prompt(
transcription=text,
materia=materia,
bibliographic_text=bibliographic_text,
class_number=class_number,
)
Texto:
{text[:15000]}""" # Truncate to avoid context limits if necessary, though providers handle it differently
self._notify(
"🧠 Enviando solicitud a la IA (esto puede tardar unos minutos)..."
)
try:
bullet_points = self.ai_provider.generate_text(bullet_prompt)
self.logger.info(f"Bullet points generated: {len(bullet_points)}")
except Exception as e:
self.logger.warning(f"Bullet point generation failed: {e}")
bullet_points = "- Puntos clave no disponibles por error en IA"
# Use parallel provider if multiple AI providers are available
if self.use_parallel:
raw_response = self._generate_with_parallel_provider(prompt)
else:
raw_response = self.ai_provider.generate_text(prompt)
# Step 2: Generate Unified Summary
self.logger.info("Generating unified summary...")
summary_prompt = f"""Eres un profesor universitario experto en historia y economía. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
if not raw_response:
raise FileProcessingError("AI returned empty response")
REQUISITOS ESTRICTOS DE CONTENIDO:
- Extensión entre 500-700 palabras
- Usa encabezados Markdown con jerarquía clara (##, ###)
- Desarrolla los puntos clave con profundidad y contexto histórico/económico
- Mantén un tono académico y analítico
- Incluye conclusiones significativas
- NO agregues texto fuera del resumen
- Devuelve únicamente el resumen en formato Markdown
self.logger.info(f"📝 AI response received: {len(raw_response)} characters")
self._notify(f"✅ Respuesta recibida ({len(raw_response)} caracteres)")
REQUISITOS ESTRICTOS DE FORMATO MATEMÁTICO (LaTeX):
- Si el texto incluye fórmulas matemáticas o económicas, DEBES usar formato LaTeX.
- Usa bloques $$ ... $$ para ecuaciones centradas importantes.
- Usa $ ... $ para ecuaciones en línea.
- Ejemplo: La fórmula del interés compuesto es $A = P(1 + r/n)^{{nt}}$.
- NO uses bloques de código (```latex) para las fórmulas, úsalas directamente en el texto para que Pandoc las renderice.
# === STEP 2: Extract clean LaTeX from AI response ===
self._notify("🔍 Extrayendo código LaTeX...")
Contenido a resumir:
{text[:20000]}
latex_content = prompt_manager.extract_latex_from_response(raw_response)
Puntos clave a incluir obligatoriamente:
{bullet_points}"""
if not latex_content:
self.logger.warning(
"⚠️ No valid LaTeX found in response, treating as Markdown"
)
self._notify("⚠️ No se detectó LaTeX válido, usando modo compatible...")
# Fallback to Markdown processing
return self._fallback_to_markdown(raw_response, base_name, metadata)
try:
raw_summary = self.ai_provider.generate_text(summary_prompt)
except Exception as e:
self.logger.error(f"Raw summary generation failed: {e}")
raise e
self.logger.info("✨ Valid LaTeX content detected")
self._notify(f"✨ LaTeX detectado: {len(latex_content)} caracteres")
# Step 3: Format with IA (using main provider instead of Gemini)
self.logger.info("Formatting summary with IA...")
format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible y compatible con Pandoc:
# === STEP 3: Compilation Loop with Self-Correction ===
max_retries = 3
current_latex = latex_content
{raw_summary}
for attempt in range(max_retries + 1):
# Sanitize LaTeX before saving (fix common AI errors like TikZ nodes)
current_latex = _sanitize_latex(current_latex)
Instrucciones:
- Corrige cualquier error de formato Markdown
- Asegúrate de que los encabezados estén bien espaciados
- Verifica que las viñetas usen "- " correctamente
- Mantén exactamente el contenido existente
- EVITA el uso excesivo de negritas (asteriscos), úsalas solo para conceptos clave
- VERIFICA que todas las fórmulas matemáticas estén correctamente encerradas en $...$ (inline) o $$...$$ (display)
- NO alteres la sintaxis LaTeX dentro de los delimitadores $...$ o $$...$$
- Devuelve únicamente el resumen formateado sin texto adicional"""
# Save current .tex file
self._notify(
f"📄 Guardando archivo .tex (intento {attempt + 1}/{max_retries + 1})..."
)
try:
# Use the main provider (Claude/GLM) for formatting too
if self.ai_provider.is_available():
summary = self.ai_provider.generate_text(format_prompt)
else:
self.logger.warning(
"AI provider not available for formatting, using raw summary"
tex_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.tex"
tex_path.write_text(current_latex, encoding="utf-8")
metadata["tex_path"] = str(tex_path)
# Try to compile
self._notify("⚙️ Primera pasada de compilación LaTeX...")
pdf_path = self._compile_latex(
tex_path, output_dir=settings.LOCAL_DOWNLOADS_PATH
)
if pdf_path:
self.logger.info(
f"✅ Compilation success on attempt {attempt + 1}!"
)
summary = raw_summary
except Exception as e:
self.logger.warning(f"Formatting failed ({e}), using raw summary")
summary = raw_summary
self._notify("✅ PDF generado exitosamente!")
metadata["pdf_path"] = str(pdf_path)
# Generate filename
filename = self._generate_filename(text, summary)
# Generate DOCX in parallel
self._notify("📄 Generando archivo DOCX en paralelo...")
docx_path = self._convert_formats_parallel(
tex_path, pdf_path, base_name
)
if docx_path:
self._notify("✅ DOCX generado exitosamente!")
metadata["docx_path"] = str(docx_path)
# Create document
markdown_path = self._create_markdown(summary, base_name)
# Create a text summary for Notion/preview
text_summary = self._create_text_summary(current_latex)
metadata["summary_snippet"] = text_summary[:500] + "..."
docx_path = None
try:
docx_path = self._create_docx(markdown_path, base_name)
except Exception as e:
self.logger.error(f"Failed to create DOCX (non-critical): {e}")
# Upload to Notion in background if configured
if settings.has_notion_config:
self._notify("📤 Iniciando carga a Notion en segundo plano...")
self._upload_to_notion_background(
base_name=base_name,
summary=text_summary,
pdf_path=pdf_path,
metadata=metadata,
)
pdf_path = None
try:
# Sanitize LaTeX before PDF generation
self._sanitize_latex(markdown_path)
pdf_path = self._create_pdf(markdown_path, base_name)
except Exception as e:
self.logger.error(f"Failed to create PDF (non-critical): {e}")
self._notify("🎉 ¡Resumen completado con éxito!")
return True, text_summary, metadata
# Upload to Notion if configured
# Compilation failed - ask AI to fix
if attempt < max_retries:
self.logger.warning(
f"⚠️ Compilation failed (Attempt {attempt + 1}/{max_retries + 1}). "
f"Requesting AI fix..."
)
self._notify(
f"⚠️ Error de compilación ({attempt + 1}/{max_retries + 1}), solicitando corrección a IA..."
)
# Get error log
log_file = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.log"
error_log = "Log file not found"
if log_file.exists():
error_log = log_file.read_text(
encoding="utf-8", errors="ignore"
)[-2000:]
# Ask AI to fix
try:
self._notify("🔧 La IA está corrigiendo el código LaTeX...")
if hasattr(self.ai_provider, "fix_latex"):
fixed_latex = self.ai_provider.fix_latex(
current_latex, error_log
)
cleaned = prompt_manager.extract_latex_from_response(
fixed_latex
)
if cleaned:
current_latex = cleaned
else:
current_latex = fixed_latex
self._notify(
"✅ Código LaTeX corregido, reintentando compilación..."
)
else:
self.logger.error(
"❌ AI provider doesn't support fix_latex()"
)
break
except Exception as e:
self.logger.error(f"❌ AI fix request failed: {e}")
break
else:
self.logger.error(
"❌ Max retries reached. LaTeX compilation failed."
)
self._notify(
"❌ No se pudo compilar el LaTeX después de varios intentos"
)
# If we get here, all compilation attempts failed
self._notify("⚠️ Usando modo de compatibilidad Markdown...")
return self._fallback_to_markdown(
current_latex or raw_response, base_name, metadata
)
except Exception as e:
self.logger.error(
f"❌ Critical error in document generation: {e}", exc_info=True
)
self._notify(f"❌ Error en la generación: {str(e)[:100]}")
return False, "", metadata
def _compile_latex(self, tex_path: Path, output_dir: Path) -> Optional[Path]:
"""
Compile LaTeX to PDF using pdflatex. Runs twice for TOC.
Args:
tex_path: Path to .tex file
output_dir: Directory for output files
Returns:
Path to generated PDF or None if failed
"""
base_name = tex_path.stem
expected_pdf = output_dir / f"{base_name}.pdf"
# Check if pdflatex is available
if not shutil.which("pdflatex"):
self.logger.error("🚫 pdflatex not found in system PATH")
return None
cmd = [
"pdflatex",
"-interaction=nonstopmode",
"-halt-on-error",
f"-output-directory={output_dir}",
str(tex_path),
]
try:
# Pass 1
self.logger.info("⚙️ Compiling LaTeX (Pass 1/2)...")
subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
timeout=120,
)
# Pass 2 (for TOC resolution)
self.logger.info("⚙️ Compiling LaTeX (Pass 2/2)...")
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
timeout=120,
)
if result.returncode == 0 and expected_pdf.exists():
self.logger.info(f"✅ PDF generated: {expected_pdf}")
self._cleanup_latex_aux(output_dir, base_name)
return expected_pdf
else:
# Read log file for error info
log_file = output_dir / f"{base_name}.log"
error_snippet = "Unknown error"
if log_file.exists():
try:
log_content = log_file.read_text(
encoding="utf-8", errors="ignore"
)
errors = [
line
for line in log_content.splitlines()
if line.startswith("!")
]
if errors:
error_snippet = errors[0][:200]
except:
pass
self.logger.error(f"❌ LaTeX compilation failed: {error_snippet}")
return None
except subprocess.TimeoutExpired:
self.logger.error("❌ LaTeX compilation timed out")
return None
except Exception as e:
self.logger.error(f"❌ Error during LaTeX execution: {e}")
return None
def _convert_tex_to_docx(self, tex_path: Path, base_name: str) -> Optional[Path]:
"""Convert .tex to .docx using Pandoc."""
if not shutil.which("pandoc"):
self.logger.warning("⚠️ pandoc not found, skipping DOCX generation")
return None
docx_path = settings.LOCAL_DOCX / f"{base_name}.docx"
cmd = ["pandoc", str(tex_path), "-o", str(docx_path)]
try:
subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60)
self.logger.info(f"✅ DOCX generated: {docx_path}")
return docx_path
except Exception as e:
self.logger.warning(f"⚠️ DOCX generation failed: {e}")
return None
def _create_text_summary(self, latex_content: str) -> str:
"""Extract a plain text summary from LaTeX content for Notion/preview."""
# Remove LaTeX commands and keep content
text = latex_content
# Remove document class and packages
text = re.sub(r"\\documentclass\[?[^\]]*\]?\{[^\}]+\}", "", text)
text = re.sub(r"\\usepackage\{[^\}]+\}", "", text)
text = re.sub(r"\\geometry\{[^\}]+\}", "", text)
text = re.sub(r"\\pagestyle\{[^\}]+\}", "", text)
text = re.sub(r"\\fancyhf\{\}", "", text)
text = re.sub(r"\\fancyhead\[?[^\]]*\]?\{[^\}]+\}", "", text)
text = re.sub(r"\\fancyfoot\[?[^\]]*\]?\{[^\}]+\}", "", text)
# Convert sections to markdown-style
text = re.sub(r"\\section\*?\{([^\}]+)\}", r"# \1", text)
text = re.sub(r"\\subsection\*?\{([^\}]+)\}", r"## \1", text)
text = re.sub(r"\\subsubsection\*?\{([^\}]+)\}", r"### \1", text)
# Remove tcolorbox environments (keep content)
text = re.sub(
r"\\begin\{(definicion|importante|ejemplo)\}\[?[^\]]*\]?",
r"\n**\1:** ",
text,
)
text = re.sub(r"\\end\{(definicion|importante|ejemplo)\}", "", text)
# Convert itemize to bullets
text = re.sub(r"\\item\s*", "- ", text)
text = re.sub(r"\\begin\{(itemize|enumerate)\}", "", text)
text = re.sub(r"\\end\{(itemize|enumerate)\}", "", text)
# Clean up math (basic)
text = re.sub(r"\$\$([^\$]+)\$\$", r"\n\n\1\n\n", text)
text = re.sub(r"\$([^\$]+)\$", r"\1", text)
# Remove remaining LaTeX commands
text = re.sub(r"\\[a-zA-Z]+(\{[^\}]*\})*", "", text)
text = re.sub(r"[{}]", "", text)
# Clean whitespace
text = re.sub(r"\n\s*\n\s*\n", "\n\n", text)
text = text.strip()
return text
def _fallback_to_markdown(
self, content: str, base_name: str, metadata: Dict[str, Any]
) -> Tuple[bool, str, Dict[str, Any]]:
"""Fallback when LaTeX generation fails."""
self.logger.warning("⚠️ Falling back to Markdown processing")
md_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_resumen.md"
md_path.write_text(content, encoding="utf-8")
metadata["markdown_path"] = str(md_path)
# Try to convert to PDF via pandoc
if shutil.which("pandoc"):
pdf_path = self._convert_md_to_pdf(md_path, base_name)
if pdf_path:
metadata["pdf_path"] = str(pdf_path)
docx_path = self._convert_md_to_docx(md_path, base_name)
if docx_path:
metadata["docx_path"] = str(docx_path)
metadata["summary_snippet"] = content[:500] + "..."
return True, content, metadata
def _convert_md_to_pdf(self, md_path: Path, base_name: str) -> Optional[Path]:
"""Convert Markdown to PDF using pandoc."""
pdf_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.pdf"
cmd = [
"pandoc",
str(md_path),
"-o",
str(pdf_path),
"--pdf-engine=pdflatex",
"-V",
"geometry:margin=2.5cm",
]
try:
subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60)
self.logger.info(f"✅ PDF from Markdown: {pdf_path}")
return pdf_path
except Exception as e:
self.logger.warning(f"⚠️ PDF from Markdown failed: {e}")
return None
def _convert_md_to_docx(self, md_path: Path, base_name: str) -> Optional[Path]:
"""Convert Markdown to DOCX using pandoc."""
docx_path = settings.LOCAL_DOCX / f"{base_name}.docx"
cmd = ["pandoc", str(md_path), "-o", str(docx_path)]
try:
subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60)
self.logger.info(f"✅ DOCX from Markdown: {docx_path}")
return docx_path
except Exception as e:
self.logger.warning(f"⚠️ DOCX from Markdown failed: {e}")
return None
def _cleanup_latex_aux(self, output_dir: Path, base_name: str):
"""Clean up auxiliary LaTeX files."""
extensions = [".aux", ".log", ".out", ".toc"]
for ext in extensions:
aux_file = output_dir / f"{base_name}{ext}"
if aux_file.exists():
try:
aux_file.unlink()
except:
pass
def _upload_to_notion(
self,
base_name: str,
summary: str,
pdf_path: Optional[Path],
metadata: Dict[str, Any],
):
"""Upload summary to Notion if configured."""
try:
from services.notion_service import notion_service
notion_uploaded = False
notion_page_id = None
if settings.has_notion_config:
try:
title = base_name.replace("_", " ").title()
title = base_name.replace("_", " ").title()
notion_metadata = {
"file_type": "Audio",
"pdf_path": pdf_path or Path(""),
"add_status": False,
"use_as_page": False,
}
# Crear página con el contenido completo del resumen
notion_metadata = {
"file_type": "Audio", # O 'PDF' dependiendo del origen
"pdf_path": pdf_path if pdf_path else Path(""),
"add_status": False, # No usar Status/Tipo (no existen en la DB)
"use_as_page": False, # Usar como database, no página
}
page_id = notion_service.create_page_with_summary(
title=title, summary=summary, metadata=notion_metadata
)
notion_page_id = notion_service.create_page_with_summary(
title=title, summary=summary, metadata=notion_metadata
)
if notion_page_id:
notion_uploaded = True
self.logger.info(
f"✅ Resumen subido a Notion: {title} (ID: {notion_page_id})"
)
else:
self.logger.warning(f"⚠️ No se pudo subir a Notion: {title}")
except Exception as e:
self.logger.warning(f"❌ Error al subir a Notion: {e}")
import traceback
traceback.print_exc()
if page_id:
metadata["notion_uploaded"] = True
metadata["notion_page_id"] = page_id
self.logger.info(f"✅ Uploaded to Notion: {title}")
else:
self.logger.info("Notion not configured - skipping upload")
metadata = {
"markdown_path": str(markdown_path),
"docx_path": str(docx_path) if docx_path else "",
"pdf_path": str(pdf_path) if pdf_path else "",
"docx_name": Path(docx_path).name if docx_path else "",
"summary": summary,
"filename": filename,
"notion_uploaded": notion_uploaded,
"notion_page_id": notion_page_id,
}
return True, summary, metadata
self.logger.warning(f"⚠️ Notion upload failed: {title}")
except Exception as e:
self.logger.error(f"Document generation process failed: {e}")
return False, "", {}
def _sanitize_latex(self, markdown_path: Path) -> None:
"""Sanitize LaTeX syntax in Markdown file to prevent Pandoc errors"""
try:
content = markdown_path.read_text(encoding="utf-8")
# 1. Unescape escaped dollar signs which are common LLM errors for math
content = content.replace(r"\$", "$")
# 2. Fix common Cyrillic and Greek characters that sneak in via LLMs
replacements = {
"ч": "ch",
"в": "v",
"к": "k",
"м": "m",
"н": "n",
"т": "t",
"": "-",
"": "-",
"": '"',
"": '"',
"": "'",
"Δ": "$\\Delta$",
"δ": "$\\delta$",
"Σ": "$\\Sigma$",
"σ": "$\\sigma$",
"π": "$\\pi$",
"Π": "$\\Pi$",
"α": "$\\alpha$",
"β": "$\\beta$",
"γ": "$\\gamma$",
"θ": "$\\theta$",
"λ": "$\\lambda$",
"μ": "$\\mu$",
}
# Be careful not to double-replace already correct LaTeX
for char, repl in replacements.items():
if char in content:
# Check if it's already inside math mode would be complex,
# but for now we assume raw unicode greek chars should become latex
content = content.replace(char, repl)
markdown_path.write_text(content, encoding="utf-8")
self.logger.info(f"Sanitized LaTeX in {markdown_path}")
except Exception as e:
self.logger.warning(f"Failed to sanitize LaTeX: {e}")
def _generate_filename(self, text: str, summary: str) -> str:
"""Generate intelligent filename"""
try:
# Use AI to extract key topics
prompt = f"""Extract 2-3 key topics from this summary to create a filename.
Summary: {summary}
Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
try:
topics_text = self.ai_provider.generate_text(prompt)
except Exception:
topics_text = summary[:100]
# Simple topic extraction
topics = re.findall(r"\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b", topics_text)[:3]
if not topics:
topics = ["documento"]
# Limit topic length
topics = [t[: settings.MAX_FILENAME_TOPICS_LENGTH] for t in topics]
filename = "_".join(topics)[: settings.MAX_FILENAME_LENGTH]
return filename
except Exception as e:
self.logger.error(f"Filename generation failed: {e}")
return "documento"
def _create_markdown(self, summary: str, base_name: str) -> Path:
"""Create Markdown document"""
output_dir = settings.LOCAL_DOWNLOADS_PATH
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / f"{base_name}_unificado.md"
content = f"""# {base_name.replace("_", " ").title()}
## Resumen
{summary}
---
*Generado por CBCFacil*
"""
with open(output_path, "w", encoding="utf-8") as f:
f.write(content)
return output_path
def _create_docx(self, markdown_path: Path, base_name: str) -> Path:
"""Create DOCX document using pandoc"""
output_dir = settings.LOCAL_DOCX
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / f"{base_name}_unificado.docx"
self.logger.info(
f"Converting Markdown to DOCX: {markdown_path} -> {output_path}"
)
try:
cmd = [
"pandoc",
str(markdown_path),
"-o",
str(output_path),
"--from=markdown",
"--to=docx",
]
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self.logger.info("DOCX generated successfully with pandoc")
return output_path
except subprocess.CalledProcessError as e:
self.logger.error(f"Pandoc DOCX conversion failed: {e.stderr}")
raise FileProcessingError(f"Failed to generate DOCX: {e.stderr}")
except Exception as e:
self.logger.error(f"Error generating DOCX: {e}")
raise FileProcessingError(f"Error generating DOCX: {e}")
def _create_pdf(self, markdown_path: Path, base_name: str) -> Path:
"""Create PDF document using pandoc and pdflatex"""
output_dir = settings.LOCAL_DOWNLOADS_PATH
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / f"{base_name}_unificado.pdf"
self.logger.info(
f"Converting Markdown to PDF: {markdown_path} -> {output_path}"
)
try:
cmd = [
"pandoc",
str(markdown_path),
"-o",
str(output_path),
"--pdf-engine=pdflatex",
"-V",
"geometry:margin=2.5cm",
"-V",
"fontsize=12pt",
"--highlight-style=tango",
]
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
self.logger.info("PDF generated successfully with pandoc")
return output_path
except subprocess.CalledProcessError as e:
self.logger.error(f"Pandoc PDF conversion failed: {e.stderr}")
raise FileProcessingError(f"Failed to generate PDF: {e.stderr}")
except Exception as e:
self.logger.error(f"Error generating PDF: {e}")
raise FileProcessingError(f"Error generating PDF: {e}")
self.logger.warning(f"❌ Notion upload error: {e}")