feat: Sistema LaTeX mejorado con sanitización automática y corrección de TikZ

Cambios principales:

## Nuevos archivos
- services/ai/parallel_provider.py: Ejecución paralela de múltiples proveedores AI
- services/ai/prompt_manager.py: Gestión centralizada de prompts (resumen.md como fuente)
- latex/resumen.md: Template del prompt para resúmenes académicos LaTeX

## Mejoras en generación LaTeX (document/generators.py)
- Nueva función _sanitize_latex(): Corrige automáticamente errores comunes de AI
- Agrega align=center a nodos TikZ con saltos de línea (\\)
- Previene errores 'Not allowed in LR mode' antes de compilar
- Soporte para procesamiento paralelo de proveedores AI
- Conversión DOCX en paralelo con generación PDF
- Uploads a Notion en background (non-blocking)
- Callbacks de notificación para progreso en Telegram

## Mejoras en proveedores AI
- claude_provider.py: fix_latex() con instrucciones específicas para errores TikZ
- gemini_provider.py: fix_latex() mejorado + rate limiting + circuit breaker
- provider_factory.py: Soporte para parallel provider

## Otros cambios
- config/settings.py: Nuevas configuraciones para Gemini models
- services/webdav_service.py: Mejoras en manejo de conexión
- .gitignore: Ignora archivos LaTeX auxiliares (.aux, .toc, .out, .pdf)

## Archivos de ejemplo
- latex/imperio_romano.tex, latex/clase_revolucion_rusa_crisis_30.tex
- resumen_curiosidades.tex (corregido y compilado exitosamente)
2026-02-07 20:50:27 +00:00
parent 915f827305
commit dcf887c510
15 changed files with 4309 additions and 409 deletions
--- a/services/webdav_service.py
+++ b/services/webdav_service.py
@@ -7,8 +7,9 @@ import time
 import unicodedata
 import re
 from pathlib import Path
-from typing import Optional, List, Dict
+from typing import Optional, List, Dict, Tuple
 from contextlib import contextmanager
+from concurrent.futures import ThreadPoolExecutor, as_completed
 import requests
 from requests.auth import HTTPBasicAuth
 from requests.adapters import HTTPAdapter
@@ -107,7 +108,7 @@ class WebDAVService:
        return self._parse_propfind_response(response.text)

    def _parse_propfind_response(self, xml_response: str) -> List[str]:
-        """Parse PROPFIND XML response"""
+        """Parse PROPFIND XML response and return only files (not directories)"""
        # Simple parser for PROPFIND response
        files = []
        try:
@@ -119,20 +120,41 @@ class WebDAVService:
            parsed_url = urlparse(settings.NEXTCLOUD_URL)
            webdav_path = parsed_url.path.rstrip('/')  # e.g. /remote.php/webdav

-            # Find all href elements
-            for href in root.findall('.//{DAV:}href'):
-                href_text = href.text or ""
-                href_text = unquote(href_text)  # Decode URL encoding
-                
+            # Find all response elements
+            for response in root.findall('.//{DAV:}response'):
+                href = response.find('.//{DAV:}href')
+                if href is None or href.text is None:
+                    continue
+
+                href_text = unquote(href.text)  # Decode URL encoding
+
+                # Check if this is a directory (has collection resourcetype)
+                propstat = response.find('.//{DAV:}propstat')
+                is_directory = False
+                if propstat is not None:
+                    prop = propstat.find('.//{DAV:}prop')
+                    if prop is not None:
+                        resourcetype = prop.find('.//{DAV:}resourcetype')
+                        if resourcetype is not None and resourcetype.find('.//{DAV:}collection') is not None:
+                            is_directory = True
+
+                # Skip directories
+                if is_directory:
+                    continue
+
+                # Also skip paths ending with / (another way to detect directories)
+                if href_text.endswith('/'):
+                    continue
+
                # Remove base URL from href
                base_url = settings.NEXTCLOUD_URL.rstrip('/')
                if href_text.startswith(base_url):
                    href_text = href_text[len(base_url):]
-                
+
                # Also strip the webdav path if it's there
                if href_text.startswith(webdav_path):
                    href_text = href_text[len(webdav_path):]
-                
+
                # Clean up the path
                href_text = href_text.lstrip('/')
                if href_text:  # Skip empty paths (root directory)
@@ -210,6 +232,59 @@ class WebDAVService:
        except WebDAVError:
            return False

+    def upload_batch(
+        self, files: List[Tuple[Path, str]], max_workers: int = 4, timeout: int = 120
+    ) -> Dict[str, bool]:
+        """
+        Upload multiple files concurrently.
+
+        Args:
+            files: List of (local_path, remote_path) tuples
+            max_workers: Maximum concurrent uploads
+            timeout: Time budget in seconds for the whole batch (not per file)
+
+        Returns:
+            Dict mapping remote_path to success status (False on failure or timeout)
+        """
+        from concurrent.futures import TimeoutError as FuturesTimeoutError
+
+        if not files:
+            return {}
+        # Default to False so uploads cut off by the timeout still get an entry
+        results: Dict[str, bool] = {remote: False for _, remote in files}
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_path = {
+                executor.submit(self.upload, local, remote): remote
+                for local, remote in files
+            }
+            try:
+                for future in as_completed(future_to_path, timeout=timeout):
+                    remote_path = future_to_path[future]
+                    try:
+                        future.result()
+                        results[remote_path] = True
+                        self.logger.info(f"Successfully uploaded: {remote_path}")
+                    except Exception as e:
+                        results[remote_path] = False
+                        self.logger.error(f"Failed to upload {remote_path}: {e}")
+            except FuturesTimeoutError:
+                # Budget spent: cancel queued uploads (no-op on running/finished ones)
+                for future in future_to_path:
+                    future.cancel()
+                self.logger.error(f"Batch upload timed out after {timeout}s")
+
+        failed_count = sum(1 for success in results.values() if not success)
+        if failed_count > 0:
+            self.logger.warning(
+                f"Batch upload completed with {failed_count} failures "
+                f"({len(results) - failed_count}/{len(results)} successful)"
+            )
+        else:
+            self.logger.info(f"Batch upload completed: {len(results)} files uploaded successfully")
+
+        return results
+

 # Global instance
 webdav_service = WebDAVService()