feat: Sistema LaTeX mejorado con sanitización automática y corrección de TikZ
Cambios principales:

## Nuevos archivos
- services/ai/parallel_provider.py: Ejecución paralela de múltiples proveedores AI
- services/ai/prompt_manager.py: Gestión centralizada de prompts (resumen.md como fuente)
- latex/resumen.md: Template del prompt para resúmenes académicos LaTeX

## Mejoras en generación LaTeX (document/generators.py)
- Nueva función _sanitize_latex(): Corrige automáticamente errores comunes de AI
- Agrega align=center a nodos TikZ con saltos de línea (\\)
- Previene errores 'Not allowed in LR mode' antes de compilar
- Soporte para procesamiento paralelo de proveedores AI
- Conversión DOCX en paralelo con generación PDF
- Uploads a Notion en background (non-blocking)
- Callbacks de notificación para progreso en Telegram

## Mejoras en proveedores AI
- claude_provider.py: fix_latex() con instrucciones específicas para errores TikZ
- gemini_provider.py: fix_latex() mejorado + rate limiting + circuit breaker
- provider_factory.py: Soporte para parallel provider

## Otros cambios
- config/settings.py: Nuevas configuraciones para Gemini models
- services/webdav_service.py: Mejoras en manejo de conexión
- .gitignore: Ignora archivos LaTeX auxiliares (.aux, .toc, .out, .pdf)

## Archivos de ejemplo
- latex/imperio_romano.tex, latex/clase_revolucion_rusa_crisis_30.tex
- resumen_curiosidades.tex (corregido y compilado exitosamente)
This commit is contained in:
@@ -7,8 +7,9 @@ import time
|
||||
import unicodedata
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
from contextlib import contextmanager
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
from requests.adapters import HTTPAdapter
|
||||
@@ -107,7 +108,7 @@ class WebDAVService:
|
||||
return self._parse_propfind_response(response.text)
|
||||
|
||||
def _parse_propfind_response(self, xml_response: str) -> List[str]:
|
||||
"""Parse PROPFIND XML response"""
|
||||
"""Parse PROPFIND XML response and return only files (not directories)"""
|
||||
# Simple parser for PROPFIND response
|
||||
files = []
|
||||
try:
|
||||
@@ -119,20 +120,41 @@ class WebDAVService:
|
||||
parsed_url = urlparse(settings.NEXTCLOUD_URL)
|
||||
webdav_path = parsed_url.path.rstrip('/') # e.g. /remote.php/webdav
|
||||
|
||||
# Find all href elements
|
||||
for href in root.findall('.//{DAV:}href'):
|
||||
href_text = href.text or ""
|
||||
href_text = unquote(href_text) # Decode URL encoding
|
||||
|
||||
# Find all response elements
|
||||
for response in root.findall('.//{DAV:}response'):
|
||||
href = response.find('.//{DAV:}href')
|
||||
if href is None or href.text is None:
|
||||
continue
|
||||
|
||||
href_text = unquote(href.text) # Decode URL encoding
|
||||
|
||||
# Check if this is a directory (has collection resourcetype)
|
||||
propstat = response.find('.//{DAV:}propstat')
|
||||
is_directory = False
|
||||
if propstat is not None:
|
||||
prop = propstat.find('.//{DAV:}prop')
|
||||
if prop is not None:
|
||||
resourcetype = prop.find('.//{DAV:}resourcetype')
|
||||
if resourcetype is not None and resourcetype.find('.//{DAV:}collection') is not None:
|
||||
is_directory = True
|
||||
|
||||
# Skip directories
|
||||
if is_directory:
|
||||
continue
|
||||
|
||||
# Also skip paths ending with / (another way to detect directories)
|
||||
if href_text.endswith('/'):
|
||||
continue
|
||||
|
||||
# Remove base URL from href
|
||||
base_url = settings.NEXTCLOUD_URL.rstrip('/')
|
||||
if href_text.startswith(base_url):
|
||||
href_text = href_text[len(base_url):]
|
||||
|
||||
|
||||
# Also strip the webdav path if it's there
|
||||
if href_text.startswith(webdav_path):
|
||||
href_text = href_text[len(webdav_path):]
|
||||
|
||||
|
||||
# Clean up the path
|
||||
href_text = href_text.lstrip('/')
|
||||
if href_text: # Skip empty paths (root directory)
|
||||
@@ -210,6 +232,59 @@ class WebDAVService:
|
||||
except WebDAVError:
|
||||
return False
|
||||
|
||||
def upload_batch(
    self,
    files: List[Tuple[Path, str]],
    max_workers: int = 4,
    timeout: int = 120
) -> Dict[str, bool]:
    """
    Upload multiple files concurrently.

    Args:
        files: List of (local_path, remote_path) tuples
        max_workers: Maximum concurrent uploads
        timeout: Total timeout for the whole batch, in seconds.
            (``as_completed`` applies its timeout to the iteration as a
            whole, not to each individual upload.)

    Returns:
        Dict mapping remote_path to success status. Uploads that did not
        finish before the batch timeout are reported as False.
    """
    # Local import: pre-3.11 the futures TimeoutError is NOT the builtin
    # TimeoutError, so catching the builtin would miss it.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    if not files:
        return {}

    results: Dict[str, bool] = {}

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all upload tasks
        future_to_path = {
            executor.submit(self.upload, local, remote): remote
            for local, remote in files
        }

        try:
            # Collect results as they complete
            for future in as_completed(future_to_path, timeout=timeout):
                remote_path = future_to_path[future]
                try:
                    future.result()
                    results[remote_path] = True
                    self.logger.info(f"Successfully uploaded: {remote_path}")
                except Exception as e:
                    results[remote_path] = False
                    self.logger.error(f"Failed to upload {remote_path}: {e}")
        except FuturesTimeoutError:
            # BUG FIX: previously a batch timeout escaped as an unhandled
            # exception, discarding the partial results and leaving pending
            # futures running while the executor shutdown blocked on them.
            # Cancel whatever has not started and report the unfinished
            # uploads as failures instead of raising.
            for future, remote_path in future_to_path.items():
                if remote_path not in results:
                    future.cancel()
                    results[remote_path] = False
                    self.logger.error(f"Upload timed out: {remote_path}")

    failed_count = sum(1 for success in results.values() if not success)
    if failed_count > 0:
        self.logger.warning(
            f"Batch upload completed with {failed_count} failures "
            f"({len(results) - failed_count}/{len(results)} successful)"
        )
    else:
        self.logger.info(
            f"Batch upload completed: {len(results)} files uploaded successfully"
        )

    return results
|
||||
|
||||
|
||||
# Global instance: module-level singleton so all importers share one
# WebDAVService (and therefore one connection configuration/session).
webdav_service = WebDAVService()
|
||||
|
||||
Reference in New Issue
Block a user