feat: Implementación de Resúmenes Matemáticos con LaTeX y Pandoc

## ✨ Novedades
- **Soporte LaTeX**: Generación de PDFs y DOCX con fórmulas matemáticas renderizadas correctamente usando Pandoc.
- **Sanitización Automática**: Corrección de caracteres Unicode (griegos/cirílicos) y sintaxis LaTeX para evitar errores de compilación.
- **GLM/Claude Prioritario**: Cambio de proveedor de IA predeterminado a Claude/GLM para mayor estabilidad y capacidad de razonamiento.
- **Mejoras en Formato**: El formateo final del resumen ahora usa el modelo principal (GLM) en lugar de Gemini para consistencia.

## 🛠️ Cambios Técnicos
- `document/generators.py`: Reemplazo de generación manual por `pandoc`. Añadida función `_sanitize_latex`.
- `services/ai/claude_provider.py`: Soporte mejorado para variables de entorno de Z.ai.
- `services/ai/provider_factory.py`: Prioridad ajustada `Claude > Gemini`.
- `latex/`: Añadida documentación de referencia para el pipeline LaTeX.
This commit is contained in:
renato97
2026-01-26 23:40:16 +00:00
parent f9d245a58e
commit 915f827305
4 changed files with 384 additions and 178 deletions

View File

@@ -1,6 +1,7 @@
"""
Claude AI Provider implementation
"""
import logging
import subprocess
import shutil
@@ -30,11 +31,25 @@ class ClaudeProvider(AIProvider):
def _get_env(self) -> Dict[str, str]:
"""Get environment variables for Claude"""
env = {
'ANTHROPIC_AUTH_TOKEN': self._token,
'ANTHROPIC_BASE_URL': self._base_url,
'PYTHONUNBUFFERED': '1'
}
# Load all user environment variables first
import os
env = os.environ.copy()
# Override with our specific settings if available
if self._token:
env["ANTHROPIC_AUTH_TOKEN"] = self._token
if self._base_url:
env["ANTHROPIC_BASE_URL"] = self._base_url
# Add critical flags
env["PYTHONUNBUFFERED"] = "1"
# Ensure model variables are picked up from env (already in os.environ)
# but if we had explicit settings for them, we'd set them here.
# Since we put them in .env and loaded via load_dotenv -> os.environ,
# simply copying os.environ is sufficient.
return env
def _run_cli(self, prompt: str, timeout: int = 300) -> str:
@@ -51,7 +66,7 @@ class ClaudeProvider(AIProvider):
text=True,
capture_output=True,
timeout=timeout,
shell=False
shell=False,
)
if process.returncode != 0:
@@ -84,7 +99,12 @@ Return only the corrected text, nothing else."""
def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
"""Classify content using Claude"""
categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]
categories = [
"historia",
"analisis_contable",
"instituciones_gobierno",
"otras_clases",
]
prompt = f"""Classify the following text into one of these categories:
- historia
@@ -101,11 +121,7 @@ Return only the category name, nothing else."""
if result not in categories:
result = "otras_clases"
return {
"category": result,
"confidence": 0.9,
"provider": self.name
}
return {"category": result, "confidence": 0.9, "provider": self.name}
def generate_text(self, prompt: str, **kwargs) -> str:
"""Generate text using Claude"""

View File

@@ -1,6 +1,7 @@
"""
AI Provider Factory (Factory Pattern)
"""
import logging
from typing import Dict, Type
@@ -16,11 +17,11 @@ class AIProviderFactory:
def __init__(self):
self.logger = logging.getLogger(__name__)
self._providers: Dict[str, AIProvider] = {
'claude': ClaudeProvider(),
'gemini': GeminiProvider()
"claude": ClaudeProvider(),
"gemini": GeminiProvider(),
}
def get_provider(self, preferred: str = 'gemini') -> AIProvider:
def get_provider(self, preferred: str = "gemini") -> AIProvider:
"""Get available provider with fallback"""
# Try preferred provider first
if preferred in self._providers:
@@ -46,8 +47,8 @@ class AIProviderFactory:
}
def get_best_provider(self) -> AIProvider:
"""Get the best available provider (Gemini > Claude)"""
return self.get_provider('gemini')
"""Get the best available provider (Claude > Gemini)"""
return self.get_provider("claude")
# Global instance