## ✨ Novedades

- **Soporte LaTeX**: Generación de PDFs y DOCX con fórmulas matemáticas renderizadas correctamente usando Pandoc.
- **Sanitización Automática**: Corrección de caracteres Unicode (griegos/cirílicos) y sintaxis LaTeX para evitar errores de compilación.
- **GLM/Claude Prioritario**: Cambio de proveedor de IA predeterminado a Claude/GLM para mayor estabilidad y capacidad de razonamiento.
- **Mejoras en Formato**: El formateo final del resumen ahora usa el modelo principal (GLM) en lugar de Gemini para consistencia.

## 🛠️ Cambios Técnicos

- `document/generators.py`: Reemplazo de generación manual por `pandoc`. Añadida función `_sanitize_latex`.
- `services/ai/claude_provider.py`: Soporte mejorado para variables de entorno de Z.ai.
- `services/ai/provider_factory.py`: Prioridad ajustada `Claude > Gemini`.
- `latex/`: Añadida documentación de referencia para el pipeline LaTeX.
129 lines
3.8 KiB
Python
129 lines
3.8 KiB
Python
"""
|
|
Claude AI Provider implementation
|
|
"""
|
|
|
|
import logging
|
|
import subprocess
|
|
import shutil
|
|
from typing import Dict, Any, Optional
|
|
|
|
from config import settings
|
|
from core import AIProcessingError
|
|
from .base_provider import AIProvider
|
|
|
|
|
|
class ClaudeProvider(AIProvider):
    """AI provider that delegates every request to the Claude CLI.

    The executable is taken from ``settings.CLAUDE_CLI_PATH`` or, when that
    is unset, from a ``claude`` binary found on ``PATH``. The auth token and
    base URL are read from ``settings.ZAI_AUTH_TOKEN`` and
    ``settings.ZAI_BASE_URL`` and handed to the CLI through the
    ``ANTHROPIC_AUTH_TOKEN`` / ``ANTHROPIC_BASE_URL`` environment variables.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cli_path = settings.CLAUDE_CLI_PATH or shutil.which("claude")
        self._token = settings.ZAI_AUTH_TOKEN
        self._base_url = settings.ZAI_BASE_URL

    @property
    def name(self) -> str:
        """Human-readable provider name."""
        return "Claude"

    def is_available(self) -> bool:
        """Return True when both a CLI path and an auth token are configured."""
        return bool(self._cli_path and self._token)

    def _get_env(self) -> Dict[str, str]:
        """Build the environment passed to the CLI subprocess.

        Starts from a copy of the current process environment, so anything
        already exported there (for example model-selection variables loaded
        from ``.env``) is inherited by the CLI, then overrides the auth token
        and base URL with the configured values when they are set.

        Returns:
            A new dict; the process environment itself is not modified.
        """
        import os

        env = os.environ.copy()

        # Explicit settings win over whatever is already in the environment.
        if self._token:
            env["ANTHROPIC_AUTH_TOKEN"] = self._token
        if self._base_url:
            env["ANTHROPIC_BASE_URL"] = self._base_url

        env["PYTHONUNBUFFERED"] = "1"
        return env

    def _run_cli(self, prompt: str, timeout: int = 300) -> str:
        """Send ``prompt`` to the CLI on stdin and return its stripped stdout.

        Args:
            prompt: Text written to the CLI's standard input.
            timeout: Seconds to wait before aborting the subprocess.

        Returns:
            The CLI's standard output with surrounding whitespace removed.

        Raises:
            AIProcessingError: If the provider is not configured, the process
                cannot be started, it times out, or it exits non-zero.
        """
        if not self.is_available():
            raise AIProcessingError("Claude CLI not available or not configured")

        # NOTE(review): the CLI is invoked without arguments and receives the
        # prompt on stdin - confirm the configured binary runs
        # non-interactively in that mode.
        try:
            process = subprocess.run(
                [self._cli_path],
                input=prompt,
                env=self._get_env(),
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False,
            )
        except subprocess.TimeoutExpired as exc:
            raise AIProcessingError(
                f"Claude CLI timed out after {timeout}s"
            ) from exc
        except Exception as exc:
            # Deliberately broad: callers are only expected to handle
            # AIProcessingError, so any launch failure is translated here.
            raise AIProcessingError(f"Claude CLI error: {exc}") from exc

        # Checked outside the try block so this error is not caught and
        # re-wrapped by the generic handler above.
        if process.returncode != 0:
            error_msg = (process.stderr or "").strip() or "Unknown error"
            raise AIProcessingError(f"Claude CLI failed: {error_msg}")

        return process.stdout.strip()

    def summarize(self, text: str, **kwargs) -> str:
        """Ask the CLI for a concise Spanish summary of ``text``.

        ``kwargs`` are accepted for interface compatibility and ignored.
        """
        prompt = (
            "Summarize the following text:\n\n"
            f"{text}\n\n"
            "Provide a clear, concise summary in Spanish."
        )
        return self._run_cli(prompt)

    def correct_text(self, text: str, **kwargs) -> str:
        """Ask the CLI to fix grammar, spelling and clarity of ``text``.

        ``kwargs`` are accepted for interface compatibility and ignored.
        """
        prompt = (
            "Correct the following text for grammar, spelling, and clarity:\n\n"
            f"{text}\n\n"
            "Return only the corrected text, nothing else."
        )
        return self._run_cli(prompt)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the fixed category labels.

        Returns:
            A dict with ``category`` (one of the known labels, or
            ``"otras_clases"`` when the reply is not recognised),
            ``confidence`` (a fixed 0.9, not computed from the reply) and
            ``provider`` (this provider's name).

        Raises:
            AIProcessingError: Propagated from the CLI call.
        """
        categories = (
            "historia",
            "analisis_contable",
            "instituciones_gobierno",
            "otras_clases",
        )

        # Build the bullet list from the same tuple used for validation so the
        # prompt and the accepted labels cannot drift apart.
        options = "\n".join(f"- {label}" for label in categories)
        prompt = (
            "Classify the following text into one of these categories:\n"
            f"{options}\n\n"
            f"Text: {text}\n\n"
            "Return only the category name, nothing else."
        )

        # Tolerate decoration around the label (quotes, backticks, a trailing
        # period) instead of discarding an otherwise valid answer.
        answer = self._run_cli(prompt).lower().strip(" \t\r\n\"'`.")

        if answer not in categories:
            answer = "otras_clases"

        return {"category": answer, "confidence": 0.9, "provider": self.name}

    def generate_text(self, prompt: str, **kwargs) -> str:
        """Send ``prompt`` to the CLI unchanged and return its reply.

        ``kwargs`` are accepted for interface compatibility and ignored.
        """
        return self._run_cli(prompt)
|