feat: Mathematical summaries with LaTeX and Pandoc

## What's New
- **LaTeX support**: PDF and DOCX generation with correctly rendered mathematical formulas via Pandoc.
- **Automatic sanitization**: Fixes stray Unicode characters (Greek/Cyrillic) and broken LaTeX syntax to avoid compilation errors.
- **Claude/GLM first**: The default AI provider is now Claude/GLM for better stability and reasoning capability.
- **Formatting improvements**: The final formatting pass on the summary now uses the main model (GLM) instead of Gemini, for consistency.

## 🛠️ Technical Changes
- `document/generators.py`: Replaced manual document generation with `pandoc`. Added a `_sanitize_latex` function.
- `services/ai/claude_provider.py`: Improved support for Z.ai environment variables.
- `services/ai/provider_factory.py`: Provider priority changed to `Claude > Gemini`.
- `latex/`: Added reference documentation for the LaTeX pipeline.
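As a minimal illustration of the delimiter convention the new prompts enforce (a hypothetical snippet, not taken from the diff), the Markdown handed to Pandoc should look roughly like this:

```markdown
La fórmula del interés compuesto es $A = P(1 + r/n)^{nt}$.

$$\Delta C = \sum_{i=1}^{n} \beta_i x_i$$
```

Inline math uses single `$ ... $` delimiters; important centered equations use `$$ ... $$`, which pdflatex renders as display math.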
Author: renato97
Commit: 915f827305 (parent f9d245a58e)
Date: 2026-01-26 23:40:16 +00:00
4 changed files with 384 additions and 178 deletions

## document/generators.py

@@ -3,6 +3,7 @@ Document generation utilities
 """
 import logging
+import subprocess
 import re
 from pathlib import Path
 from typing import Dict, Any, List, Tuple
@@ -49,17 +50,24 @@ Texto:
         # Step 2: Generate Unified Summary
         self.logger.info("Generating unified summary...")
-        summary_prompt = f"""Eres un profesor universitario experto en historia del siglo XX. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
+        summary_prompt = f"""Eres un profesor universitario experto en historia y economía. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
 
-REQUISITOS ESTRICTOS:
+REQUISITOS ESTRICTOS DE CONTENIDO:
 - Extensión entre 500-700 palabras
 - Usa encabezados Markdown con jerarquía clara (##, ###)
-- Desarrolla los puntos clave con profundidad y contexto histórico
+- Desarrolla los puntos clave con profundidad y contexto histórico/económico
 - Mantén un tono académico y analítico
 - Incluye conclusiones significativas
 - NO agregues texto fuera del resumen
 - Devuelve únicamente el resumen en formato Markdown
+
+REQUISITOS ESTRICTOS DE FORMATO MATEMÁTICO (LaTeX):
+- Si el texto incluye fórmulas matemáticas o económicas, DEBES usar formato LaTeX.
+- Usa bloques $$ ... $$ para ecuaciones centradas importantes.
+- Usa $ ... $ para ecuaciones en línea.
+- Ejemplo: La fórmula del interés compuesto es $A = P(1 + r/n)^{{nt}}$.
+- NO uses bloques de código (```latex) para las fórmulas, úsalas directamente en el texto para que Pandoc las renderice.
 
 Contenido a resumir:
 {text[:20000]}
@@ -72,31 +80,29 @@ Puntos clave a incluir obligatoriamente:
             self.logger.error(f"Raw summary generation failed: {e}")
             raise e
 
-        # Step 3: Format with Gemini (using GeminiProvider explicitly)
-        self.logger.info("Formatting summary with Gemini...")
-        format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible:
+        # Step 3: Format with IA (using main provider instead of Gemini)
+        self.logger.info("Formatting summary with IA...")
+        format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible y compatible con Pandoc:
 
 {raw_summary}
 
 Instrucciones:
-- Corrige cualquier error de formato
+- Corrige cualquier error de formato Markdown
 - Asegúrate de que los encabezados estén bien espaciados
 - Verifica que las viñetas usen "- " correctamente
 - Mantén exactamente el contenido existente
 - EVITA el uso excesivo de negritas (asteriscos), úsalas solo para conceptos clave
+- VERIFICA que todas las fórmulas matemáticas estén correctamente encerradas en $...$ (inline) o $$...$$ (display)
+- NO alteres la sintaxis LaTeX dentro de los delimitadores $...$ o $$...$$
 - Devuelve únicamente el resumen formateado sin texto adicional"""
 
-        # Use generic Gemini provider for formatting as requested
-        from services.ai.gemini_provider import GeminiProvider
-        formatter = GeminiProvider()
         try:
-            if formatter.is_available():
-                summary = formatter.generate_text(format_prompt)
+            # Use the main provider (Claude/GLM) for formatting too
+            if self.ai_provider.is_available():
+                summary = self.ai_provider.generate_text(format_prompt)
             else:
                 self.logger.warning(
-                    "Gemini formatter not available, using raw summary"
+                    "AI provider not available for formatting, using raw summary"
                 )
                 summary = raw_summary
         except Exception as e:
@@ -108,8 +114,20 @@ Instrucciones:
             # Create document
             markdown_path = self._create_markdown(summary, base_name)
-            docx_path = self._create_docx(summary, base_name)
-            pdf_path = self._create_pdf(summary, base_name)
+            docx_path = None
+            try:
+                docx_path = self._create_docx(markdown_path, base_name)
+            except Exception as e:
+                self.logger.error(f"Failed to create DOCX (non-critical): {e}")
+
+            pdf_path = None
+            try:
+                # Sanitize LaTeX before PDF generation
+                self._sanitize_latex(markdown_path)
+                pdf_path = self._create_pdf(markdown_path, base_name)
+            except Exception as e:
+                self.logger.error(f"Failed to create PDF (non-critical): {e}")
 
             # Upload to Notion if configured
             from services.notion_service import notion_service
@@ -123,7 +141,7 @@ Instrucciones:
                 # Crear página con el contenido completo del resumen
                 notion_metadata = {
                     "file_type": "Audio",  # O 'PDF' dependiendo del origen
-                    "pdf_path": pdf_path,
+                    "pdf_path": pdf_path if pdf_path else Path(""),
                     "add_status": False,  # No usar Status/Tipo (no existen en la DB)
                     "use_as_page": False,  # Usar como database, no página
                 }
@@ -149,9 +167,9 @@ Instrucciones:
             metadata = {
                 "markdown_path": str(markdown_path),
-                "docx_path": str(docx_path),
-                "pdf_path": str(pdf_path),
-                "docx_name": Path(docx_path).name,
+                "docx_path": str(docx_path) if docx_path else "",
+                "pdf_path": str(pdf_path) if pdf_path else "",
+                "docx_name": Path(docx_path).name if docx_path else "",
                 "summary": summary,
                 "filename": filename,
                 "notion_uploaded": notion_uploaded,
@@ -164,6 +182,53 @@ Instrucciones:
             self.logger.error(f"Document generation process failed: {e}")
             return False, "", {}
 
+    def _sanitize_latex(self, markdown_path: Path) -> None:
+        """Sanitize LaTeX syntax in Markdown file to prevent Pandoc errors"""
+        try:
+            content = markdown_path.read_text(encoding="utf-8")
+
+            # 1. Unescape escaped dollar signs which are common LLM errors for math
+            content = content.replace(r"\$", "$")
+
+            # 2. Fix common Cyrillic and Greek characters that sneak in via LLMs
+            replacements = {
+                "ч": "ch",
+                "в": "v",
+                "к": "k",
+                "м": "m",
+                "н": "n",
+                "т": "t",
+                "–": "-",
+                "—": "-",
+                "“": '"',
+                "”": '"',
+                "’": "'",
+                "Δ": "$\\Delta$",
+                "δ": "$\\delta$",
+                "Σ": "$\\Sigma$",
+                "σ": "$\\sigma$",
+                "π": "$\\pi$",
+                "Π": "$\\Pi$",
+                "α": "$\\alpha$",
+                "β": "$\\beta$",
+                "γ": "$\\gamma$",
+                "θ": "$\\theta$",
+                "λ": "$\\lambda$",
+                "μ": "$\\mu$",
+            }
+
+            # Be careful not to double-replace already correct LaTeX
+            for char, repl in replacements.items():
+                if char in content:
+                    # Check if it's already inside math mode would be complex,
+                    # but for now we assume raw unicode greek chars should become latex
+                    content = content.replace(char, repl)
+
+            markdown_path.write_text(content, encoding="utf-8")
+            self.logger.info(f"Sanitized LaTeX in {markdown_path}")
+        except Exception as e:
+            self.logger.warning(f"Failed to sanitize LaTeX: {e}")
+
     def _generate_filename(self, text: str, summary: str) -> str:
         """Generate intelligent filename"""
         try:
@@ -173,11 +238,10 @@ Summary: {summary}
 Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
 
-            topics_text = (
-                self.ai_provider.sanitize_input(prompt)
-                if hasattr(self.ai_provider, "sanitize_input")
-                else summary[:100]
-            )
+            try:
+                topics_text = self.ai_provider.generate_text(prompt)
+            except Exception:
+                topics_text = summary[:100]
 
             # Simple topic extraction
             topics = re.findall(r"\b[A-ZÁÉÍÓÚÑ][a-záéíóúñ]+\b", topics_text)[:3]
@@ -192,7 +256,7 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
         except Exception as e:
             self.logger.error(f"Filename generation failed: {e}")
-            return base_name[: settings.MAX_FILENAME_BASE_LENGTH]
+            return "documento"
     def _create_markdown(self, summary: str, base_name: str) -> Path:
         """Create Markdown document"""
@@ -217,154 +281,72 @@ Return only the topics separated by hyphens, max 20 chars each, in Spanish:"""
         return output_path
 
-    def _create_docx(self, summary: str, base_name: str) -> Path:
-        """Create DOCX document with Markdown parsing (Legacy method ported)"""
-        try:
-            from docx import Document
-            from docx.shared import Inches
-        except ImportError:
-            raise FileProcessingError("python-docx not installed")
+    def _create_docx(self, markdown_path: Path, base_name: str) -> Path:
+        """Create DOCX document using pandoc"""
         output_dir = settings.LOCAL_DOCX
         output_dir.mkdir(parents=True, exist_ok=True)
         output_path = output_dir / f"{base_name}_unificado.docx"
-        doc = Document()
-        doc.add_heading(base_name.replace("_", " ").title(), 0)
-        # Parse and render Markdown content line by line
-        lines = summary.splitlines()
-        current_paragraph = []
-        for line in lines:
-            line = line.strip()
-            if not line:
-                if current_paragraph:
-                    p = doc.add_paragraph(" ".join(current_paragraph))
-                    p.alignment = 3  # JUSTIFY alignment (WD_ALIGN_PARAGRAPH.JUSTIFY=3)
-                    current_paragraph = []
-                continue
-            if line.startswith("#"):
-                if current_paragraph:
-                    p = doc.add_paragraph(" ".join(current_paragraph))
-                    p.alignment = 3
-                    current_paragraph = []
-                # Process heading
-                level = len(line) - len(line.lstrip("#"))
-                heading_text = line.lstrip("#").strip()
-                if level <= 6:
-                    doc.add_heading(heading_text, level=level)
-                else:
-                    current_paragraph.append(heading_text)
-            elif line.startswith("-") or line.startswith("*") or line.startswith("•"):
-                if current_paragraph:
-                    p = doc.add_paragraph(" ".join(current_paragraph))
-                    p.alignment = 3
-                    current_paragraph = []
-                bullet_text = line.lstrip("-*• ").strip()
-                p = doc.add_paragraph(bullet_text, style="List Bullet")
-                # Remove bold markers from bullets if present
-                if "**" in bullet_text:
-                    # Basic cleanup for bullets
-                    pass
-            else:
-                # Clean up excessive bold markers in body text if user requested
-                clean_line = line.replace(
-                    "**", ""
-                )  # Removing asterisks as per user complaint "se abusa de los asteriscos"
-                current_paragraph.append(clean_line)
-        if current_paragraph:
-            p = doc.add_paragraph(" ".join(current_paragraph))
-            p.alignment = 3
-        doc.add_page_break()
-        doc.add_paragraph(f"*Generado por CBCFacil*")
-        doc.save(output_path)
-        return output_path
-
-    def _create_pdf(self, summary: str, base_name: str) -> Path:
-        """Create PDF document with Markdown parsing (Legacy method ported)"""
-        try:
-            from reportlab.lib.pagesizes import letter
-            from reportlab.pdfgen import canvas
-            import textwrap
-        except ImportError:
-            raise FileProcessingError("reportlab not installed")
+        self.logger.info(
+            f"Converting Markdown to DOCX: {markdown_path} -> {output_path}"
+        )
+        try:
+            cmd = [
+                "pandoc",
+                str(markdown_path),
+                "-o",
+                str(output_path),
+                "--from=markdown",
+                "--to=docx",
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            self.logger.info("DOCX generated successfully with pandoc")
+            return output_path
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"Pandoc DOCX conversion failed: {e.stderr}")
+            raise FileProcessingError(f"Failed to generate DOCX: {e.stderr}")
+        except Exception as e:
+            self.logger.error(f"Error generating DOCX: {e}")
+            raise FileProcessingError(f"Error generating DOCX: {e}")
+
+    def _create_pdf(self, markdown_path: Path, base_name: str) -> Path:
+        """Create PDF document using pandoc and pdflatex"""
         output_dir = settings.LOCAL_DOWNLOADS_PATH
         output_dir.mkdir(parents=True, exist_ok=True)
         output_path = output_dir / f"{base_name}_unificado.pdf"
-        c = canvas.Canvas(str(output_path), pagesize=letter)
-        width, height = letter
-        margin = 72
-        y_position = height - margin
-
-        def new_page():
-            nonlocal y_position
-            c.showPage()
-            c.setFont("Helvetica", 11)
-            y_position = height - margin
-
-        c.setFont("Helvetica", 11)
-        # Title
-        c.setFont("Helvetica-Bold", 16)
-        c.drawString(margin, y_position, base_name.replace("_", " ").title()[:100])
-        y_position -= 28
-        c.setFont("Helvetica", 11)
-        summary_clean = summary.replace(
-            "**", ""
-        )  # Remove asterisks globally for cleaner PDF
-
-        for raw_line in summary_clean.splitlines():
-            line = raw_line.rstrip()
-            if not line.strip():
-                y_position -= 14
-                if y_position < margin:
-                    new_page()
-                continue
-            stripped = line.lstrip()
-            if stripped.startswith("#"):
-                level = len(stripped) - len(stripped.lstrip("#"))
-                heading_text = stripped.lstrip("#").strip()
-                if heading_text:
-                    font_size = 16 if level == 1 else 14 if level == 2 else 12
-                    c.setFont("Helvetica-Bold", font_size)
-                    c.drawString(margin, y_position, heading_text[:90])
-                    y_position -= font_size + 6
-                    if y_position < margin:
-                        new_page()
-                    c.setFont("Helvetica", 11)
-                continue
-            if stripped.startswith(("-", "*", "•")):
-                bullet_text = stripped.lstrip("-*•").strip()
-                wrapped_lines = textwrap.wrap(bullet_text, width=80) or [""]
-                for idx, wrapped in enumerate(wrapped_lines):
-                    prefix = "• " if idx == 0 else "  "
-                    c.drawString(margin, y_position, f"{prefix}{wrapped}")
-                    y_position -= 14
-                    if y_position < margin:
-                        new_page()
-                continue
-            # Body text - Justified approximation (ReportLab native justification requires Paragraph styles, defaulting to wrap)
-            wrapped_lines = textwrap.wrap(stripped, width=90) or [""]
-            for wrapped in wrapped_lines:
-                c.drawString(margin, y_position, wrapped)
-                y_position -= 14
-                if y_position < margin:
-                    new_page()
-        c.save()
-        return output_path
+        self.logger.info(
+            f"Converting Markdown to PDF: {markdown_path} -> {output_path}"
+        )
+        try:
+            cmd = [
+                "pandoc",
+                str(markdown_path),
+                "-o",
+                str(output_path),
+                "--pdf-engine=pdflatex",
+                "-V",
+                "geometry:margin=2.5cm",
+                "-V",
+                "fontsize=12pt",
+                "--highlight-style=tango",
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            self.logger.info("PDF generated successfully with pandoc")
+            return output_path
+        except subprocess.CalledProcessError as e:
+            self.logger.error(f"Pandoc PDF conversion failed: {e.stderr}")
+            raise FileProcessingError(f"Failed to generate PDF: {e.stderr}")
+        except Exception as e:
+            self.logger.error(f"Error generating PDF: {e}")
+            raise FileProcessingError(f"Error generating PDF: {e}")

## latex/ (new documentation file)

@@ -0,0 +1,207 @@
# Pipeline de Generación de Resúmenes Matemáticos (LaTeX -> PDF)
Este documento contiene un script genérico en Python diseñado para integrarse en pipelines de automatización (GitHub Actions, Jenkins, GitLab CI). El script toma un archivo de texto plano, genera un resumen académico con fórmulas matemáticas usando LLMs (MiniMax, GLM, Gemini) y lo compila a PDF preservando la notación LaTeX.
## 1. Requisitos del Sistema
El entorno donde se ejecute este script debe tener instalado:
- **Python 3.8+**
- **Pandoc** (para conversión de documentos)
- **PDFLaTeX** (generalmente parte de TexLive, para renderizar fórmulas)
### Instalación en Debian/Ubuntu (Docker o CI)
```bash
apt-get update && apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended python3-pip
pip install requests
```
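Before wiring this into CI, it can help to fail fast when the toolchain is missing. A minimal sketch (a hypothetical helper, assuming only that `pandoc` and `pdflatex` must be on `PATH`):

```python
import shutil


def toolchain_available() -> bool:
    """Return True when both pandoc and pdflatex are found on PATH."""
    return all(shutil.which(tool) is not None for tool in ("pandoc", "pdflatex"))


if __name__ == "__main__":
    # Print a quick diagnostic instead of failing mid-pipeline
    print("toolchain ok" if toolchain_available() else "missing pandoc/pdflatex")
```

Calling this at the top of the pipeline turns a cryptic Pandoc error into a clear diagnostic.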
## 2. Script Genérico (`math_summary.py`)
Guarda el siguiente código como `math_summary.py`. Este script es agnóstico al proveedor y se configura mediante argumentos o variables de entorno.
```python
#!/usr/bin/env python3
import os
import sys
import argparse
import subprocess
import requests
import json
# Configuración de Modelos
PROVIDERS = {
"minimax": {
"url": "https://api.minimax.io/anthropic/v1/messages",
"model": "MiniMax-M2",
"header_key": "x-api-key",
"version_header": {"anthropic-version": "2023-06-01"},
"env_var": "MINIMAX_API_KEY"
},
"glm": {
"url": "https://api.z.ai/api/anthropic/v1/messages",
"model": "glm-4.7",
"header_key": "x-api-key",
"version_header": {"anthropic-version": "2023-06-01"},
"env_var": "GLM_API_KEY"
}
}
PROMPT_SYSTEM = """
Eres un asistente académico experto en matemáticas y economía.
Tu tarea es resumir el texto proporcionado manteniendo el rigor científico.
REGLAS DE FORMATO (CRÍTICO):
1. La salida debe ser Markdown válido.
2. TODAS las fórmulas matemáticas deben estar en formato LaTeX.
3. Usa bloques $$ ... $$ para ecuaciones centradas importantes.
4. Usa $ ... $ para ecuaciones en línea.
5. NO uses bloques de código (```latex) para las fórmulas, úsalas directamente en el texto para que Pandoc las renderice.
6. Incluye una sección de 'Conceptos Matemáticos' con las fórmulas desglosadas.
"""
def get_api_key(provider):
env_var = PROVIDERS[provider]["env_var"]
key = os.getenv(env_var)
if not key:
print(f"Error: La variable de entorno {env_var} no está definida.")
sys.exit(1)
return key
def call_llm(provider, text, api_key):
print(f"--- Contactando API: {provider.upper()} ---")
config = PROVIDERS[provider]
headers = {
"Content-Type": "application/json",
config["header_key"]: api_key,
}
if "version_header" in config:
headers.update(config["version_header"])
payload = {
"model": config["model"],
"max_tokens": 4096,
"messages": [
{"role": "user", "content": f"{PROMPT_SYSTEM}\n\nTEXTO A RESUMIR:\n{text}"}
]
}
try:
resp = requests.post(config["url"], json=payload, headers=headers, timeout=120)
resp.raise_for_status()
data = resp.json()
# Manejo específico para MiniMax que puede devolver bloques de "thinking"
content = ""
for part in data.get("content", []):
if part.get("type") == "text":
content += part.get("text", "")
# Fallback si no hay tipo explícito (GLM estándar)
if not content and data.get("content"):
if isinstance(data["content"], list):
content = data["content"][0].get("text", "")
return content
except Exception as e:
print(f"Error llamando a {provider}: {e}")
return None
def convert_to_pdf(markdown_content, output_file):
base_name = os.path.splitext(output_file)[0]
md_file = f"{base_name}.md"
with open(md_file, "w", encoding="utf-8") as f:
f.write(markdown_content)
print(f"--- Generando PDF: {output_file} ---")
cmd = [
"pandoc", md_file,
"-o", output_file,
"--pdf-engine=pdflatex",
"-V", "geometry:margin=2.5cm",
"-V", "fontsize=12pt",
"--highlight-style=tango"
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
print("Éxito: PDF generado correctamente.")
return True
else:
print("Error en Pandoc:")
print(result.stderr)
return False
def main():
parser = argparse.ArgumentParser(description="Generador de Resúmenes Matemáticos PDF")
parser.add_argument("input_file", help="Ruta al archivo de texto (.txt) fuente")
parser.add_argument("--provider", choices=["minimax", "glm"], default="glm", help="Proveedor de IA a usar")
parser.add_argument("--output", default="resumen_output.pdf", help="Nombre del archivo PDF de salida")
args = parser.parse_args()
if not os.path.exists(args.input_file):
print(f"Error: No se encuentra el archivo {args.input_file}")
sys.exit(1)
with open(args.input_file, "r", encoding="utf-8") as f:
text_content = f.read()
api_key = get_api_key(args.provider)
summary_md = call_llm(args.provider, text_content, api_key)
if summary_md:
convert_to_pdf(summary_md, args.output)
else:
print("Fallo en la generación del resumen.")
sys.exit(1)
if __name__ == "__main__":
main()
```
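A lightweight sanity check on the model output can catch violations of rule 5 (formulas wrapped in fenced latex code blocks) and unbalanced `$` delimiters before Pandoc is invoked. A sketch (a hypothetical helper, not part of the script above):

```python
# Literal fence marker built from parts so this snippet can itself live in Markdown
LATEX_FENCE = "`" * 3 + "latex"


def math_delimiters_ok(markdown: str) -> bool:
    """Return False if formulas are wrapped in latex code fences or $ signs don't pair up."""
    if LATEX_FENCE in markdown:
        return False
    # Remove display-math delimiters first; remaining inline $ must come in pairs
    without_display = markdown.replace("$$", "")
    return without_display.count("$") % 2 == 0
```

Running the check before `convert_to_pdf` lets the pipeline retry the LLM call instead of failing inside pdflatex. Note the odd-`$` heuristic will also flag literal dollar amounts such as `$5`, which is arguably the desired behavior here since an unpaired `$` breaks LaTeX math mode anyway.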
## 3. Ejemplo de Uso en Pipeline
### Ejecución Local
```bash
export GLM_API_KEY="tu_api_key_aqui"
python3 math_summary.py entrada.txt --provider glm --output reporte_final.pdf
```
### GitHub Actions (Ejemplo .yaml)
Este paso automatizaría la creación del PDF cada vez que se sube un .txt a la carpeta `docs/`.
```yaml
name: Generar PDF Matemático
on:
push:
paths:
- 'docs/*.txt'
jobs:
build-pdf:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Instalar dependencias
run: |
sudo apt-get update
sudo apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended
pip install requests
- name: Generar Resumen
env:
GLM_API_KEY: ${{ secrets.GLM_API_KEY }}
run: |
python3 math_summary.py docs/archivo.txt --provider glm --output docs/resumen.pdf
- name: Subir Artefacto
uses: actions/upload-artifact@v3
with:
name: PDF-Resumen
path: docs/resumen.pdf
```

## services/ai/claude_provider.py

@@ -1,6 +1,7 @@
""" """
Claude AI Provider implementation Claude AI Provider implementation
""" """
import logging import logging
import subprocess import subprocess
import shutil import shutil
@@ -30,11 +31,25 @@ class ClaudeProvider(AIProvider):
     def _get_env(self) -> Dict[str, str]:
         """Get environment variables for Claude"""
-        env = {
-            'ANTHROPIC_AUTH_TOKEN': self._token,
-            'ANTHROPIC_BASE_URL': self._base_url,
-            'PYTHONUNBUFFERED': '1'
-        }
+        # Load all user environment variables first
+        import os
+
+        env = os.environ.copy()
+
+        # Override with our specific settings if available
+        if self._token:
+            env["ANTHROPIC_AUTH_TOKEN"] = self._token
+        if self._base_url:
+            env["ANTHROPIC_BASE_URL"] = self._base_url
+
+        # Add critical flags
+        env["PYTHONUNBUFFERED"] = "1"
+
+        # Ensure model variables are picked up from env (already in os.environ)
+        # but if we had explicit settings for them, we'd set them here.
+        # Since we put them in .env and loaded via load_dotenv -> os.environ,
+        # simply copying os.environ is sufficient.
         return env
 
     def _run_cli(self, prompt: str, timeout: int = 300) -> str:
@@ -51,7 +66,7 @@ class ClaudeProvider(AIProvider):
                 text=True,
                 capture_output=True,
                 timeout=timeout,
-                shell=False
+                shell=False,
             )
 
             if process.returncode != 0:
@@ -84,7 +99,12 @@ Return only the corrected text, nothing else."""
     def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
         """Classify content using Claude"""
-        categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]
+        categories = [
+            "historia",
+            "analisis_contable",
+            "instituciones_gobierno",
+            "otras_clases",
+        ]
 
         prompt = f"""Classify the following text into one of these categories:
 - historia
@@ -101,11 +121,7 @@ Return only the category name, nothing else."""
         if result not in categories:
             result = "otras_clases"
 
-        return {
-            "category": result,
-            "confidence": 0.9,
-            "provider": self.name
-        }
+        return {"category": result, "confidence": 0.9, "provider": self.name}
 
     def generate_text(self, prompt: str, **kwargs) -> str:
         """Generate text using Claude"""

## services/ai/provider_factory.py

@@ -1,6 +1,7 @@
""" """
AI Provider Factory (Factory Pattern) AI Provider Factory (Factory Pattern)
""" """
import logging import logging
from typing import Dict, Type from typing import Dict, Type
@@ -16,11 +17,11 @@ class AIProviderFactory:
     def __init__(self):
         self.logger = logging.getLogger(__name__)
         self._providers: Dict[str, AIProvider] = {
-            'claude': ClaudeProvider(),
-            'gemini': GeminiProvider()
+            "claude": ClaudeProvider(),
+            "gemini": GeminiProvider(),
         }
 
-    def get_provider(self, preferred: str = 'gemini') -> AIProvider:
+    def get_provider(self, preferred: str = "gemini") -> AIProvider:
         """Get available provider with fallback"""
         # Try preferred provider first
         if preferred in self._providers:
@@ -46,8 +47,8 @@ class AIProviderFactory:
         }
 
     def get_best_provider(self) -> AIProvider:
-        """Get the best available provider (Gemini > Claude)"""
-        return self.get_provider('gemini')
+        """Get the best available provider (Claude > Gemini)"""
+        return self.get_provider("claude")
 
 # Global instance