Cambios principales:

## Nuevos archivos
- services/ai/parallel_provider.py: Ejecución paralela de múltiples proveedores AI
- services/ai/prompt_manager.py: Gestión centralizada de prompts (resumen.md como fuente)
- latex/resumen.md: Template del prompt para resúmenes académicos LaTeX

## Mejoras en generación LaTeX (document/generators.py)
- Nueva función _sanitize_latex(): Corrige automáticamente errores comunes de AI
- Agrega align=center a nodos TikZ con saltos de línea (\\)
- Previene errores 'Not allowed in LR mode' antes de compilar
- Soporte para procesamiento paralelo de proveedores AI
- Conversión DOCX en paralelo con generación PDF
- Uploads a Notion en background (non-blocking)
- Callbacks de notificación para progreso en Telegram

## Mejoras en proveedores AI
- claude_provider.py: fix_latex() con instrucciones específicas para errores TikZ
- gemini_provider.py: fix_latex() mejorado + rate limiting + circuit breaker
- provider_factory.py: Soporte para parallel provider

## Otros cambios
- config/settings.py: Nuevas configuraciones para Gemini models
- services/webdav_service.py: Mejoras en manejo de conexión
- .gitignore: Ignora archivos LaTeX auxiliares (.aux, .toc, .out, .pdf)

## Archivos de ejemplo
- latex/imperio_romano.tex, latex/clase_revolucion_rusa_crisis_30.tex
- resumen_curiosidades.tex (corregido y compilado exitosamente)
344 lines
11 KiB
Python
344 lines
11 KiB
Python
"""
|
|
Prompt Manager - Centralized prompt management using resumen.md as source of truth
|
|
"""
|
|
|
|
import re
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any
|
|
from config import settings
|
|
|
|
|
|
class PromptManager:
    """Singleton that builds every prompt sent to the AI providers.

    The summary prompt template and the LaTeX preamble are read from the
    markdown file at ``PROMPT_FILE_PATH`` (``latex/resumen.md``), which is the
    single source of truth for academic summary generation. Built-in
    fallbacks are used when that file is missing or cannot be parsed.
    """

    _instance: Optional["PromptManager"] = None
    _prompt_cache: Optional[str] = None
    _latex_preamble_cache: Optional[str] = None

    # Path to the prompt template file. It is resolved when the file is
    # loaded, i.e. relative to the working directory of the process.
    PROMPT_FILE_PATH = Path("latex/resumen.md")

    # Placeholders recognised inside the template and the preamble.
    _MATERIA_SLOT = "[MATERIA]"
    _CLASS_NUMBER_SLOT = "[N]"
    _TRANSCRIPTION_SLOT = "[PEGAR TRANSCRIPCIÓN AQUÍ]"
    _BIBLIOGRAPHY_SLOT = (
        "[PEGAR TEXTO DEL LIBRO/APUNTE O INDICAR QUE LO SUBISTE COMO ARCHIVO]"
    )

    def __new__(cls):
        """Return the one shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @staticmethod
    def _extract_template_block(content: str) -> Optional[str]:
        """Return the fenced block that follows the "## Prompt Template" header.

        The template itself contains a nested ```latex ... ``` block, so the
        block is taken to run from the first fence after the header up to the
        LAST fence in the file.

        Args:
            content: Full text of the markdown prompt file.

        Returns:
            The template text, or ``None`` when the header, the opening fence
            or the closing fence cannot be found.
        """
        header_at = content.find("## Prompt Template")
        if header_at == -1:
            return None

        fence_at = content.find("```", header_at)
        if fence_at == -1:
            return None

        # Skip the opening fence and the rest of its line (language specifier).
        body_start = fence_at + 3
        newline_at = content.find("\n", body_start)
        if newline_at != -1:
            body_start = newline_at + 1

        remaining = content[body_start:]
        # Non-overlapping, left-to-right scan; the last hit closes the block.
        fences = [m.start() for m in re.finditer("```", remaining)]
        if not fences:
            return None

        return remaining[:fences[-1]].lstrip("\n")

    @staticmethod
    def _fill_placeholders(text: str, values: Dict[str, str]) -> str:
        """Replace every placeholder in ``text`` in a single pass.

        A single pass guarantees that inserted values are never rescanned, so
        user-provided text that happens to contain a placeholder (for example
        a literal "[N]" in a transcription) is left untouched.

        Args:
            text: Text containing zero or more placeholders.
            values: Mapping of placeholder -> replacement string.

        Returns:
            ``text`` with each placeholder occurrence substituted.
        """
        if not values:
            return text
        pattern = re.compile("|".join(re.escape(slot) for slot in values))
        # A callable replacement keeps backslashes in the values literal
        # (LaTeX content must not be parsed as regex escape sequences).
        return pattern.sub(lambda m: values[m.group(0)], text)

    def _load_prompt_template(self) -> str:
        """Load (and cache) the prompt template from the prompt file.

        Returns:
            The template extracted from the file, or the fallback prompt when
            the file is missing, unreadable, or lacks the expected block.
        """
        if self._prompt_cache is not None:
            return self._prompt_cache

        template: Optional[str] = None
        try:
            file_path = self.PROMPT_FILE_PATH.resolve()
            if file_path.exists():
                template = self._extract_template_block(
                    file_path.read_text(encoding="utf-8")
                )
        except Exception as e:
            # Best effort: any read/parse problem degrades to the fallback.
            print(f"Error loading prompt file: {e}")

        if template is None:
            template = self._get_fallback_prompt()

        self._prompt_cache = template
        return template

    def _get_fallback_prompt(self) -> str:
        """Return the built-in prompt used when the prompt file is unusable."""
        return """Sos un asistente académico experto. Creá un resumen extenso en LaTeX basado en la transcripción de clase.

## Transcripción de clase:
[PEGAR TRANSCRIPCIÓN AQUÍ]

## Material bibliográfico:
[PEGAR TEXTO DEL LIBRO/APUNTE O INDICAR QUE LO SUBISTE COMO ARCHIVO]

Generá un archivo LaTeX completo con:
- Estructura académica formal
- Mínimo 10 páginas de contenido
- Fórmulas matemáticas en LaTeX
- Tablas y diagramas cuando corresponda
"""

    def _load_latex_preamble(self) -> str:
        """Extract the LaTeX preamble from the first ```latex block in the file.

        The result is cached only after the file was read successfully; when
        the file is missing or reading fails, the default preamble is returned
        without caching so a later call can still pick up the file.

        Returns:
            The preamble text with placeholders still unreplaced.
        """
        if self._latex_preamble_cache:
            return self._latex_preamble_cache

        try:
            file_path = self.PROMPT_FILE_PATH.resolve()
            if not file_path.exists():
                return self._get_default_preamble()

            content = file_path.read_text(encoding="utf-8")
            found = re.search(r"```latex\s*\n([\s\S]*?)\n```", content)
            if found:
                self._latex_preamble_cache = found.group(1).strip()
            else:
                self._latex_preamble_cache = self._get_default_preamble()
            return self._latex_preamble_cache

        except Exception as e:
            print(f"Error loading LaTeX preamble: {e}")
            return self._get_default_preamble()

    def _get_default_preamble(self) -> str:
        """Return the built-in LaTeX preamble (contains [MATERIA] and [N])."""
        return r"""\documentclass[11pt,a4paper]{article}
\usepackage[utf8]{inputenc}
\usepackage[spanish,provide=*]{babel}
\usepackage{amsmath,amssymb}
\usepackage{geometry}
\usepackage{graphicx}
\usepackage{tikz}
\usetikzlibrary{arrows.meta,positioning,shapes.geometric,calc}
\usepackage{booktabs}
\usepackage{enumitem}
\usepackage{fancyhdr}
\usepackage{titlesec}
\usepackage{tcolorbox}
\usepackage{array}
\usepackage{multirow}

\geometry{margin=2.5cm}
\pagestyle{fancy}
\fancyhf{}
\fancyhead[L]{[MATERIA] - CBC}
\fancyhead[R]{Clase [N]}
\fancyfoot[C]{\thepage}

% Cajas para destacar contenido
\newtcolorbox{definicion}[1][]{
colback=blue!5!white,
colframe=blue!75!black,
fonttitle=\bfseries,
title=#1
}

\newtcolorbox{importante}[1][]{
colback=red!5!white,
colframe=red!75!black,
fonttitle=\bfseries,
title=#1
}

\newtcolorbox{ejemplo}[1][]{
colback=green!5!white,
colframe=green!50!black,
fonttitle=\bfseries,
title=#1
}
"""

    def get_latex_summary_prompt(
        self,
        transcription: str,
        materia: str = "Economía",
        bibliographic_text: Optional[str] = None,
        class_number: Optional[int] = None
    ) -> str:
        """Build the complete prompt for a LaTeX academic summary.

        Args:
            transcription: The class transcription text.
            materia: Subject name (default: "Economía").
            bibliographic_text: Optional supporting text from books/notes; a
                fixed "none provided" sentence is used when empty.
            class_number: Optional class number; when ``None`` the "[N]"
                placeholder is left as is.

        Returns:
            Prompt string ready to send to the AI: explicit output
            instructions followed by the filled-in template. The transcription
            and bibliography are appended as extra sections when the template
            has no placeholder for them.
        """
        template = self._load_prompt_template()

        # Prepended context that forces a LaTeX-only answer; the template
        # file itself is not modified.
        explicit_instructions = """CRITICAL: Tu respuesta debe ser ÚNICAMENTE código LaTeX.

INSTRUCCIONES OBLIGATORIAS:
1. NO incluyas explicaciones previas
2. NO describas lo que vas a hacer
3. Comienza INMEDIATAMENTE con \\documentclass
4. Tu respuesta debe ser SOLO el código LaTeX fuente
5. Termina con \\end{document}

---

"""

        prompt = explicit_instructions + template
        bib_text = bibliographic_text or "No se proporcionó material bibliográfico adicional."

        # Decide on the fallback sections BEFORE inserting any user content,
        # so placeholders that only appear inside user text are not counted.
        has_transcription_slot = self._TRANSCRIPTION_SLOT in prompt
        has_bibliography_slot = self._BIBLIOGRAPHY_SLOT in prompt

        values: Dict[str, str] = {
            self._MATERIA_SLOT: materia,
            self._TRANSCRIPTION_SLOT: transcription,
            self._BIBLIOGRAPHY_SLOT: bib_text,
        }
        if class_number is not None:
            values[self._CLASS_NUMBER_SLOT] = str(class_number)

        prompt = self._fill_placeholders(prompt, values)

        if not has_transcription_slot:
            prompt += f"\n\n## Transcripción de clase:\n{transcription}"
        if not has_bibliography_slot:
            prompt += f"\n\n## Material bibliográfico:\n{bib_text}"

        return prompt

    def get_latex_preamble(
        self,
        materia: str = "Economía",
        class_number: Optional[int] = None
    ) -> str:
        """Return the LaTeX preamble with its placeholders replaced.

        Args:
            materia: Subject name substituted for "[MATERIA]".
            class_number: Optional class number substituted for "[N]"; when
                ``None`` the placeholder is left as is.

        Returns:
            Complete LaTeX preamble as a string.
        """
        values: Dict[str, str] = {self._MATERIA_SLOT: materia}
        if class_number is not None:
            values[self._CLASS_NUMBER_SLOT] = str(class_number)
        return self._fill_placeholders(self._load_latex_preamble(), values)

    def get_latex_fix_prompt(self, latex_code: str, error_log: str) -> str:
        """Build the prompt asking the AI to repair LaTeX that failed to compile.

        Args:
            latex_code: The LaTeX source that failed.
            error_log: Compiler output; only its last 3000 characters are
                included. An empty or ``None`` log yields an empty section.

        Returns:
            The fix prompt string.
        """
        log_tail = (error_log or "")[-3000:]
        return f"""I have a LaTeX file that failed to compile. Please fix the code.

COMPILER ERROR LOG:
{log_tail}

BROKEN LATEX CODE:
{latex_code}

INSTRUCTIONS:
1. Analyze the error log to find the specific syntax error.
2. Fix the LaTeX code.
3. Return ONLY the full corrected LaTeX code.
4. Do not include markdown blocks or explanations.
5. Start immediately with \\documentclass.
6. Ensure all braces {{}} are properly balanced.
7. Ensure all environments \\begin{{...}} have matching \\end{{...}}.
8. Ensure all packages are properly declared.
"""

    def extract_latex_from_response(self, response: str) -> Optional[str]:
        """Extract clean LaTeX source from an AI response.

        The first fenced code block that contains ``\\documentclass`` is used;
        when no fenced block qualifies, the whole response is searched.
        Anything before ``\\documentclass`` and after the last
        ``\\end{document}`` is dropped.

        Args:
            response: Raw text returned by the AI.

        Returns:
            The LaTeX document, or ``None`` when the response is empty or
            contains no ``\\documentclass``.
        """
        if not response:
            return None

        code_block_pattern = r"```(?:latex|tex)?\s*([\s\S]*?)\s*```"
        fenced_blocks = (
            m.group(1).strip()
            for m in re.finditer(code_block_pattern, response, re.IGNORECASE)
        )
        # A response may open with an unrelated snippet (e.g. a shell
        # command), so look past the first block for the actual document.
        latex = next(
            (block for block in fenced_blocks if "\\documentclass" in block),
            None,
        )
        if latex is None:
            latex = response.strip()

        # Verify it looks like LaTeX.
        start_idx = latex.find("\\documentclass")
        if start_idx == -1:
            return None
        latex = latex[start_idx:]

        end_marker = "\\end{document}"
        end_idx = latex.rfind(end_marker)
        if end_idx != -1:
            latex = latex[:end_idx + len(end_marker)]

        return latex.strip()
|
|
|
|
|
|
# Singleton instance for easy import
|
|
# Created once at import time; because PromptManager.__new__ hands back the
# stored instance, any later PromptManager() call returns this same object.
prompt_manager = PromptManager()
|