Restore full pipeline: 3-step summarization, formatting, PDF/DOCX generation

This commit is contained in:
2026-01-09 17:01:22 -03:00
parent b017504c52
commit e6a01d08d4
20 changed files with 260 additions and 43 deletions

6
.gitignore vendored
View File

@@ -65,3 +65,9 @@ cbc-main.pid
ehthumbs.db
Thumbs.db
.aider*
# Temporary files from restoration
old/
imperio/
check_models.py
compare_configs.py

View File

@@ -8,10 +8,10 @@ from typing import Dict, Any, List
from flask import Flask, render_template, request, jsonify, send_from_directory
from flask_cors import CORS
from ..config import settings
from ..storage import processed_registry
from ..services.webdav_service import webdav_service
from ..services import vram_manager
from config import settings
from storage import processed_registry
from services.webdav_service import webdav_service
from services import vram_manager
def create_app() -> Flask:

View File

@@ -61,8 +61,8 @@ class Settings:
# Gemini
GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
GEMINI_FLASH_MODEL: Optional[str] = os.getenv("GEMINI_FLASH_MODEL")
GEMINI_PRO_MODEL: Optional[str] = os.getenv("GEMINI_PRO_MODEL")
GEMINI_FLASH_MODEL: str = os.getenv("GEMINI_FLASH_MODEL", "gemini-2.5-flash")
GEMINI_PRO_MODEL: str = os.getenv("GEMINI_PRO_MODEL", "gemini-1.5-pro")
# CLI paths
GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")

View File

@@ -10,6 +10,7 @@ from .exceptions import (
FileProcessingError
)
from .result import Result
from .base_service import BaseService
__all__ = [
'ProcessingError',
@@ -17,5 +18,6 @@ __all__ = [
'AIProcessingError',
'ConfigurationError',
'FileProcessingError',
'Result'
'Result',
'BaseService'
]

View File

@@ -5,9 +5,9 @@ import logging
import re
from pathlib import Path
from typing import Dict, Any, List, Tuple
from ..core import FileProcessingError
from ..config import settings
from ..services.ai import ai_provider_factory
from core import FileProcessingError
from config import settings
from services.ai import ai_provider_factory
class DocumentGenerator:
@@ -22,8 +22,79 @@ class DocumentGenerator:
self.logger.info(f"Generating summary for {base_name}")
try:
# Generate summary
summary = self.ai_provider.summarize(text)
# Step 1: Generate Bullet Points (Chunking handled by provider or single prompt for now)
# Note: We use the main provider (Claude/Zai) for content generation
self.logger.info("Generating bullet points...")
bullet_prompt = f"""Analiza el siguiente texto y extrae entre 5 y 8 bullet points clave en español.
REGLAS ESTRICTAS:
1. Devuelve ÚNICAMENTE bullet points, cada línea iniciando con "- "
2. Cada bullet debe ser conciso (12-20 palabras) y resaltar datos, fechas, conceptos o conclusiones importantes
3. NO agregues introducciones, conclusiones ni texto explicativo
4. Concéntrate en los puntos más importantes del texto
5. Incluye fechas, datos específicos y nombres relevantes si los hay
Texto:
{text[:15000]}""" # Truncate to avoid context limits if necessary, though providers handle it differently
try:
bullet_points = self.ai_provider.generate_text(bullet_prompt)
self.logger.info(f"Bullet points generated: {len(bullet_points)}")
except Exception as e:
self.logger.warning(f"Bullet point generation failed: {e}")
bullet_points = "- Puntos clave no disponibles por error en IA"
# Step 2: Generate Unified Summary
self.logger.info("Generating unified summary...")
summary_prompt = f"""Eres un profesor universitario experto en historia del siglo XX. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
REQUISITOS ESTRICTOS:
- Extensión entre 500-700 palabras
- Usa encabezados Markdown con jerarquía clara (##, ###)
- Desarrolla los puntos clave con profundidad y contexto histórico
- Mantén un tono académico y analítico
- Incluye conclusiones significativas
- NO agregues texto fuera del resumen
- Devuelve únicamente el resumen en formato Markdown
Contenido a resumir:
{text[:20000]}
Puntos clave a incluir obligatoriamente:
{bullet_points}"""
try:
raw_summary = self.ai_provider.generate_text(summary_prompt)
except Exception as e:
self.logger.error(f"Raw summary generation failed: {e}")
raise e
# Step 3: Format with Gemini (using GeminiProvider explicitly)
self.logger.info("Formatting summary with Gemini...")
format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible:
{raw_summary}
Instrucciones:
- Corrige cualquier error de formato
- Asegúrate de que los encabezados estén bien espaciados
- Verifica que las viñetas usen "- " correctamente
- Mantén exactamente el contenido existente
- Devuelve únicamente el resumen formateado sin texto adicional"""
# Use generic Gemini provider for formatting as requested
from services.ai.gemini_provider import GeminiProvider
formatter = GeminiProvider()
try:
if formatter.is_available():
summary = formatter.generate_text(format_prompt)
else:
self.logger.warning("Gemini formatter not available, using raw summary")
summary = raw_summary
except Exception as e:
self.logger.warning(f"Formatting failed ({e}), using raw summary")
summary = raw_summary
# Generate filename
filename = self._generate_filename(text, summary)
@@ -45,7 +116,7 @@ class DocumentGenerator:
return True, summary, metadata
except Exception as e:
self.logger.error(f"Summary generation failed: {e}")
self.logger.error(f"Document generation process failed: {e}")
return False, "", {}
def _generate_filename(self, text: str, summary: str) -> str:

90
main.py
View File

@@ -276,7 +276,95 @@ def run_main_loop() -> None:
for file_path in audio_files:
if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
if not processed_registry.is_processed(file_path):
audio_processor.process(file_path)
from pathlib import Path
from urllib.parse import unquote
from document.generators import DocumentGenerator
from services.telegram_service import telegram_service
local_filename = unquote(Path(file_path).name)
base_name = Path(local_filename).stem
local_path = settings.LOCAL_DOWNLOADS_PATH / local_filename
settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
# Step 1: Notify and download
telegram_service.send_message(
f"🎵 Nuevo audio detectado: {local_filename}\n"
f"⬇️ Descargando..."
)
logger.info(f"Downloading audio: {file_path} -> {local_path}")
webdav_service.download(file_path, local_path)
# Step 2: Transcribe
telegram_service.send_message(f"📝 Transcribiendo audio con Whisper...")
result = audio_processor.process(str(local_path))
if result.get("success") and result.get("transcription_path"):
transcription_file = Path(result["transcription_path"])
transcription_text = result.get("text", "")
# Step 3: Generate AI summary and documents
telegram_service.send_message(f"🤖 Generando resumen con IA...")
doc_generator = DocumentGenerator()
success, summary, output_files = doc_generator.generate_summary(
transcription_text, base_name
)
# Step 4: Upload all files to Nextcloud
if success and output_files:
# Create folders
for folder in [settings.RESUMENES_FOLDER, settings.DOCX_FOLDER]:
try:
webdav_service.makedirs(folder)
except Exception:
pass
# Upload transcription TXT
if transcription_file.exists():
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
logger.info(f"Uploaded: {remote_txt}")
# Upload DOCX
docx_path = Path(output_files.get('docx_path', ''))
if docx_path.exists():
remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
webdav_service.upload(docx_path, remote_docx)
logger.info(f"Uploaded: {remote_docx}")
# Upload PDF
pdf_path = Path(output_files.get('pdf_path', ''))
if pdf_path.exists():
remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
webdav_service.upload(pdf_path, remote_pdf)
logger.info(f"Uploaded: {remote_pdf}")
# Upload Markdown
md_path = Path(output_files.get('markdown_path', ''))
if md_path.exists():
remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
webdav_service.upload(md_path, remote_md)
logger.info(f"Uploaded: {remote_md}")
# Final notification
telegram_service.send_message(
f"✅ Audio procesado: {local_filename}\n"
f"📄 DOCX: {docx_path.name if docx_path.exists() else 'N/A'}\n"
f"📑 PDF: {pdf_path.name if pdf_path.exists() else 'N/A'}\n"
f"☁️ Subido a Nextcloud"
)
else:
# Just upload transcription if summary failed
if transcription_file.exists():
try:
webdav_service.makedirs(settings.RESUMENES_FOLDER)
except Exception:
pass
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
telegram_service.send_message(
f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
)
processed_registry.save(file_path)
except Exception as e:
logger.exception(f"Error processing audio: {e}")

View File

@@ -4,10 +4,10 @@ Audio file processor using Whisper
import logging
from pathlib import Path
from typing import Dict, Any
from ..core import FileProcessingError
from ..config import settings
from ..services import vram_manager
from ..services.gpu_detector import gpu_detector
from core import FileProcessingError
from config import settings
from services import vram_manager
from services.gpu_detector import gpu_detector
from .base_processor import FileProcessor
try:

View File

@@ -4,7 +4,7 @@ Base File Processor (Strategy Pattern)
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, Any, Optional
from ..core import FileProcessingError
from core import FileProcessingError
class FileProcessor(ABC):

View File

@@ -5,10 +5,10 @@ import logging
from pathlib import Path
from typing import Dict, Any
from concurrent.futures import ThreadPoolExecutor, as_completed
from ..core import FileProcessingError
from ..config import settings
from ..services import vram_manager
from ..services.gpu_detector import gpu_detector
from core import FileProcessingError
from config import settings
from services import vram_manager
from services.gpu_detector import gpu_detector
from .base_processor import FileProcessor
try:
@@ -22,6 +22,11 @@ try:
PDF_OCR_AVAILABLE = True
except ImportError:
PDF_OCR_AVAILABLE = False
# Provide stub for type hints
try:
from PIL import Image
except ImportError:
Image = None # type: ignore
class PDFProcessor(FileProcessor):

View File

@@ -4,8 +4,8 @@ Text file processor
import logging
from pathlib import Path
from typing import Dict, Any
from ..core import FileProcessingError
from ..config import settings
from core import FileProcessingError
from config import settings
from .base_processor import FileProcessor

View File

@@ -5,11 +5,16 @@ AI Providers package for CBCFacil
from .base_provider import AIProvider
from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider
from .provider_factory import AIProviderFactory
from .provider_factory import AIProviderFactory, ai_provider_factory
# Alias for backwards compatibility
ai_service = ai_provider_factory
__all__ = [
'AIProvider',
'ClaudeProvider',
'GeminiProvider',
'AIProviderFactory'
'AIProviderFactory',
'ai_provider_factory',
'ai_service'
]

View File

@@ -23,6 +23,11 @@ class AIProvider(ABC):
"""Classify content into categories"""
pass
@abstractmethod
def generate_text(self, prompt: str, **kwargs) -> str:
    """Generate free-form text for *prompt*.

    Concrete providers implement this against their own backend.
    **kwargs carries provider-specific options (e.g. the Gemini
    implementation reads ``use_flash`` to pick a model); implementations
    may ignore options they do not support.

    Returns:
        The generated text as a plain string.
    """
    pass
@abstractmethod
def is_available(self) -> bool:
"""Check if provider is available and configured"""

View File

@@ -6,8 +6,8 @@ import subprocess
import shutil
from typing import Dict, Any, Optional
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .base_provider import AIProvider
@@ -106,3 +106,7 @@ Return only the category name, nothing else."""
"confidence": 0.9,
"provider": self.name
}
def generate_text(self, prompt: str, **kwargs) -> str:
    """Produce a completion for *prompt* via the Claude CLI.

    ``**kwargs`` is accepted for interface compatibility with other
    providers; the CLI wrapper takes no extra options, so they are
    ignored here.
    """
    response = self._run_cli(prompt)
    return response

View File

@@ -9,8 +9,8 @@ import time
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .base_provider import AIProvider
@@ -90,6 +90,7 @@ class GeminiProvider(AIProvider):
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
self._api_key = settings.GEMINI_API_KEY
self._flash_model = settings.GEMINI_FLASH_MODEL
@@ -104,6 +105,14 @@ class GeminiProvider(AIProvider):
"exponential_base": 2
}
@property
def name(self) -> str:
    """Human-readable provider identifier (reported in result metadata)."""
    return "Gemini"
def is_available(self) -> bool:
    """Return True when a Gemini backend can be used.

    Available means either a CLI binary path or an API key was found
    at construction time.
    """
    configured = self._cli_path or self._api_key
    return bool(configured)
def _init_session(self) -> None:
"""Initialize HTTP session with connection pooling"""
if self._session is None:
@@ -275,6 +284,13 @@ Return only the category name, nothing else."""
"confidence": 0.9,
"provider": self.name
}
def generate_text(self, prompt: str, **kwargs) -> str:
    """Produce a text completion for *prompt* using Gemini.

    Prefers the HTTP API whenever an API key is configured, otherwise
    falls back to the CLI binary. Pass ``use_flash=False`` to target
    the Pro model instead of Flash (API path only).
    """
    prefer_flash = kwargs.get('use_flash', True)
    if not self._api_key:
        # No API key configured — route through the local CLI instead.
        return self._call_cli(prompt, use_yolo=True)
    return self._call_api(prompt, use_flash=prefer_flash)
def get_stats(self) -> Dict[str, Any]:
"""Get provider statistics"""

View File

@@ -4,7 +4,7 @@ AI Provider Factory (Factory Pattern)
import logging
from typing import Dict, Type
from ..core import AIProcessingError
from core import AIProcessingError
from .base_provider import AIProvider
from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider

View File

@@ -7,8 +7,8 @@ import time
from typing import Optional, Dict, Any
from threading import Lock
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .ai.provider_factory import AIProviderFactory, ai_provider_factory

View File

@@ -5,7 +5,7 @@ import logging
import time
from typing import Optional
from datetime import datetime
from ..config import settings
from config import settings
try:
import requests

View File

@@ -7,8 +7,8 @@ import os
import time
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
from ..core import BaseService
from ..config import settings
from core import BaseService
from config import settings
try:
import torch

View File

@@ -13,8 +13,8 @@ import requests
from requests.auth import HTTPBasicAuth
from requests.adapters import HTTPAdapter
from ..config import settings
from ..core import WebDAVError
from config import settings
from core import WebDAVError
class WebDAVService:
@@ -112,16 +112,31 @@ class WebDAVService:
files = []
try:
import xml.etree.ElementTree as ET
from urllib.parse import urlparse, unquote
root = ET.fromstring(xml_response)
# Get the WebDAV path from settings
parsed_url = urlparse(settings.NEXTCLOUD_URL)
webdav_path = parsed_url.path.rstrip('/') # e.g. /remote.php/webdav
# Find all href elements
for href in root.findall('.//{DAV:}href'):
href_text = href.text or ""
href_text = unquote(href_text) # Decode URL encoding
# Remove base URL from href
base_url = settings.NEXTCLOUD_URL.rstrip('/')
if href_text.startswith(base_url):
href_text = href_text[len(base_url):]
files.append(href_text.lstrip('/'))
# Also strip the webdav path if it's there
if href_text.startswith(webdav_path):
href_text = href_text[len(webdav_path):]
# Clean up the path
href_text = href_text.lstrip('/')
if href_text: # Skip empty paths (root directory)
files.append(href_text)
except Exception as e:
self.logger.error(f"Error parsing PROPFIND response: {e}")
@@ -178,8 +193,8 @@ class WebDAVService:
self._request('MKCOL', current)
self.logger.debug(f"Created directory: {current}")
except WebDAVError as e:
# Directory might already exist (409 Conflict is OK)
if '409' not in str(e):
# Directory might already exist (409 Conflict or 405 MethodNotAllowed is OK)
if '409' not in str(e) and '405' not in str(e):
raise
def delete(self, remote_path: str) -> None:

View File

@@ -7,7 +7,7 @@ import time
from pathlib import Path
from typing import Set, Optional
from datetime import datetime, timedelta
from ..config import settings
from config import settings
class BloomFilter: