Restore full pipeline: 3-step summarization, formatting, PDF/DOCX generation

This commit is contained in:
2026-01-09 17:01:22 -03:00
parent b017504c52
commit e6a01d08d4
20 changed files with 260 additions and 43 deletions

6
.gitignore vendored
View File

@@ -65,3 +65,9 @@ cbc-main.pid
ehthumbs.db ehthumbs.db
Thumbs.db Thumbs.db
.aider* .aider*
# Temporary files from restoration
old/
imperio/
check_models.py
compare_configs.py

View File

@@ -8,10 +8,10 @@ from typing import Dict, Any, List
from flask import Flask, render_template, request, jsonify, send_from_directory from flask import Flask, render_template, request, jsonify, send_from_directory
from flask_cors import CORS from flask_cors import CORS
from ..config import settings from config import settings
from ..storage import processed_registry from storage import processed_registry
from ..services.webdav_service import webdav_service from services.webdav_service import webdav_service
from ..services import vram_manager from services import vram_manager
def create_app() -> Flask: def create_app() -> Flask:

View File

@@ -61,8 +61,8 @@ class Settings:
# Gemini # Gemini
GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY") GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
GEMINI_FLASH_MODEL: Optional[str] = os.getenv("GEMINI_FLASH_MODEL") GEMINI_FLASH_MODEL: str = os.getenv("GEMINI_FLASH_MODEL", "gemini-2.5-flash")
GEMINI_PRO_MODEL: Optional[str] = os.getenv("GEMINI_PRO_MODEL") GEMINI_PRO_MODEL: str = os.getenv("GEMINI_PRO_MODEL", "gemini-1.5-pro")
# CLI paths # CLI paths
GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH") GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")

View File

@@ -10,6 +10,7 @@ from .exceptions import (
FileProcessingError FileProcessingError
) )
from .result import Result from .result import Result
from .base_service import BaseService
__all__ = [ __all__ = [
'ProcessingError', 'ProcessingError',
@@ -17,5 +18,6 @@ __all__ = [
'AIProcessingError', 'AIProcessingError',
'ConfigurationError', 'ConfigurationError',
'FileProcessingError', 'FileProcessingError',
'Result' 'Result',
'BaseService'
] ]

View File

@@ -5,9 +5,9 @@ import logging
import re import re
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List, Tuple from typing import Dict, Any, List, Tuple
from ..core import FileProcessingError from core import FileProcessingError
from ..config import settings from config import settings
from ..services.ai import ai_provider_factory from services.ai import ai_provider_factory
class DocumentGenerator: class DocumentGenerator:
@@ -22,8 +22,79 @@ class DocumentGenerator:
self.logger.info(f"Generating summary for {base_name}") self.logger.info(f"Generating summary for {base_name}")
try: try:
# Generate summary # Step 1: Generate Bullet Points (Chunking handled by provider or single prompt for now)
summary = self.ai_provider.summarize(text) # Note: We use the main provider (Claude/Zai) for content generation
self.logger.info("Generating bullet points...")
bullet_prompt = f"""Analiza el siguiente texto y extrae entre 5 y 8 bullet points clave en español.
REGLAS ESTRICTAS:
1. Devuelve ÚNICAMENTE bullet points, cada línea iniciando con "- "
2. Cada bullet debe ser conciso (12-20 palabras) y resaltar datos, fechas, conceptos o conclusiones importantes
3. NO agregues introducciones, conclusiones ni texto explicativo
4. Concéntrate en los puntos más importantes del texto
5. Incluye fechas, datos específicos y nombres relevantes si los hay
Texto:
{text[:15000]}""" # Truncate to avoid context limits if necessary, though providers handle it differently
try:
bullet_points = self.ai_provider.generate_text(bullet_prompt)
self.logger.info(f"Bullet points generated: {len(bullet_points)}")
except Exception as e:
self.logger.warning(f"Bullet point generation failed: {e}")
bullet_points = "- Puntos clave no disponibles por error en IA"
# Step 2: Generate Unified Summary
self.logger.info("Generating unified summary...")
summary_prompt = f"""Eres un profesor universitario experto en historia del siglo XX. Redacta un resumen académico integrado en español usando el texto y los bullet points extraídos.
REQUISITOS ESTRICTOS:
- Extensión entre 500-700 palabras
- Usa encabezados Markdown con jerarquía clara (##, ###)
- Desarrolla los puntos clave con profundidad y contexto histórico
- Mantén un tono académico y analítico
- Incluye conclusiones significativas
- NO agregues texto fuera del resumen
- Devuelve únicamente el resumen en formato Markdown
Contenido a resumir:
{text[:20000]}
Puntos clave a incluir obligatoriamente:
{bullet_points}"""
try:
raw_summary = self.ai_provider.generate_text(summary_prompt)
except Exception as e:
self.logger.error(f"Raw summary generation failed: {e}")
raise e
# Step 3: Format with Gemini (using GeminiProvider explicitly)
self.logger.info("Formatting summary with Gemini...")
format_prompt = f"""Revisa y mejora el siguiente resumen en Markdown para que sea perfectamente legible:
{raw_summary}
Instrucciones:
- Corrige cualquier error de formato
- Asegúrate de que los encabezados estén bien espaciados
- Verifica que las viñetas usen "- " correctamente
- Mantén exactamente el contenido existente
- Devuelve únicamente el resumen formateado sin texto adicional"""
# Use generic Gemini provider for formatting as requested
from services.ai.gemini_provider import GeminiProvider
formatter = GeminiProvider()
try:
if formatter.is_available():
summary = formatter.generate_text(format_prompt)
else:
self.logger.warning("Gemini formatter not available, using raw summary")
summary = raw_summary
except Exception as e:
self.logger.warning(f"Formatting failed ({e}), using raw summary")
summary = raw_summary
# Generate filename # Generate filename
filename = self._generate_filename(text, summary) filename = self._generate_filename(text, summary)
@@ -45,7 +116,7 @@ class DocumentGenerator:
return True, summary, metadata return True, summary, metadata
except Exception as e: except Exception as e:
self.logger.error(f"Summary generation failed: {e}") self.logger.error(f"Document generation process failed: {e}")
return False, "", {} return False, "", {}
def _generate_filename(self, text: str, summary: str) -> str: def _generate_filename(self, text: str, summary: str) -> str:

90
main.py
View File

@@ -276,7 +276,95 @@ def run_main_loop() -> None:
for file_path in audio_files: for file_path in audio_files:
if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS): if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
if not processed_registry.is_processed(file_path): if not processed_registry.is_processed(file_path):
audio_processor.process(file_path) from pathlib import Path
from urllib.parse import unquote
from document.generators import DocumentGenerator
from services.telegram_service import telegram_service
local_filename = unquote(Path(file_path).name)
base_name = Path(local_filename).stem
local_path = settings.LOCAL_DOWNLOADS_PATH / local_filename
settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
# Step 1: Notify and download
telegram_service.send_message(
f"🎵 Nuevo audio detectado: {local_filename}\n"
f"⬇️ Descargando..."
)
logger.info(f"Downloading audio: {file_path} -> {local_path}")
webdav_service.download(file_path, local_path)
# Step 2: Transcribe
telegram_service.send_message(f"📝 Transcribiendo audio con Whisper...")
result = audio_processor.process(str(local_path))
if result.get("success") and result.get("transcription_path"):
transcription_file = Path(result["transcription_path"])
transcription_text = result.get("text", "")
# Step 3: Generate AI summary and documents
telegram_service.send_message(f"🤖 Generando resumen con IA...")
doc_generator = DocumentGenerator()
success, summary, output_files = doc_generator.generate_summary(
transcription_text, base_name
)
# Step 4: Upload all files to Nextcloud
if success and output_files:
# Create folders
for folder in [settings.RESUMENES_FOLDER, settings.DOCX_FOLDER]:
try:
webdav_service.makedirs(folder)
except Exception:
pass
# Upload transcription TXT
if transcription_file.exists():
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
logger.info(f"Uploaded: {remote_txt}")
# Upload DOCX
docx_path = Path(output_files.get('docx_path', ''))
if docx_path.exists():
remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
webdav_service.upload(docx_path, remote_docx)
logger.info(f"Uploaded: {remote_docx}")
# Upload PDF
pdf_path = Path(output_files.get('pdf_path', ''))
if pdf_path.exists():
remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
webdav_service.upload(pdf_path, remote_pdf)
logger.info(f"Uploaded: {remote_pdf}")
# Upload Markdown
md_path = Path(output_files.get('markdown_path', ''))
if md_path.exists():
remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
webdav_service.upload(md_path, remote_md)
logger.info(f"Uploaded: {remote_md}")
# Final notification
telegram_service.send_message(
f"✅ Audio procesado: {local_filename}\n"
f"📄 DOCX: {docx_path.name if docx_path.exists() else 'N/A'}\n"
f"📑 PDF: {pdf_path.name if pdf_path.exists() else 'N/A'}\n"
f"☁️ Subido a Nextcloud"
)
else:
# Just upload transcription if summary failed
if transcription_file.exists():
try:
webdav_service.makedirs(settings.RESUMENES_FOLDER)
except Exception:
pass
remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
webdav_service.upload(transcription_file, remote_txt)
telegram_service.send_message(
f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
)
processed_registry.save(file_path) processed_registry.save(file_path)
except Exception as e: except Exception as e:
logger.exception(f"Error processing audio: {e}") logger.exception(f"Error processing audio: {e}")

View File

@@ -4,10 +4,10 @@ Audio file processor using Whisper
import logging import logging
from pathlib import Path from pathlib import Path
from typing import Dict, Any from typing import Dict, Any
from ..core import FileProcessingError from core import FileProcessingError
from ..config import settings from config import settings
from ..services import vram_manager from services import vram_manager
from ..services.gpu_detector import gpu_detector from services.gpu_detector import gpu_detector
from .base_processor import FileProcessor from .base_processor import FileProcessor
try: try:

View File

@@ -4,7 +4,7 @@ Base File Processor (Strategy Pattern)
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from ..core import FileProcessingError from core import FileProcessingError
class FileProcessor(ABC): class FileProcessor(ABC):

View File

@@ -5,10 +5,10 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Dict, Any from typing import Dict, Any
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from ..core import FileProcessingError from core import FileProcessingError
from ..config import settings from config import settings
from ..services import vram_manager from services import vram_manager
from ..services.gpu_detector import gpu_detector from services.gpu_detector import gpu_detector
from .base_processor import FileProcessor from .base_processor import FileProcessor
try: try:
@@ -22,6 +22,11 @@ try:
PDF_OCR_AVAILABLE = True PDF_OCR_AVAILABLE = True
except ImportError: except ImportError:
PDF_OCR_AVAILABLE = False PDF_OCR_AVAILABLE = False
# Provide stub for type hints
try:
from PIL import Image
except ImportError:
Image = None # type: ignore
class PDFProcessor(FileProcessor): class PDFProcessor(FileProcessor):

View File

@@ -4,8 +4,8 @@ Text file processor
import logging import logging
from pathlib import Path from pathlib import Path
from typing import Dict, Any from typing import Dict, Any
from ..core import FileProcessingError from core import FileProcessingError
from ..config import settings from config import settings
from .base_processor import FileProcessor from .base_processor import FileProcessor

View File

@@ -5,11 +5,16 @@ AI Providers package for CBCFacil
from .base_provider import AIProvider from .base_provider import AIProvider
from .claude_provider import ClaudeProvider from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider from .gemini_provider import GeminiProvider
from .provider_factory import AIProviderFactory from .provider_factory import AIProviderFactory, ai_provider_factory
# Alias for backwards compatibility
ai_service = ai_provider_factory
__all__ = [ __all__ = [
'AIProvider', 'AIProvider',
'ClaudeProvider', 'ClaudeProvider',
'GeminiProvider', 'GeminiProvider',
'AIProviderFactory' 'AIProviderFactory',
'ai_provider_factory',
'ai_service'
] ]

View File

@@ -23,6 +23,11 @@ class AIProvider(ABC):
"""Classify content into categories""" """Classify content into categories"""
pass pass
@abstractmethod
def generate_text(self, prompt: str, **kwargs) -> str:
    """Produce free-form text for ``prompt``.

    Abstract hook: each concrete provider supplies its own
    implementation. Extra keyword arguments are accepted so that a
    provider can take provider-specific options.
    """
    ...
@abstractmethod @abstractmethod
def is_available(self) -> bool: def is_available(self) -> bool:
"""Check if provider is available and configured""" """Check if provider is available and configured"""

View File

@@ -6,8 +6,8 @@ import subprocess
import shutil import shutil
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from ..config import settings from config import settings
from ..core import AIProcessingError from core import AIProcessingError
from .base_provider import AIProvider from .base_provider import AIProvider
@@ -106,3 +106,7 @@ Return only the category name, nothing else."""
"confidence": 0.9, "confidence": 0.9,
"provider": self.name "provider": self.name
} }
def generate_text(self, prompt: str, **kwargs) -> str:
    """Generate text using Claude.

    Hands ``prompt`` to ``self._run_cli`` and returns its result
    unchanged. Keyword arguments are accepted but not forwarded.
    """
    cli_output = self._run_cli(prompt)
    return cli_output

View File

@@ -9,8 +9,8 @@ import time
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ..config import settings from config import settings
from ..core import AIProcessingError from core import AIProcessingError
from .base_provider import AIProvider from .base_provider import AIProvider
@@ -90,6 +90,7 @@ class GeminiProvider(AIProvider):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
self.logger = logging.getLogger(__name__)
self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini") self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
self._api_key = settings.GEMINI_API_KEY self._api_key = settings.GEMINI_API_KEY
self._flash_model = settings.GEMINI_FLASH_MODEL self._flash_model = settings.GEMINI_FLASH_MODEL
@@ -104,6 +105,14 @@ class GeminiProvider(AIProvider):
"exponential_base": 2 "exponential_base": 2
} }
@property
def name(self) -> str:
    """Human-readable label identifying this provider."""
    provider_label = "Gemini"
    return provider_label
def is_available(self) -> bool:
    """Report whether a Gemini CLI path or an API key is configured.

    Only checks that one of the two values is set (truthy); it does
    not verify that the CLI or the API actually responds.
    """
    if self._cli_path:
        return True
    return bool(self._api_key)
def _init_session(self) -> None: def _init_session(self) -> None:
"""Initialize HTTP session with connection pooling""" """Initialize HTTP session with connection pooling"""
if self._session is None: if self._session is None:
@@ -276,6 +285,13 @@ Return only the category name, nothing else."""
"provider": self.name "provider": self.name
} }
def generate_text(self, prompt: str, **kwargs) -> str:
    """Generate text using Gemini.

    With an API key configured, the prompt goes through
    ``self._call_api`` and the optional ``use_flash`` keyword
    (default ``True``) is forwarded. Without one, the prompt is sent
    through ``self._call_cli`` with ``use_yolo=True`` and ``use_flash``
    is ignored.
    """
    if not self._api_key:
        # No API key: the CLI is the only remaining route.
        return self._call_cli(prompt, use_yolo=True)
    prefer_flash = kwargs.get('use_flash', True)
    return self._call_api(prompt, use_flash=prefer_flash)
def get_stats(self) -> Dict[str, Any]: def get_stats(self) -> Dict[str, Any]:
"""Get provider statistics""" """Get provider statistics"""
return { return {

View File

@@ -4,7 +4,7 @@ AI Provider Factory (Factory Pattern)
import logging import logging
from typing import Dict, Type from typing import Dict, Type
from ..core import AIProcessingError from core import AIProcessingError
from .base_provider import AIProvider from .base_provider import AIProvider
from .claude_provider import ClaudeProvider from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider from .gemini_provider import GeminiProvider

View File

@@ -7,8 +7,8 @@ import time
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
from threading import Lock from threading import Lock
from ..config import settings from config import settings
from ..core import AIProcessingError from core import AIProcessingError
from .ai.provider_factory import AIProviderFactory, ai_provider_factory from .ai.provider_factory import AIProviderFactory, ai_provider_factory

View File

@@ -5,7 +5,7 @@ import logging
import time import time
from typing import Optional from typing import Optional
from datetime import datetime from datetime import datetime
from ..config import settings from config import settings
try: try:
import requests import requests

View File

@@ -7,8 +7,8 @@ import os
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Optional, Dict, Any from typing import Optional, Dict, Any
from ..core import BaseService from core import BaseService
from ..config import settings from config import settings
try: try:
import torch import torch

View File

@@ -13,8 +13,8 @@ import requests
from requests.auth import HTTPBasicAuth from requests.auth import HTTPBasicAuth
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from ..config import settings from config import settings
from ..core import WebDAVError from core import WebDAVError
class WebDAVService: class WebDAVService:
@@ -112,16 +112,31 @@ class WebDAVService:
files = [] files = []
try: try:
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from urllib.parse import urlparse, unquote
root = ET.fromstring(xml_response) root = ET.fromstring(xml_response)
# Get the WebDAV path from settings
parsed_url = urlparse(settings.NEXTCLOUD_URL)
webdav_path = parsed_url.path.rstrip('/') # e.g. /remote.php/webdav
# Find all href elements # Find all href elements
for href in root.findall('.//{DAV:}href'): for href in root.findall('.//{DAV:}href'):
href_text = href.text or "" href_text = href.text or ""
href_text = unquote(href_text) # Decode URL encoding
# Remove base URL from href # Remove base URL from href
base_url = settings.NEXTCLOUD_URL.rstrip('/') base_url = settings.NEXTCLOUD_URL.rstrip('/')
if href_text.startswith(base_url): if href_text.startswith(base_url):
href_text = href_text[len(base_url):] href_text = href_text[len(base_url):]
files.append(href_text.lstrip('/'))
# Also strip the webdav path if it's there
if href_text.startswith(webdav_path):
href_text = href_text[len(webdav_path):]
# Clean up the path
href_text = href_text.lstrip('/')
if href_text: # Skip empty paths (root directory)
files.append(href_text)
except Exception as e: except Exception as e:
self.logger.error(f"Error parsing PROPFIND response: {e}") self.logger.error(f"Error parsing PROPFIND response: {e}")
@@ -178,8 +193,8 @@ class WebDAVService:
self._request('MKCOL', current) self._request('MKCOL', current)
self.logger.debug(f"Created directory: {current}") self.logger.debug(f"Created directory: {current}")
except WebDAVError as e: except WebDAVError as e:
# Directory might already exist (409 Conflict is OK) # Directory might already exist (409 Conflict or 405 MethodNotAllowed is OK)
if '409' not in str(e): if '409' not in str(e) and '405' not in str(e):
raise raise
def delete(self, remote_path: str) -> None: def delete(self, remote_path: str) -> None:

View File

@@ -7,7 +7,7 @@ import time
from pathlib import Path from pathlib import Path
from typing import Set, Optional from typing import Set, Optional
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ..config import settings from config import settings
class BloomFilter: class BloomFilter: