Restore full pipeline: 3-step summarization, formatting, PDF/DOCX generation

This commit is contained in:
2026-01-09 17:01:22 -03:00
parent b017504c52
commit e6a01d08d4
20 changed files with 260 additions and 43 deletions

View File

@@ -5,11 +5,16 @@ AI Providers package for CBCFacil
from .base_provider import AIProvider
from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider
from .provider_factory import AIProviderFactory
from .provider_factory import AIProviderFactory, ai_provider_factory
# Alias for backwards compatibility
ai_service = ai_provider_factory
__all__ = [
'AIProvider',
'ClaudeProvider',
'GeminiProvider',
'AIProviderFactory'
'AIProviderFactory',
'ai_provider_factory',
'ai_service'
]

View File

@@ -23,6 +23,11 @@ class AIProvider(ABC):
"""Classify content into categories"""
pass
@abstractmethod
def generate_text(self, prompt: str, **kwargs) -> str:
    """Produce text for the given prompt.

    Abstract hook: the base implementation does nothing and returns
    ``None``. ``kwargs`` is accepted so implementations can take extra
    options.
    """
    ...
@abstractmethod
def is_available(self) -> bool:
"""Check if provider is available and configured"""

View File

@@ -6,8 +6,8 @@ import subprocess
import shutil
from typing import Dict, Any, Optional
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .base_provider import AIProvider
@@ -106,3 +106,7 @@ Return only the category name, nothing else."""
"confidence": 0.9,
"provider": self.name
}
def generate_text(self, prompt: str, **kwargs) -> str:
    """Return Claude's output for ``prompt``.

    Delegates to ``self._run_cli``. Extra keyword arguments are accepted
    but are not forwarded to the CLI call.
    """
    response = self._run_cli(prompt)
    return response

View File

@@ -9,8 +9,8 @@ import time
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .base_provider import AIProvider
@@ -90,6 +90,7 @@ class GeminiProvider(AIProvider):
def __init__(self):
super().__init__()
self.logger = logging.getLogger(__name__)
self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
self._api_key = settings.GEMINI_API_KEY
self._flash_model = settings.GEMINI_FLASH_MODEL
@@ -104,6 +105,14 @@ class GeminiProvider(AIProvider):
"exponential_base": 2
}
@property
def name(self) -> str:
    """Provider name as a fixed string."""
    provider_name = "Gemini"
    return provider_name
def is_available(self) -> bool:
    """Report whether a Gemini CLI path or an API key is set."""
    # Either one is enough; check the CLI path first, then the API key.
    if self._cli_path:
        return True
    return bool(self._api_key)
def _init_session(self) -> None:
"""Initialize HTTP session with connection pooling"""
if self._session is None:
@@ -275,6 +284,13 @@ Return only the category name, nothing else."""
"confidence": 0.9,
"provider": self.name
}
def generate_text(self, prompt: str, **kwargs) -> str:
    """Generate text using Gemini.

    Uses ``self._call_api`` when an API key is set, otherwise falls back
    to ``self._call_cli``.

    Keyword Args:
        use_flash: Passed to ``_call_api`` as ``use_flash``. Defaults to
            ``True``.
        use_yolo: Passed to ``_call_cli`` as ``use_yolo``. Defaults to
            ``True``, which matches the previously hard-coded value, so
            existing callers are unaffected.

    Returns:
        Whatever the selected backend call returns.
    """
    use_flash = kwargs.get('use_flash', True)
    # Previously fixed at True; now overridable like use_flash.
    use_yolo = kwargs.get('use_yolo', True)

    if self._api_key:
        return self._call_api(prompt, use_flash=use_flash)
    return self._call_cli(prompt, use_yolo=use_yolo)
def get_stats(self) -> Dict[str, Any]:
"""Get provider statistics"""

View File

@@ -4,7 +4,7 @@ AI Provider Factory (Factory Pattern)
import logging
from typing import Dict, Type
from ..core import AIProcessingError
from core import AIProcessingError
from .base_provider import AIProvider
from .claude_provider import ClaudeProvider
from .gemini_provider import GeminiProvider

View File

@@ -7,8 +7,8 @@ import time
from typing import Optional, Dict, Any
from threading import Lock
from ..config import settings
from ..core import AIProcessingError
from config import settings
from core import AIProcessingError
from .ai.provider_factory import AIProviderFactory, ai_provider_factory

View File

@@ -5,7 +5,7 @@ import logging
import time
from typing import Optional
from datetime import datetime
from ..config import settings
from config import settings
try:
import requests

View File

@@ -7,8 +7,8 @@ import os
import time
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
from ..core import BaseService
from ..config import settings
from core import BaseService
from config import settings
try:
import torch

View File

@@ -13,8 +13,8 @@ import requests
from requests.auth import HTTPBasicAuth
from requests.adapters import HTTPAdapter
from ..config import settings
from ..core import WebDAVError
from config import settings
from core import WebDAVError
class WebDAVService:
@@ -112,16 +112,31 @@ class WebDAVService:
files = []
try:
import xml.etree.ElementTree as ET
from urllib.parse import urlparse, unquote
root = ET.fromstring(xml_response)
# Get the WebDAV path from settings
parsed_url = urlparse(settings.NEXTCLOUD_URL)
webdav_path = parsed_url.path.rstrip('/') # e.g. /remote.php/webdav
# Find all href elements
for href in root.findall('.//{DAV:}href'):
href_text = href.text or ""
href_text = unquote(href_text) # Decode URL encoding
# Remove base URL from href
base_url = settings.NEXTCLOUD_URL.rstrip('/')
if href_text.startswith(base_url):
href_text = href_text[len(base_url):]
files.append(href_text.lstrip('/'))
# Also strip the webdav path if it's there
if href_text.startswith(webdav_path):
href_text = href_text[len(webdav_path):]
# Clean up the path
href_text = href_text.lstrip('/')
if href_text: # Skip empty paths (root directory)
files.append(href_text)
except Exception as e:
self.logger.error(f"Error parsing PROPFIND response: {e}")
@@ -178,8 +193,8 @@ class WebDAVService:
self._request('MKCOL', current)
self.logger.debug(f"Created directory: {current}")
except WebDAVError as e:
# Directory might already exist (409 Conflict is OK)
if '409' not in str(e):
# Directory might already exist (409 Conflict or 405 MethodNotAllowed is OK)
if '409' not in str(e) and '405' not in str(e):
raise
def delete(self, remote_path: str) -> None: