CBCFacil v8.0 - Refactored with AMD GPU support

This commit is contained in:
2026-01-09 13:05:46 -03:00
parent cb17136f21
commit b017504c52
54 changed files with 7251 additions and 3670 deletions

View File

@@ -0,0 +1,69 @@
"""
AI Service - Unified interface for AI providers
"""
import logging
from typing import Optional, Dict, Any
from .config import settings
from .core import AIProcessingError
from .services.ai.provider_factory import AIProviderFactory, ai_provider_factory
class AIService:
    """Thin facade that forwards AI operations to providers from the factory.

    Every public method catches ``AIProcessingError``, logs it, and returns a
    fallback value instead of propagating the error.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Resolved on first access through the ``factory`` property.
        self._factory: Optional[AIProviderFactory] = None

    @property
    def factory(self) -> AIProviderFactory:
        """Return the provider factory, binding the module-level one on first use."""
        if self._factory is not None:
            return self._factory
        self._factory = ai_provider_factory
        return self._factory

    def generate_text(
        self,
        prompt: str,
        provider: Optional[str] = None,
        max_tokens: int = 4096
    ) -> str:
        """Generate text for ``prompt`` with the named provider.

        Args:
            prompt: Text sent to the provider.
            provider: Provider name; ``'gemini'`` is used when omitted or empty.
            max_tokens: Forwarded to the provider's ``generate`` call.

        Returns:
            The provider's output, or ``"Error: <message>"`` when an
            ``AIProcessingError`` is raised.
        """
        try:
            backend = self.factory.get_provider(provider or 'gemini')
            generated = backend.generate(prompt, max_tokens=max_tokens)
        except AIProcessingError as exc:
            self.logger.error(f"AI generation failed: {exc}")
            return f"Error: {str(exc)}"
        return generated

    def summarize(self, text: str, **kwargs) -> str:
        """Summarize ``text`` with the factory's best provider.

        Returns:
            The summary, or ``"Error: <message>"`` when an
            ``AIProcessingError`` is raised.
        """
        try:
            backend = self.factory.get_best_provider()
            summary = backend.summarize(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Summarization failed: {exc}")
            return f"Error: {str(exc)}"
        return summary

    def correct_text(self, text: str, **kwargs) -> str:
        """Correct ``text`` with the factory's best provider.

        Returns:
            The corrected text, or the unmodified input when an
            ``AIProcessingError`` is raised.
        """
        try:
            backend = self.factory.get_best_provider()
            corrected = backend.correct_text(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Text correction failed: {exc}")
            # Fall back to the caller's original text.
            return text
        return corrected

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` with the factory's best provider.

        Returns:
            The provider's classification dict, or
            ``{"category": "otras_clases", "confidence": 0.0}`` when an
            ``AIProcessingError`` is raised.
        """
        try:
            backend = self.factory.get_best_provider()
            classification = backend.classify_content(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Classification failed: {exc}")
            return {"category": "otras_clases", "confidence": 0.0}
        return classification
# Module-level AIService instance, created once when this module is imported.
ai_service = AIService()

View File

@@ -0,0 +1,171 @@
"""
Gemini AI Provider implementation
"""
import logging
import subprocess
import shutil
import requests
import time
from typing import Dict, Any, Optional
from ..config import settings
from ..core import AIProcessingError
from .base_provider import AIProvider
class GeminiProvider(AIProvider):
    """Gemini provider that talks to the Gemini CLI and/or the Gemini REST API.

    ``_run`` tries the CLI first when a binary path is known and falls back to
    the REST API when an API key is configured.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # An explicitly configured CLI path wins; otherwise look for "gemini" on PATH.
        self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
        self._api_key = settings.GEMINI_API_KEY
        self._flash_model = settings.GEMINI_FLASH_MODEL
        self._pro_model = settings.GEMINI_PRO_MODEL
        # HTTP session is created lazily on the first API call.
        self._session = None

    @property
    def name(self) -> str:
        """Provider display name."""
        return "Gemini"

    def is_available(self) -> bool:
        """Return True when either a CLI path or an API key is configured."""
        return bool(self._cli_path or self._api_key)

    def _run_cli(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the Gemini CLI with ``prompt`` and return its stripped stdout.

        Args:
            prompt: Prompt text passed to the CLI.
            use_flash: Select the flash model when True, the pro model otherwise.
            timeout: Seconds to wait for the subprocess.

        Raises:
            AIProcessingError: If the CLI is not configured, cannot be started,
                times out, or exits with a non-zero status.
        """
        if not self._cli_path:
            raise AIProcessingError("Gemini CLI not available")
        model = self._flash_model if use_flash else self._pro_model
        # NOTE(review): model and prompt are passed as bare positional
        # arguments -- confirm this matches the installed CLI's interface.
        cmd = [self._cli_path, model, prompt]
        # Only the subprocess call is guarded, so the AIProcessingError raised
        # below for a non-zero exit is not re-wrapped by the generic handler.
        try:
            process = subprocess.run(
                cmd,
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Gemini CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Gemini CLI error: {e}") from e
        if process.returncode != 0:
            error_msg = (process.stderr or "").strip() or "Unknown error"
            raise AIProcessingError(f"Gemini CLI failed: {error_msg}")
        return process.stdout.strip()

    def _get_session(self):
        """Return the shared HTTP session, creating and configuring it on first use."""
        if self._session is None:
            session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=10,
                pool_maxsize=20
            )
            session.mount('https://', adapter)
            self._session = session
        return self._session

    def _call_api(self, prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
        """Call the Gemini ``generateContent`` REST endpoint and return the text.

        Args:
            prompt: Prompt text sent as the single content part.
            use_flash: Select the flash model when True, the pro model otherwise.
            timeout: Request timeout in seconds.

        Raises:
            AIProcessingError: If no API key is configured, the request fails,
                or the response does not contain the expected fields.
        """
        if not self._api_key:
            raise AIProcessingError("Gemini API key not configured")
        model = self._flash_model if use_flash else self._pro_model
        session = self._get_session()
        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
        payload = {
            "contents": [{
                "parts": [{"text": prompt}]
            }]
        }
        # Send the key as a header rather than a query parameter: requests puts
        # the full URL into HTTPError messages, which would otherwise copy the
        # key into the AIProcessingError text and into the logs.
        headers = {"x-goog-api-key": self._api_key}
        try:
            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=timeout
            )
            response.raise_for_status()
            data = response.json()
            if not isinstance(data, dict) or not data.get("candidates"):
                raise AIProcessingError("Empty response from Gemini API")
            candidate = data["candidates"][0]
            if "content" not in candidate or "parts" not in candidate["content"]:
                raise AIProcessingError("Invalid response format from Gemini API")
            result = candidate["content"]["parts"][0]["text"]
            return result.strip()
        except requests.RequestException as e:
            raise AIProcessingError(f"Gemini API request failed: {e}") from e
        except (KeyError, IndexError, TypeError, AttributeError, ValueError) as e:
            # Covers malformed JSON as well as bodies of an unexpected shape.
            raise AIProcessingError(f"Gemini API response error: {e}") from e

    def _run(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run ``prompt`` through the CLI, falling back to the REST API.

        Args:
            prompt: Prompt text.
            use_flash: Select the flash model when True, the pro model otherwise.
            timeout: CLI timeout in seconds; API calls are capped at 180 seconds.

        Raises:
            AIProcessingError: If the API call fails, or if neither backend
                could produce a result (a preceding CLI failure is attached as
                the cause).
        """
        cli_error = None
        if self._cli_path:
            try:
                return self._run_cli(prompt, use_flash, timeout)
            except Exception as e:
                # Best effort: any CLI problem falls through to the API path.
                self.logger.warning(f"Gemini CLI failed, trying API: {e}")
                cli_error = e
        if self._api_key:
            return self._call_api(prompt, use_flash, min(timeout, 180))
        # ``from None`` (no CLI attempt was made) simply leaves no cause attached.
        raise AIProcessingError("No Gemini provider available (CLI or API)") from cli_error

    def summarize(self, text: str, **kwargs) -> str:
        """Return a summary of ``text``; extra keyword arguments are ignored."""
        prompt = (
            "Summarize the following text:\n"
            f"{text}\n"
            "Provide a clear, concise summary in Spanish."
        )
        return self._run(prompt, use_flash=True)

    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` corrected for grammar, spelling, and clarity.

        Extra keyword arguments are ignored.
        """
        prompt = (
            "Correct the following text for grammar, spelling, and clarity:\n"
            f"{text}\n"
            "Return only the corrected text, nothing else."
        )
        return self._run(prompt, use_flash=True)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the known categories.

        Returns:
            A dict with ``category`` (``"otras_clases"`` when the model's
            answer is not a known category), a fixed ``confidence`` of 0.9,
            and ``provider``. Extra keyword arguments are ignored.
        """
        categories = ("historia", "analisis_contable", "instituciones_gobierno", "otras_clases")
        category_lines = "\n".join(f"- {category}" for category in categories)
        prompt = (
            "Classify the following text into one of these categories:\n"
            f"{category_lines}\n"
            f"Text: {text}\n"
            "Return only the category name, nothing else."
        )
        raw = self._run(prompt, use_flash=True)
        # Models often wrap the answer in quotes/backticks or add a trailing
        # period; trim those before validating so a correct answer is not
        # discarded.
        result = raw.strip().strip("\"'`*.").strip().lower()
        if result not in categories:
            result = "otras_clases"
        return {
            "category": result,
            "confidence": 0.9,
            "provider": self.name
        }

View File

@@ -0,0 +1,137 @@
"""
Processed files registry using repository pattern
"""
import fcntl
import logging
from pathlib import Path
from typing import Set, Optional
from datetime import datetime, timedelta
from ..config import settings
class ProcessedRegistry:
    """Line-per-entry registry of processed files with an in-memory cache.

    The registry file is read from ``settings.processed_files_path``. Each
    entry is cached both as written and by its basename, so lookups match on
    either. Writes are performed under an exclusive ``flock``.

    Within this class ``load()`` is only invoked from ``initialize()``, so the
    cache TTL only affects callers that invoke ``load()`` directly.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cache: Set[str] = set()
        self._cache_time: Optional[datetime] = None
        # Seconds for which ``load()`` serves a non-empty cache without re-reading.
        self._cache_ttl = 60
        self._initialized = False

    def initialize(self) -> None:
        """Load the registry from disk and mark the instance as initialized."""
        self.load()
        self._initialized = True

    def load(self) -> Set[str]:
        """Load processed entries from disk, serving a fresh cache when possible.

        Returns:
            A copy of the entry set (each entry plus its basename). Blank lines
            and lines starting with ``#`` are skipped. On a read error the
            error is logged and whatever was read so far is cached and returned.
        """
        now = datetime.utcnow()
        if self._cache and self._cache_time and (now - self._cache_time).total_seconds() < self._cache_ttl:
            return self._cache.copy()
        processed = set()
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            if registry_path.exists():
                with open(registry_path, 'r', encoding='utf-8') as f:
                    for raw_line in f:
                        line = raw_line.strip()
                        if line and not line.startswith('#'):
                            processed.add(line)
                            # Also index by basename so lookups match regardless of directory.
                            processed.add(Path(line).name)
        except Exception as e:
            self.logger.error(f"Error reading processed files registry: {e}")
        self._cache = processed
        self._cache_time = now
        return processed.copy()

    def save(self, file_path: str) -> None:
        """Append ``file_path`` to the registry unless it is already known.

        Args:
            file_path: Entry to record; empty values are ignored.

        Raises:
            Exception: Any error while creating or writing the registry file is
                logged and re-raised.
        """
        if not file_path:
            return
        # Make sure the cache reflects the file before the duplicate check;
        # otherwise a fresh instance would re-append entries already on disk.
        if not self._initialized:
            self.initialize()
        registry_path = settings.processed_files_path
        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            with open(registry_path, 'a', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    if file_path not in self._cache:
                        f.write(file_path + "\n")
                        # Push the buffered data to the OS while the lock is
                        # still held; otherwise the write would only happen at
                        # close(), after the unlock below.
                        f.flush()
                        self._cache.add(file_path)
                        # Mirror load(), which indexes every entry by basename too.
                        self._cache.add(Path(file_path).name)
                        self.logger.debug(f"Added {file_path} to processed registry")
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
        except Exception as e:
            self.logger.error(f"Error saving to processed files registry: {e}")
            raise

    def is_processed(self, file_path: str) -> bool:
        """Return True when ``file_path`` or its basename is in the registry."""
        if not self._initialized:
            self.initialize()
        return file_path in self._cache or Path(file_path).name in self._cache

    def remove(self, file_path: str) -> bool:
        """Remove ``file_path`` and every entry sharing its basename.

        Returns:
            False when the registry file does not exist or an error occurs
            (the error is logged); True otherwise, including when no line
            matched.
        """
        registry_path = settings.processed_files_path
        target_name = Path(file_path).name
        try:
            if not registry_path.exists():
                return False
            # 'r+' opens without truncating, so the whole read-filter-rewrite
            # cycle can run under one exclusive lock. Opening with 'w' would
            # empty the file before the lock is even requested.
            with open(registry_path, 'r+', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    lines_to_keep = [
                        line for line in f
                        if line.strip() != file_path
                        and Path(line.strip()).name != target_name
                    ]
                    f.seek(0)
                    f.truncate(0)
                    f.writelines(lines_to_keep)
                    # Flush while the lock is still held (see save()).
                    f.flush()
                    self._cache.discard(file_path)
                    self._cache.discard(target_name)
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            return True
        except Exception as e:
            self.logger.error(f"Error removing from processed files registry: {e}")
            return False

    def clear(self) -> None:
        """Delete the registry file and empty the cache.

        Raises:
            Exception: Any error while deleting the file is logged and re-raised.
        """
        registry_path = settings.processed_files_path
        try:
            if registry_path.exists():
                registry_path.unlink()
            self._cache.clear()
            self._cache_time = None
            self.logger.info("Processed files registry cleared")
        except Exception as e:
            self.logger.error(f"Error clearing processed files registry: {e}")
            raise

    def get_all(self) -> Set[str]:
        """Return a copy of the cached entries (paths and basenames)."""
        if not self._initialized:
            self.initialize()
        return self._cache.copy()

    def count(self) -> int:
        """Return the number of cached entries (paths and basenames both count)."""
        if not self._initialized:
            self.initialize()
        return len(self._cache)
# Module-level ProcessedRegistry instance, created once when this module is imported.
processed_registry = ProcessedRegistry()