CBCFacil v8.0 - Refactored with AMD GPU support
This commit is contained in:
69
backup/originals_20250109/ai_service.py
Normal file
69
backup/originals_20250109/ai_service.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
AI Service - Unified interface for AI providers
|
||||
"""
|
||||
import logging
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from .config import settings
|
||||
from .core import AIProcessingError
|
||||
from .services.ai.provider_factory import AIProviderFactory, ai_provider_factory
|
||||
|
||||
|
||||
class AIService:
    """Unified facade over the configured AI providers.

    Degrades gracefully: every public method catches AIProcessingError
    and returns a safe fallback value instead of propagating it.
    """

    def __init__(self):
        # Module-scoped logger; the factory is bound lazily on first use.
        self.logger = logging.getLogger(__name__)
        self._factory: Optional[AIProviderFactory] = None

    @property
    def factory(self) -> AIProviderFactory:
        """Provider factory, bound lazily to the module-level singleton."""
        if self._factory is None:
            self._factory = ai_provider_factory
        return self._factory

    def generate_text(
        self,
        prompt: str,
        provider: Optional[str] = None,
        max_tokens: int = 4096
    ) -> str:
        """Generate text with the named provider (default 'gemini').

        On AIProcessingError the message is returned as an "Error: ..."
        string rather than raised.
        """
        try:
            backend = self.factory.get_provider(provider or 'gemini')
            return backend.generate(prompt, max_tokens=max_tokens)
        except AIProcessingError as exc:
            self.logger.error(f"AI generation failed: {exc}")
            return f"Error: {str(exc)}"

    def summarize(self, text: str, **kwargs) -> str:
        """Summarize *text*; on failure return an "Error: ..." string."""
        try:
            return self.factory.get_best_provider().summarize(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Summarization failed: {exc}")
            return f"Error: {str(exc)}"

    def correct_text(self, text: str, **kwargs) -> str:
        """Fix grammar/spelling; on failure hand back the original text."""
        try:
            return self.factory.get_best_provider().correct_text(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Text correction failed: {exc}")
            # Graceful degradation: the caller keeps its input unchanged.
            return text

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify *text*; on failure return the neutral default bucket."""
        try:
            return self.factory.get_best_provider().classify_content(text, **kwargs)
        except AIProcessingError as exc:
            self.logger.error(f"Classification failed: {exc}")
            return {"category": "otras_clases", "confidence": 0.0}
|
||||
|
||||
|
||||
# Global instance (module-level singleton shared across the app)
ai_service = AIService()
|
||||
171
backup/originals_20250109/gemini_provider.py
Normal file
171
backup/originals_20250109/gemini_provider.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
Gemini AI Provider implementation
|
||||
"""
|
||||
import logging
|
||||
import subprocess
|
||||
import shutil
|
||||
import requests
|
||||
import time
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from ..config import settings
|
||||
from ..core import AIProcessingError
|
||||
from .base_provider import AIProvider
|
||||
|
||||
|
||||
class GeminiProvider(AIProvider):
    """Gemini AI provider using CLI or API.

    Prefers the local ``gemini`` CLI when configured and falls back to
    the REST API. All failures surface as AIProcessingError.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Explicit CLI path from settings, else whatever is on PATH.
        self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
        self._api_key = settings.GEMINI_API_KEY
        self._flash_model = settings.GEMINI_FLASH_MODEL
        self._pro_model = settings.GEMINI_PRO_MODEL
        # Lazily-created pooled requests.Session (see _call_api).
        self._session = None

    @property
    def name(self) -> str:
        """Human-readable provider name."""
        return "Gemini"

    def is_available(self) -> bool:
        """True when either the CLI binary or an API key is configured."""
        return bool(self._cli_path or self._api_key)

    def _run_cli(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the Gemini CLI with *prompt* and return its stripped stdout.

        Raises:
            AIProcessingError: CLI missing, timeout, subprocess failure,
                or a non-zero exit status.
        """
        if not self._cli_path:
            raise AIProcessingError("Gemini CLI not available")

        model = self._flash_model if use_flash else self._pro_model
        # argv list + shell=False: the prompt is passed verbatim, no shell injection.
        cmd = [self._cli_path, model, prompt]

        try:
            process = subprocess.run(
                cmd,
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Gemini CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Gemini CLI error: {e}") from e

        # Bug fix: checked OUTSIDE the try block. Previously the
        # AIProcessingError raised here was caught by the generic
        # `except Exception` above and re-wrapped as "Gemini CLI error:
        # ...", hiding the real stderr message.
        if process.returncode != 0:
            error_msg = process.stderr or "Unknown error"
            raise AIProcessingError(f"Gemini CLI failed: {error_msg}")

        return process.stdout.strip()

    def _call_api(self, prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
        """Call the Gemini REST API and return the first candidate's text.

        Raises:
            AIProcessingError: missing key, transport error, non-2xx
                status, or an unexpected response shape.
        """
        if not self._api_key:
            raise AIProcessingError("Gemini API key not configured")

        model = self._flash_model if use_flash else self._pro_model

        # Initialize a pooled session once and reuse it across calls.
        if self._session is None:
            self._session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=10,
                pool_maxsize=20
            )
            self._session.mount('https://', adapter)

        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"

        payload = {
            "contents": [{
                "parts": [{"text": prompt}]
            }]
        }

        params = {"key": self._api_key}

        try:
            response = self._session.post(
                url,
                json=payload,
                params=params,
                timeout=timeout
            )
            response.raise_for_status()

            data = response.json()

            if "candidates" not in data or not data["candidates"]:
                raise AIProcessingError("Empty response from Gemini API")

            candidate = data["candidates"][0]
            if "content" not in candidate or "parts" not in candidate["content"]:
                raise AIProcessingError("Invalid response format from Gemini API")

            result = candidate["content"]["parts"][0]["text"]
            return result.strip()

        except requests.RequestException as e:
            raise AIProcessingError(f"Gemini API request failed: {e}") from e
        except (KeyError, IndexError, ValueError) as e:
            # Malformed JSON body or missing fields in the candidate.
            raise AIProcessingError(f"Gemini API response error: {e}") from e

    def _run(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run Gemini with fallback: CLI first, then the REST API."""
        if self._cli_path:
            try:
                return self._run_cli(prompt, use_flash, timeout)
            except Exception as e:
                self.logger.warning(f"Gemini CLI failed, trying API: {e}")

        if self._api_key:
            # The API budget is capped at 180s regardless of the CLI budget.
            return self._call_api(prompt, use_flash, min(timeout, 180))

        raise AIProcessingError("No Gemini provider available (CLI or API)")

    def summarize(self, text: str, **kwargs) -> str:
        """Generate a Spanish summary of *text* using the flash model."""
        prompt = f"""Summarize the following text:

{text}

Provide a clear, concise summary in Spanish."""
        return self._run(prompt, use_flash=True)

    def correct_text(self, text: str, **kwargs) -> str:
        """Return *text* corrected for grammar, spelling, and clarity."""
        prompt = f"""Correct the following text for grammar, spelling, and clarity:

{text}

Return only the corrected text, nothing else."""
        return self._run(prompt, use_flash=True)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify *text* into one of four fixed categories.

        Returns a dict with ``category``, ``confidence`` and ``provider``;
        any off-list model answer is coerced to "otras_clases".
        """
        categories = ["historia", "analisis_contable", "instituciones_gobierno", "otras_clases"]

        prompt = f"""Classify the following text into one of these categories:
- historia
- analisis_contable
- instituciones_gobierno
- otras_clases

Text: {text}

Return only the category name, nothing else."""
        result = self._run(prompt, use_flash=True).lower()

        # Validate result: fall back to the catch-all bucket.
        if result not in categories:
            result = "otras_clases"

        return {
            "category": result,
            "confidence": 0.9,  # heuristic constant; CLI/API return no score
            "provider": self.name
        }
|
||||
137
backup/originals_20250109/processed_registry.py
Normal file
137
backup/originals_20250109/processed_registry.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
Processed files registry using repository pattern
|
||||
"""
|
||||
import fcntl
import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional, Set

from ..config import settings
|
||||
|
||||
|
||||
class ProcessedRegistry:
    """Registry for tracking processed files with caching and file locking.

    Entries live one path per line in ``settings.processed_files_path``.
    Each entry is indexed in the in-memory cache under both its full
    path and its basename, so a file is recognized regardless of the
    directory it is seen from.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self._cache: Set[str] = set()                 # full paths + basenames
        self._cache_time: Optional[datetime] = None   # last disk read (UTC)
        self._cache_ttl = 60                          # seconds before re-reading disk
        self._initialized = False

    def initialize(self) -> None:
        """Load the on-disk registry and mark the instance ready."""
        self.load()
        self._initialized = True

    def load(self) -> Set[str]:
        """Load processed files from disk, honoring the in-memory TTL cache.

        Blank lines and comment lines (leading '#') are skipped; each
        entry is indexed under both its full path and its basename.
        Returns a defensive copy of the cache.
        """
        # timezone-aware now(); datetime.utcnow() is deprecated since 3.12.
        now = datetime.now(timezone.utc)
        if self._cache and self._cache_time and (now - self._cache_time).total_seconds() < self._cache_ttl:
            return self._cache.copy()

        processed = set()
        registry_path = settings.processed_files_path

        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            if registry_path.exists():
                with open(registry_path, 'r', encoding='utf-8') as f:
                    for raw_line in f:
                        line = raw_line.strip()
                        if line and not line.startswith('#'):
                            processed.add(line)
                            processed.add(Path(line).name)
        except Exception as e:
            # Best effort: an unreadable registry degrades to "nothing processed".
            self.logger.error(f"Error reading processed files registry: {e}")

        self._cache = processed
        self._cache_time = now
        return processed.copy()

    def save(self, file_path: str) -> None:
        """Append *file_path* to the registry under an exclusive lock.

        Re-raises on I/O failure so callers know the write was lost.
        """
        if not file_path:
            return
        registry_path = settings.processed_files_path

        try:
            registry_path.parent.mkdir(parents=True, exist_ok=True)
            with open(registry_path, 'a', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    if file_path not in self._cache:
                        f.write(file_path + "\n")
                        self._cache.add(file_path)
                        # Bug fix: index the basename too, matching load().
                        # Previously is_processed() missed same-named files
                        # until the TTL forced a reload from disk.
                        self._cache.add(Path(file_path).name)
                        self.logger.debug(f"Added {file_path} to processed registry")
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
        except Exception as e:
            self.logger.error(f"Error saving to processed files registry: {e}")
            raise

    def is_processed(self, file_path: str) -> bool:
        """True if *file_path* (or its basename) is already registered."""
        if not self._initialized:
            self.initialize()
        return file_path in self._cache or Path(file_path).name in self._cache

    def remove(self, file_path: str) -> bool:
        """Remove *file_path* (matched by path or basename) from the registry.

        Returns True when the rewrite succeeded, False when the registry
        file does not exist or an error occurred.
        NOTE(review): the read pass runs before the lock is taken, so a
        concurrent save() between read and rewrite could be lost.
        """
        registry_path = settings.processed_files_path

        try:
            if not registry_path.exists():
                return False
            target_name = Path(file_path).name
            with open(registry_path, 'r', encoding='utf-8') as f:
                lines_to_keep = [
                    line for line in f
                    if line.strip() != file_path and Path(line.strip()).name != target_name
                ]
            with open(registry_path, 'w', encoding='utf-8') as f:
                fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                try:
                    f.writelines(lines_to_keep)
                    self._cache.discard(file_path)
                    self._cache.discard(target_name)
                finally:
                    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
            return True
        except Exception as e:
            self.logger.error(f"Error removing from processed files registry: {e}")
            return False

    def clear(self) -> None:
        """Delete the registry file and reset the cache; re-raises on failure."""
        registry_path = settings.processed_files_path
        try:
            if registry_path.exists():
                registry_path.unlink()
            self._cache.clear()
            self._cache_time = None
            self.logger.info("Processed files registry cleared")
        except Exception as e:
            self.logger.error(f"Error clearing processed files registry: {e}")
            raise

    def get_all(self) -> Set[str]:
        """Return a copy of all registered entries (paths and basenames)."""
        if not self._initialized:
            self.initialize()
        return self._cache.copy()

    def count(self) -> int:
        """Return the number of cached entries (paths + basenames)."""
        if not self._initialized:
            self.initialize()
        return len(self._cache)
|
||||
|
||||
|
||||
# Global instance (module-level singleton shared across the app)
processed_registry = ProcessedRegistry()
|
||||
Reference in New Issue
Block a user