# cbc2027/backup/originals_20250109/gemini_provider.py

"""
Gemini AI Provider implementation
"""
import logging
import subprocess
import shutil
import requests
import time
from typing import Dict, Any, Optional

from ..config import settings
from ..core import AIProcessingError
from .base_provider import AIProvider


class GeminiProvider(AIProvider):
    """Gemini AI provider that talks to Gemini through a CLI binary or the HTTP API.

    The CLI is tried first when a binary path is configured or found on
    ``PATH``; the HTTP API is used as a fallback when an API key is configured.
    All failures are reported as ``AIProcessingError``.
    """

    # Upper bound (seconds) for API requests issued through ``_run``.
    _MAX_API_TIMEOUT = 180

    # Labels ``classify_content`` may return; the last one is the fallback.
    _CATEGORIES = (
        "historia",
        "analisis_contable",
        "instituciones_gobierno",
        "otras_clases",
    )

    def __init__(self):
        """Read CLI/API configuration from ``settings``; no I/O is performed."""
        self.logger = logging.getLogger(__name__)
        # An explicitly configured path wins over whatever is found on PATH.
        self._cli_path = settings.GEMINI_CLI_PATH or shutil.which("gemini")
        self._api_key = settings.GEMINI_API_KEY
        self._flash_model = settings.GEMINI_FLASH_MODEL
        self._pro_model = settings.GEMINI_PRO_MODEL
        # HTTP session is created lazily on the first API call.
        self._session = None

    @property
    def name(self) -> str:
        """Human-readable provider name."""
        return "Gemini"

    def is_available(self) -> bool:
        """Return True when either the CLI path or the API key is configured."""
        return bool(self._cli_path or self._api_key)

    def _run_cli(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run the Gemini CLI with ``prompt`` and return its stripped stdout.

        Args:
            prompt: Text sent to the model.
            use_flash: Use the flash model when True, the pro model otherwise.
            timeout: Seconds to wait for the process before giving up.

        Raises:
            AIProcessingError: If the CLI is not configured, cannot be started,
                times out, or exits with a non-zero status.
        """
        if not self._cli_path:
            raise AIProcessingError("Gemini CLI not available")

        model = self._flash_model if use_flash else self._pro_model
        # NOTE(review): model and prompt are passed as positional arguments;
        # confirm this matches the argument convention of the configured CLI.
        cmd = [self._cli_path, model, prompt]

        # Only the process launch lives in the try block, so the non-zero-exit
        # error raised below is not re-wrapped by the catch-all handler.
        try:
            process = subprocess.run(
                cmd,
                text=True,
                capture_output=True,
                timeout=timeout,
                shell=False,
            )
        except subprocess.TimeoutExpired as e:
            raise AIProcessingError(f"Gemini CLI timed out after {timeout}s") from e
        except Exception as e:
            raise AIProcessingError(f"Gemini CLI error: {e}") from e

        if process.returncode != 0:
            error_msg = process.stderr or "Unknown error"
            raise AIProcessingError(f"Gemini CLI failed: {error_msg}")

        return process.stdout.strip()

    def _get_session(self):
        """Return the shared HTTP session, creating it on first use."""
        if self._session is None:
            session = requests.Session()
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=10,
                pool_maxsize=20,
            )
            session.mount('https://', adapter)
            self._session = session
        return self._session

    def _call_api(self, prompt: str, use_flash: bool = True, timeout: int = 180) -> str:
        """Call the Gemini ``generateContent`` endpoint and return the reply text.

        Args:
            prompt: Text sent to the model.
            use_flash: Use the flash model when True, the pro model otherwise.
            timeout: Request timeout in seconds.

        Returns:
            The stripped text of the first part of the first candidate.

        Raises:
            AIProcessingError: If no API key is configured, the request fails,
                or the response does not have the expected structure.
        """
        if not self._api_key:
            raise AIProcessingError("Gemini API key not configured")

        model = self._flash_model if use_flash else self._pro_model
        session = self._get_session()

        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"

        payload = {
            "contents": [{
                "parts": [{"text": prompt}]
            }]
        }

        # The key travels in a header rather than the query string: request
        # exceptions embed the full URL in their message, which would otherwise
        # leak the key into raised errors and logs.
        headers = {"x-goog-api-key": self._api_key}

        try:
            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            raise AIProcessingError(f"Gemini API request failed: {e}") from e
        except ValueError as e:
            # Body was not valid JSON.
            raise AIProcessingError(f"Gemini API response error: {e}") from e

        candidates = data.get("candidates") if isinstance(data, dict) else None
        if not candidates:
            raise AIProcessingError("Empty response from Gemini API")

        try:
            parts = candidates[0]["content"]["parts"]
        except (KeyError, IndexError, TypeError) as e:
            raise AIProcessingError("Invalid response format from Gemini API") from e

        try:
            return parts[0]["text"].strip()
        except (KeyError, IndexError, TypeError, AttributeError) as e:
            raise AIProcessingError(f"Gemini API response error: {e}") from e

    def _run(self, prompt: str, use_flash: bool = True, timeout: int = 300) -> str:
        """Run ``prompt`` through the CLI, falling back to the API on failure.

        Args:
            prompt: Text sent to the model.
            use_flash: Use the flash model when True, the pro model otherwise.
            timeout: CLI timeout in seconds; API calls are capped at
                ``_MAX_API_TIMEOUT`` seconds.

        Raises:
            AIProcessingError: If neither backend is configured, or every
                configured backend fails.
        """
        cli_error = None

        # Try CLI first if available
        if self._cli_path:
            try:
                return self._run_cli(prompt, use_flash, timeout)
            except Exception as e:
                cli_error = e
                if self._api_key:
                    self.logger.warning("Gemini CLI failed, trying API: %s", e)

        # Fallback to API
        if self._api_key:
            api_timeout = min(timeout, self._MAX_API_TIMEOUT)
            return self._call_api(prompt, use_flash, api_timeout)

        if cli_error is not None:
            # Surface the real CLI failure instead of a generic "not available".
            raise AIProcessingError(
                f"Gemini CLI failed and no API key is configured: {cli_error}"
            ) from cli_error

        raise AIProcessingError("No Gemini provider available (CLI or API)")

    def summarize(self, text: str, **kwargs) -> str:
        """Return a Spanish summary of ``text``; extra keyword arguments are ignored."""
        prompt = f"""Summarize the following text:

{text}

Provide a clear, concise summary in Spanish."""
        return self._run(prompt, use_flash=True)

    def correct_text(self, text: str, **kwargs) -> str:
        """Return ``text`` corrected for grammar, spelling and clarity.

        Extra keyword arguments are ignored.
        """
        prompt = f"""Correct the following text for grammar, spelling, and clarity:

{text}

Return only the corrected text, nothing else."""
        return self._run(prompt, use_flash=True)

    def classify_content(self, text: str, **kwargs) -> Dict[str, Any]:
        """Classify ``text`` into one of the known categories.

        Returns:
            A dict with ``category`` (one of ``_CATEGORIES``, defaulting to
            ``"otras_clases"`` when the reply is not recognised), a fixed
            ``confidence`` value and the ``provider`` name.
        """
        options = "\n".join(f"- {category}" for category in self._CATEGORIES)

        prompt = f"""Classify the following text into one of these categories:
{options}

Text: {text}

Return only the category name, nothing else."""
        raw = self._run(prompt, use_flash=True)

        # Models often decorate the label with quotes, markdown or a trailing
        # period; strip that before matching so a valid answer is not discarded.
        label = raw.strip().strip("`'\"*.:").strip().lower()

        # Validate result
        if label not in self._CATEGORIES:
            label = self._CATEGORIES[-1]

        return {
            "category": label,
            # NOTE(review): confidence is a fixed placeholder, not a model score.
            "confidence": 0.9,
            "provider": self.name
        }