"""
Centralized configuration management for CBCFacil
"""
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, Set
|
|
|
|
|
|
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    An unset, empty, or whitespace-only variable yields *default*, so a blank
    entry such as ``POLL_INTERVAL=`` no longer aborts the import.

    Raises:
        ValueError: if the variable holds a non-integer value; the message
            names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from None


def _env_float(name: str, default: float) -> float:
    """Return environment variable *name* parsed as a float.

    An unset, empty, or whitespace-only variable yields *default*.

    Raises:
        ValueError: if the variable holds a non-numeric value; the message
            names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError:
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from None


class Settings:
    """Application settings loaded from environment variables.

    Every class attribute is computed once, when this class body executes at
    import time; changing the environment afterwards does not update them.
    Only ``processed_files_path`` consults the environment on each access.
    """

    # Application
    APP_NAME: str = "CBCFacil"
    APP_VERSION: str = "8.0"
    # Only the literal "true" (any letter case) enables debug mode.
    DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"

    # Nextcloud/WebDAV Configuration
    NEXTCLOUD_URL: str = os.getenv("NEXTCLOUD_URL", "")
    NEXTCLOUD_USER: str = os.getenv("NEXTCLOUD_USER", "")
    NEXTCLOUD_PASSWORD: str = os.getenv("NEXTCLOUD_PASSWORD", "")
    # Same value as NEXTCLOUD_URL, exposed under a second name.
    WEBDAV_ENDPOINT: str = NEXTCLOUD_URL

    # Remote folders
    REMOTE_AUDIOS_FOLDER: str = "Audios"
    REMOTE_DOCX_AUDIO_FOLDER: str = "Documentos"
    REMOTE_PDF_FOLDER: str = "Pdf"
    REMOTE_TXT_FOLDER: str = "Textos"
    RESUMENES_FOLDER: str = "Resumenes"
    DOCX_FOLDER: str = "Documentos"

    # Local paths
    # BASE_DIR is the directory two levels above this file.
    BASE_DIR: Path = Path(__file__).resolve().parent.parent
    LOCAL_STATE_DIR: str = os.getenv("LOCAL_STATE_DIR", str(BASE_DIR))
    LOCAL_DOWNLOADS_PATH: Path = BASE_DIR / "downloads"
    # Alias: points at the very same directory as LOCAL_DOWNLOADS_PATH.
    LOCAL_RESUMENES: Path = LOCAL_DOWNLOADS_PATH
    LOCAL_DOCX: Path = BASE_DIR / "resumenes_docx"

    # Processing
    POLL_INTERVAL: int = _env_int("POLL_INTERVAL", 5)
    HTTP_TIMEOUT: int = _env_int("HTTP_TIMEOUT", 30)
    WEBDAV_MAX_RETRIES: int = _env_int("WEBDAV_MAX_RETRIES", 3)
    # 65536 = 64 KiB, chosen for better performance.
    DOWNLOAD_CHUNK_SIZE: int = _env_int("DOWNLOAD_CHUNK_SIZE", 65536)
    MAX_FILENAME_LENGTH: int = _env_int("MAX_FILENAME_LENGTH", 80)
    MAX_FILENAME_BASE_LENGTH: int = _env_int("MAX_FILENAME_BASE_LENGTH", 40)
    MAX_FILENAME_TOPICS_LENGTH: int = _env_int("MAX_FILENAME_TOPICS_LENGTH", 20)

    # File extensions
    AUDIO_EXTENSIONS: Set[str] = {".mp3", ".wav", ".m4a", ".ogg", ".aac"}
    PDF_EXTENSIONS: Set[str] = {".pdf"}
    TXT_EXTENSIONS: Set[str] = {".txt"}

    # AI Providers
    ZAI_BASE_URL: str = os.getenv("ZAI_BASE_URL", "https://api.z.ai/api/anthropic")
    ZAI_DEFAULT_MODEL: str = os.getenv("ZAI_MODEL", "glm-4.6")
    # ANTHROPIC_AUTH_TOKEN wins when set and non-empty; otherwise
    # ZAI_AUTH_TOKEN is used, falling back to the empty string.
    ZAI_AUTH_TOKEN: Optional[str] = (
        os.getenv("ANTHROPIC_AUTH_TOKEN") or os.getenv("ZAI_AUTH_TOKEN", "")
    )

    # Gemini (None when the variable is unset)
    GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
    GEMINI_FLASH_MODEL: Optional[str] = os.getenv("GEMINI_FLASH_MODEL")
    GEMINI_PRO_MODEL: Optional[str] = os.getenv("GEMINI_PRO_MODEL")

    # CLI paths (None when the variable is unset)
    GEMINI_CLI_PATH: Optional[str] = os.getenv("GEMINI_CLI_PATH")
    CLAUDE_CLI_PATH: Optional[str] = os.getenv("CLAUDE_CLI_PATH")

    # Telegram (None when the variable is unset)
    TELEGRAM_TOKEN: Optional[str] = os.getenv("TELEGRAM_TOKEN")
    TELEGRAM_CHAT_ID: Optional[str] = os.getenv("TELEGRAM_CHAT_ID")

    # PDF Processing Configuration
    # os.cpu_count() may return None; fall back to a single CPU.
    CPU_COUNT: int = os.cpu_count() or 1
    PDF_MAX_PAGES_PER_CHUNK: int = _env_int("PDF_MAX_PAGES_PER_CHUNK", 2)
    PDF_DPI: int = _env_int("PDF_DPI", 200)
    # Default: at most 4, never more than the CPU count.
    PDF_RENDER_THREAD_COUNT: int = _env_int(
        "PDF_RENDER_THREAD_COUNT", min(4, CPU_COUNT)
    )
    PDF_BATCH_SIZE: int = _env_int("PDF_BATCH_SIZE", 2)
    # Default follows PDF_BATCH_SIZE.
    PDF_TROCR_MAX_BATCH: int = _env_int("PDF_TROCR_MAX_BATCH", PDF_BATCH_SIZE)
    # Default: one third of the CPUs, clamped to the range 1..2.
    PDF_TESSERACT_THREADS: int = _env_int(
        "PDF_TESSERACT_THREADS", min(2, max(1, CPU_COUNT // 3))
    )
    # Default follows PDF_TESSERACT_THREADS.
    PDF_PREPROCESS_THREADS: int = _env_int(
        "PDF_PREPROCESS_THREADS", PDF_TESSERACT_THREADS
    )
    PDF_TEXT_DETECTION_MIN_RATIO: float = _env_float(
        "PDF_TEXT_DETECTION_MIN_RATIO", 0.6
    )
    PDF_TEXT_DETECTION_MIN_AVG_CHARS: int = _env_int(
        "PDF_TEXT_DETECTION_MIN_AVG_CHARS", 120
    )

    # Error handling
    ERROR_THROTTLE_SECONDS: int = _env_int("ERROR_THROTTLE_SECONDS", 600)

    # GPU/VRAM Management
    MODEL_TIMEOUT_SECONDS: int = _env_int("MODEL_TIMEOUT_SECONDS", 300)
    CUDA_VISIBLE_DEVICES: str = os.getenv("CUDA_VISIBLE_DEVICES", "all")
    PYTORCH_CUDA_ALLOC_CONF: str = os.getenv(
        "PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:512"
    )

    # Dashboard
    # NOTE(review): the secret key defaults to an empty string and the host to
    # "0.0.0.0" -- confirm both defaults are acceptable for deployments.
    DASHBOARD_SECRET_KEY: str = os.getenv("DASHBOARD_SECRET_KEY", "")
    DASHBOARD_PORT: int = _env_int("DASHBOARD_PORT", 5000)
    DASHBOARD_HOST: str = os.getenv("DASHBOARD_HOST", "0.0.0.0")

    # Logging
    LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
    LOG_FILE: Optional[str] = os.getenv("LOG_FILE")

    # Threading optimization
    OMP_NUM_THREADS: int = _env_int("OMP_NUM_THREADS", 4)
    MKL_NUM_THREADS: int = _env_int("MKL_NUM_THREADS", 4)

    @property
    def is_production(self) -> bool:
        """Return True when DEBUG is off."""
        return not self.DEBUG

    @property
    def has_webdav_config(self) -> bool:
        """Return True when the Nextcloud URL, user and password are all non-empty."""
        required = (self.NEXTCLOUD_URL, self.NEXTCLOUD_USER, self.NEXTCLOUD_PASSWORD)
        return all(required)

    @property
    def has_ai_config(self) -> bool:
        """Return True when at least one AI token, API key or CLI path is set."""
        candidates = (
            self.ZAI_AUTH_TOKEN,
            self.GEMINI_API_KEY,
            self.CLAUDE_CLI_PATH,
            self.GEMINI_CLI_PATH,
        )
        return any(candidates)

    @property
    def processed_files_path(self) -> Path:
        """Return the path to the processed files registry.

        PROCESSED_FILES_PATH is read from the environment on every access;
        when it is unset the registry is ``processed_files.txt`` inside
        LOCAL_STATE_DIR.
        """
        fallback = Path(self.LOCAL_STATE_DIR) / "processed_files.txt"
        return Path(os.getenv("PROCESSED_FILES_PATH", str(fallback)))
|
|
|
|
|
|
# Module-level Settings instance created once at import time.
settings = Settings()
|