feat: Sistema CBCFacil completo con cola secuencial

- Implementa ProcessingMonitor singleton para procesamiento secuencial de archivos
- Agrega AI summary service con soporte para MiniMax API
- Agrega PDF generator para resúmenes
- Agrega watchers para monitoreo de carpeta remota
- Mejora sistema de notificaciones Telegram
- Implementa gestión de VRAM para GPU
- Configuración mediante variables de entorno (sin hardcoded secrets)
- .env y transcriptions/ agregados a .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
renato97
2026-02-25 15:35:39 +00:00
parent dcf887c510
commit ee8fc183be
77 changed files with 3734 additions and 20263 deletions

View File

@@ -1,290 +1,102 @@
"""
WebDAV service for Nextcloud integration
Cliente WebDAV para Nextcloud.
Provee métodos para interactuar con Nextcloud via WebDAV.
"""
import logging
import os
import time
import unicodedata
import re
from pathlib import Path
from typing import Optional, List, Dict, Tuple
from contextlib import contextmanager
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from requests.auth import HTTPBasicAuth
from requests.adapters import HTTPAdapter
from typing import Optional
from webdav3.client import Client
from config import settings
from core import WebDAVError
class WebDAVService:
"""Service for WebDAV operations with Nextcloud"""
"""Cliente WebDAV para Nextcloud."""
def __init__(self):
self.session: Optional[requests.Session] = None
def __init__(self) -> None:
self.logger = logging.getLogger(__name__)
self._retry_delay = 1
self._max_retries = settings.WEBDAV_MAX_RETRIES
self._client: Optional[Client] = None
def initialize(self) -> None:
"""Initialize WebDAV session"""
if not settings.has_webdav_config:
raise WebDAVError("WebDAV credentials not configured")
def _get_client(self) -> Client:
"""Obtiene o crea el cliente WebDAV."""
if self._client is None:
if not settings.has_webdav_config:
raise RuntimeError("WebDAV configuration missing")
self.session = requests.Session()
self.session.auth = HTTPBasicAuth(settings.NEXTCLOUD_USER, settings.NEXTCLOUD_PASSWORD)
options = {
"webdav_hostname": settings.NEXTCLOUD_URL,
"webdav_login": settings.NEXTCLOUD_USER,
"webdav_password": settings.NEXTCLOUD_PASSWORD,
}
self._client = Client(options)
self._client.verify = True # Verificar SSL
# Configure HTTP adapter with retry strategy
adapter = HTTPAdapter(
max_retries=0, # We'll handle retries manually
pool_connections=10,
pool_maxsize=20
)
self.session.mount('https://', adapter)
self.session.mount('http://', adapter)
return self._client
# Test connection
def test_connection(self) -> bool:
"""Prueba la conexión con Nextcloud."""
try:
self._request('GET', '', timeout=5)
self.logger.info("WebDAV connection established")
client = self._get_client()
return client.check()
except Exception as e:
raise WebDAVError(f"Failed to connect to WebDAV: {e}")
def cleanup(self) -> None:
    """Close the current HTTP session, if any, and forget it.

    Does nothing when ``self.session`` is unset (or otherwise falsy).
    """
    if not self.session:
        return
    self.session.close()
    self.session = None
@staticmethod
def normalize_path(path: str) -> str:
    """Return *path* in the canonical form used for remote paths.

    The value is converted to ``str``, NFC-normalized and stripped of
    surrounding whitespace; backslashes become forward slashes, runs of
    slashes collapse to one, and leading slashes are removed. A falsy or
    whitespace-only input yields the empty string.
    """
    text = unicodedata.normalize("NFC", str(path)).strip() if path else ""
    if not text:
        return ""
    with_forward_slashes = text.replace("\\", "/")
    return re.sub(r"/+", "/", with_forward_slashes).lstrip("/")
def _build_url(self, remote_path: str) -> str:
    """Return the absolute WebDAV URL for *remote_path*.

    The path is normalized with ``normalize_path`` and appended to
    ``settings.WEBDAV_ENDPOINT`` (trailing slashes removed) using a
    single ``/`` separator.
    """
    relative = self.normalize_path(remote_path)
    endpoint = settings.WEBDAV_ENDPOINT.rstrip('/')
    return f"{endpoint}/{relative}"
def _request(self, method: str, remote_path: str, **kwargs) -> requests.Response:
    """Send one WebDAV request, retrying transport failures with backoff.

    Args:
        method: HTTP/WebDAV verb (``GET``, ``PUT``, ``PROPFIND``, ...).
        remote_path: Path relative to the WebDAV endpoint; turned into a
            full URL by ``_build_url``.
        **kwargs: Passed through to ``Session.request``. ``timeout`` is
            extracted and defaults to ``settings.HTTP_TIMEOUT``.

    Returns:
        The response, when its status code is below 400.

    Raises:
        WebDAVError: If the session is not initialized, the server answers
            with a status >= 400, or every attempt fails at the transport
            level. Only ``requests`` exceptions trigger a retry.
    """
    if not self.session:
        raise WebDAVError("WebDAV session not initialized")

    url = self._build_url(remote_path)
    timeout = kwargs.pop('timeout', settings.HTTP_TIMEOUT)

    # A file-like body is consumed by the first attempt. Remember where it
    # started so that a retry re-sends the whole payload instead of
    # whatever (usually nothing) is left after the failed attempt.
    body = kwargs.get('data')
    rewind_to = None
    if hasattr(body, 'seek') and hasattr(body, 'tell'):
        try:
            rewind_to = body.tell()
        except (OSError, ValueError):
            # Unseekable stream: nothing we can do, send it as-is.
            rewind_to = None

    for attempt in range(self._max_retries):
        if attempt and rewind_to is not None:
            body.seek(rewind_to)
        try:
            response = self.session.request(method, url, timeout=timeout, **kwargs)
            if response.status_code < 400:
                return response
            elif response.status_code == 404:
                raise WebDAVError(f"Resource not found: {remote_path}")
            else:
                raise WebDAVError(f"HTTP {response.status_code}: {response.text}")
        except requests.RequestException as e:
            # requests.Timeout is a RequestException, so it is covered here.
            if attempt == self._max_retries - 1:
                raise WebDAVError(
                    f"Request failed after {self._max_retries} retries: {e}"
                ) from e
            # Exponential backoff: retry_delay, 2x, 4x, ...
            delay = self._retry_delay * (2 ** attempt)
            self.logger.warning(f"Request failed (attempt {attempt + 1}/{self._max_retries}), retrying in {delay}s...")
            time.sleep(delay)

    # Reached only when the loop never ran (max retries <= 0).
    raise WebDAVError("Max retries exceeded")
def list(self, remote_path: str = "") -> List[str]:
    """Return the entries of *remote_path* reported by a depth-1 PROPFIND.

    The raw XML body is handed to ``_parse_propfind_response``, whose
    result is returned unchanged.
    """
    self.logger.debug(f"Listing remote directory: {remote_path}")
    depth_one = {'Depth': '1'}
    listing = self._request('PROPFIND', remote_path, headers=depth_one)
    return self._parse_propfind_response(listing.text)
def _parse_propfind_response(self, xml_response: str) -> List[str]:
    """Extract file paths (never directories) from a PROPFIND XML body.

    Each ``{DAV:}response`` element is inspected: entries whose
    resourcetype contains ``{DAV:}collection`` or whose href ends with
    ``/`` are skipped. The remaining hrefs are URL-decoded, stripped of
    the ``settings.NEXTCLOUD_URL`` prefix and/or its path component, and
    returned without a leading slash.

    Parsing is best-effort: any exception is logged and the paths
    collected so far are returned.
    """
    files = []

    try:
        import xml.etree.ElementTree as ET
        from urllib.parse import urlparse, unquote

        root = ET.fromstring(xml_response)

        # Both prefixes depend only on settings, so compute them once
        # instead of once per response element.
        base_url = settings.NEXTCLOUD_URL.rstrip('/')
        webdav_path = urlparse(settings.NEXTCLOUD_URL).path.rstrip('/')  # e.g. /remote.php/webdav

        for response in root.findall('.//{DAV:}response'):
            href = response.find('.//{DAV:}href')
            if href is None or href.text is None:
                continue

            href_text = unquote(href.text)  # Decode URL encoding

            # A directory is marked by <collection/> inside the resourcetype
            # of the first propstat/prop of the response.
            propstat = response.find('.//{DAV:}propstat')
            prop = propstat.find('.//{DAV:}prop') if propstat is not None else None
            resourcetype = prop.find('.//{DAV:}resourcetype') if prop is not None else None
            if resourcetype is not None and resourcetype.find('.//{DAV:}collection') is not None:
                continue

            # A trailing slash is the other way servers mark directories.
            if href_text.endswith('/'):
                continue

            # hrefs may be absolute URLs or server-absolute paths; strip
            # whichever prefix is present.
            if href_text.startswith(base_url):
                href_text = href_text[len(base_url):]
            if href_text.startswith(webdav_path):
                href_text = href_text[len(webdav_path):]

            href_text = href_text.lstrip('/')

            if href_text:  # Skip empty paths (root directory)
                files.append(href_text)

    except Exception as e:
        self.logger.error(f"Error parsing PROPFIND response: {e}")

    return files
def download(self, remote_path: str, local_path: Path) -> None:
    """Stream a remote file to *local_path*.

    Args:
        remote_path: Path of the file on the WebDAV server.
        local_path: Destination file; missing parent directories are
            created.

    Raises:
        Whatever ``_request`` raises, plus any error from writing the
        local file or reading the response stream.
    """
    self.logger.info(f"Downloading {remote_path} to {local_path}")

    # Ensure local directory exists
    local_path.parent.mkdir(parents=True, exist_ok=True)

    response = self._request('GET', remote_path, stream=True)

    # A streamed response keeps its connection checked out until it is
    # fully read or closed, so always close it, even if writing fails.
    try:
        # Use larger buffer size for better performance
        with open(local_path, 'wb', buffering=65536) as f:
            for chunk in response.iter_content(chunk_size=settings.DOWNLOAD_CHUNK_SIZE):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    finally:
        response.close()

    self.logger.debug(f"Download completed: {local_path}")
def upload(self, local_path: Path, remote_path: str) -> None:
    """Send a local file to *remote_path* with a PUT request.

    When the normalized remote path contains a directory component, that
    directory (and its parents) is created first via ``makedirs``.
    """
    self.logger.info(f"Uploading {local_path} to {remote_path}")

    # Everything before the last slash is the remote parent directory.
    parent_dir, slash, _filename = self.normalize_path(remote_path).rpartition('/')
    if slash:
        self.makedirs(parent_dir)

    with open(local_path, 'rb') as source:
        self._request('PUT', remote_path, data=source)

    self.logger.debug(f"Upload completed: {remote_path}")
def mkdir(self, remote_path: str) -> None:
    """Create *remote_path* on the server.

    Thin alias for ``makedirs``: missing parent directories are created
    as well.
    """
    target = remote_path
    self.makedirs(target)
def makedirs(self, remote_path: str) -> None:
    """Create *remote_path* and every missing parent directory.

    The path is normalized, then one MKCOL request is issued per path
    component, from the top down. An empty path is a no-op.

    Raises:
        WebDAVError: For any MKCOL failure other than HTTP 405/409.
    """
    path = self.normalize_path(remote_path)
    if not path:
        return

    current = ""
    for part in path.split('/'):
        current = f"{current}/{part}" if current else part

        try:
            self._request('MKCOL', current)
            self.logger.debug(f"Created directory: {current}")
        except WebDAVError as e:
            # 405 (and, as before, 409) means the directory is already
            # there. Match the "HTTP <code>" token that _request puts in
            # its message rather than the bare digits, so that unrelated
            # failures (e.g. a 404 for a path named "dir409", or a body
            # that happens to contain "405") are not silently swallowed.
            if not re.search(r"\bHTTP (?:405|409)\b", str(e)):
                raise
def delete(self, remote_path: str) -> None:
    """Remove the file or directory at *remote_path* via a DELETE request.

    Errors raised by ``_request`` propagate to the caller.
    """
    log = self.logger
    log.info(f"Deleting remote path: {remote_path}")
    verb = 'DELETE'
    self._request(verb, remote_path)
def exists(self, remote_path: str) -> bool:
"""Check if remote path exists"""
try:
self._request('HEAD', remote_path)
return True
except WebDAVError:
self.logger.error(f"WebDAV connection failed: {e}")
return False
def upload_batch(
self,
files: List[Tuple[Path, str]],
max_workers: int = 4,
timeout: int = 120
) -> Dict[str, bool]:
"""
Upload multiple files concurrently.
def list_files(self, remote_path: str = "/") -> list[str]:
"""Lista archivos en una ruta remota."""
try:
client = self._get_client()
# Asegurar que la ruta empieza con /
if not remote_path.startswith("/"):
remote_path = "/" + remote_path
Args:
files: List of (local_path, remote_path) tuples
max_workers: Maximum concurrent uploads
timeout: Timeout per upload in seconds
files = client.list(remote_path)
return files if files else []
except Exception as e:
self.logger.error(f"Failed to list files: {e}")
return []
Returns:
Dict mapping remote_path to success status
"""
if not files:
def download_file(self, remote_path: str, local_path: Path) -> bool:
    """Download a file from Nextcloud to *local_path*.

    Missing parent directories of *local_path* are created. Any failure
    is logged and reported through the return value instead of raising.

    Returns:
        True when the download completed, False otherwise.
    """
    succeeded = False
    try:
        webdav = self._get_client()
        local_path.parent.mkdir(parents=True, exist_ok=True)
        webdav.download_sync(remote_path=str(remote_path), local_path=str(local_path))
        self.logger.info(f"Downloaded: {remote_path} -> {local_path}")
        succeeded = True
    except Exception as err:
        self.logger.error(f"Failed to download {remote_path}: {err}")
    return succeeded
def get_file_info(self, remote_path: str) -> dict:
    """Return basic metadata for a remote file.

    Returns:
        A dict with the keys ``name``, ``size`` and ``modified`` taken
        from the client's ``info()`` result (defaulting to ``""``, ``0``
        and ``""``), or an empty dict when the lookup fails; failures are
        logged, not raised.
    """
    defaults = (("name", ""), ("size", 0), ("modified", ""))
    try:
        details = self._get_client().info(remote_path)
        return {key: details.get(key, fallback) for key, fallback in defaults}
    except Exception as err:
        self.logger.error(f"Failed to get file info: {err}")
        return {}
results: Dict[str, bool] = {}
def file_exists(self, remote_path: str) -> bool:
    """Report whether *remote_path* exists on the server.

    Returns:
        The result of the client's ``check()`` call, or False when the
        check itself fails (client creation or the request raised).
    """
    try:
        client = self._get_client()
        return client.check(remote_path)
    except Exception as e:
        # Still best-effort, but leave a trace: a connection or
        # configuration failure would otherwise be indistinguishable
        # from "file does not exist".
        self.logger.warning(f"Failed to check existence of {remote_path}: {e}")
        return False
with ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all upload tasks
future_to_path = {
executor.submit(self.upload, local, remote): remote
for local, remote in files
}
# Collect results as they complete
for future in as_completed(future_to_path, timeout=timeout):
remote_path = future_to_path[future]
try:
future.result()
results[remote_path] = True
self.logger.info(f"Successfully uploaded: {remote_path}")
except Exception as e:
results[remote_path] = False
self.logger.error(f"Failed to upload {remote_path}: {e}")
failed_count = sum(1 for success in results.values() if not success)
if failed_count > 0:
self.logger.warning(
f"Batch upload completed with {failed_count} failures "
f"({len(results) - failed_count}/{len(results)} successful)"
)
else:
self.logger.info(
f"Batch upload completed: {len(results)} files uploaded successfully"
)
return results
# Global instance
webdav_service = WebDAVService()
def upload_file(self, local_path: Path, remote_path: str) -> bool:
    """Upload a local file to Nextcloud.

    Any failure is logged and reported through the return value instead
    of raising.

    Returns:
        True when the upload completed, False otherwise.
    """
    succeeded = False
    try:
        webdav = self._get_client()
        webdav.upload_sync(local_path=str(local_path), remote_path=str(remote_path))
        self.logger.info(f"Uploaded: {local_path} -> {remote_path}")
        succeeded = True
    except Exception as err:
        self.logger.error(f"Failed to upload {local_path}: {err}")
    return succeeded