Files
cbc2027/watchers/folder_watcher.py
renato97 ee8fc183be feat: Sistema CBCFacil completo con cola secuencial
- Implementa ProcessingMonitor singleton para procesamiento secuencial de archivos
- Agrega AI summary service con soporte para MiniMax API
- Agrega PDF generator para resúmenes
- Agrega watchers para monitoreo de carpeta remota
- Mejora sistema de notificaciones Telegram
- Implementa gestión de VRAM para GPU
- Configuración mediante variables de entorno (sin hardcoded secrets)
- .env y transcriptions/ agregados a .gitignore

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 15:35:39 +00:00

219 lines
6.8 KiB
Python

"""
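Folder watchers.

FolderWatcher monitors a local folder for newly created files and passes them
to a callback; RemoteFolderWatcher downloads new files from a remote
Nextcloud (WebDAV) folder and passes the downloaded paths to a callback.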
"""
import logging
import time
from pathlib import Path
from typing import Callable, Optional
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler, FileCreatedEvent
from config import settings
from services import WebDAVService
class FileHandler(FileSystemEventHandler):
    """Filesystem event handler that forwards newly created files to a callback.

    Directories, hidden files (name starting with ``.``) and partially
    written download artifacts (see ``TEMP_SUFFIXES``) are ignored.

    NOTE(review): only ``on_created`` is handled. A file that is written as
    ``name.part`` / ``name.crdownload`` and then renamed to its final name
    presumably arrives as a move event and would never be forwarded --
    confirm whether ``on_moved`` needs handling too.
    """

    # Suffixes of in-progress files; compared case-insensitively.
    TEMP_SUFFIXES = frozenset({".tmp", ".part", ".crdownload"})

    def __init__(
        self,
        on_new_file: Callable[[Path], None],
        logger: Optional[logging.Logger] = None,
        settle_delay: float = 1.0,
    ) -> None:
        """Create the handler.

        Args:
            on_new_file: Called with the path of every accepted new file.
            logger: Logger to use; defaults to this module's logger.
            settle_delay: Seconds to wait before invoking the callback so the
                writer has a chance to finish. Values <= 0 disable the wait.
        """
        super().__init__()
        self.on_new_file = on_new_file
        self.logger = logger or logging.getLogger(__name__)
        self.settle_delay = settle_delay

    def on_created(self, event: FileCreatedEvent) -> None:
        """Handle a creation event and forward accepted files to the callback.

        Exceptions raised by the callback are logged (with traceback) and
        swallowed so that one bad file does not propagate into the code
        dispatching the event.
        """
        if event.is_directory:
            return

        file_path = Path(event.src_path)

        # Filter first so ignored files do not produce "detected" log noise.
        if file_path.suffix.lower() in self.TEMP_SUFFIXES:
            return
        if file_path.name.startswith("."):
            return

        self.logger.info("New file detected: %s", file_path)

        # Fixed grace period for the writer; this blocks the calling thread.
        if self.settle_delay > 0:
            time.sleep(self.settle_delay)

        try:
            self.on_new_file(file_path)
        except Exception as e:
            # logger.exception keeps the traceback that logger.error dropped.
            self.logger.exception("Error processing file %s: %s", file_path, e)
class FolderWatcher:
    """Watch a local folder and hand each new file to a callback once.

    A watchdog ``Observer`` is scheduled (non-recursively) on ``watch_path``
    with a ``FileHandler``; accepted files reach ``_handle_new_file``, which
    de-duplicates by path string before invoking the user callback.
    """

    def __init__(
        self,
        watch_path: Optional[Path] = None,
        on_new_file: Optional[Callable[[Path], None]] = None,
    ) -> None:
        """Create the watcher and make sure the watched folder exists.

        Args:
            watch_path: Folder to watch; falls back to
                ``settings.DOWNLOADS_DIR`` when not given.
            on_new_file: Optional callback invoked with each new file path.
        """
        self.logger = logging.getLogger(__name__)
        # Coerce so a plain string path is accepted as well as a Path.
        self.watch_path = Path(watch_path or settings.DOWNLOADS_DIR)
        self.on_new_file_callback = on_new_file
        self._observer: Optional[Observer] = None
        self._running = False
        # Path strings already handed to the callback.
        self._processed_files: set[str] = set()

        # Ensure the folder exists before it is watched.
        self.watch_path.mkdir(parents=True, exist_ok=True)

    def set_callback(self, callback: Callable[[Path], None]) -> None:
        """Set the callback invoked for each new file."""
        self.on_new_file_callback = callback

    def start(self) -> None:
        """Start watching; does nothing (besides a warning) if already running."""
        if self._running:
            self.logger.warning("Watcher already running")
            return

        self.logger.info("Starting folder watcher on: %s", self.watch_path)

        event_handler = FileHandler(
            on_new_file=self._handle_new_file,
            logger=self.logger,
        )

        # Publish the observer only once it has started, so a failure in
        # schedule()/start() does not leave a dead observer on the instance.
        observer = Observer()
        observer.schedule(event_handler, str(self.watch_path), recursive=False)
        observer.start()

        self._observer = observer
        self._running = True
        self.logger.info("Folder watcher started")

    def stop(self) -> None:
        """Stop the observer and wait for it to finish; no-op if not running."""
        if not self._running:
            return

        self.logger.info("Stopping folder watcher")

        if self._observer:
            self._observer.stop()
            self._observer.join()
            self._observer = None

        self._running = False
        self.logger.info("Folder watcher stopped")

    def _handle_new_file(self, file_path: Path) -> None:
        """Forward ``file_path`` to the callback unless it was already handled.

        If the callback raises, the file is un-marked so that a later event
        for the same path can be retried, and the exception is re-raised.
        """
        file_key = str(file_path)

        # Avoid processing the same file twice.
        if file_key in self._processed_files:
            return

        self._processed_files.add(file_key)
        self.logger.info("Processing new file: %s", file_path)

        if not self.on_new_file_callback:
            return

        try:
            self.on_new_file_callback(file_path)
        except Exception:
            # Do not remember a file whose processing failed.
            self._processed_files.discard(file_key)
            raise

    def get_status(self) -> dict:
        """Return the running flag, watched path and processed-file count."""
        return {
            "running": self._running,
            "watch_path": str(self.watch_path),
            "processed_files_count": len(self._processed_files),
        }
class RemoteFolderWatcher:
    """Download new files from a remote (Nextcloud/WebDAV) folder.

    Each scan lists the remote folder, downloads the entries not seen in the
    previous scan and passes every downloaded local path to the callback.
    Files whose download or callback failed are not remembered, so the next
    scan retries them.

    NOTE(review): no polling loop is visible in this class; ``start()``
    performs a single scan and further scans happen only via ``check_now()``.
    """

    def __init__(
        self,
        # Quoted so the annotation is not evaluated when the class is defined.
        webdav_service: "WebDAVService",
        local_path: Optional[Path] = None,
        remote_path: Optional[str] = None,
    ) -> None:
        """Create the watcher and make sure the local folder exists.

        Args:
            webdav_service: Object providing ``list_files(remote_path)`` and
                ``download_file(remote_path, local_path)``.
            local_path: Download destination; falls back to
                ``settings.DOWNLOADS_DIR``.
            remote_path: Remote folder to scan; falls back to
                ``settings.WATCHED_REMOTE_PATH``.
        """
        self.logger = logging.getLogger(__name__)
        self.webdav = webdav_service
        self.local_path = Path(local_path or settings.DOWNLOADS_DIR)
        self.remote_path = remote_path or settings.WATCHED_REMOTE_PATH
        self._running = False
        # Remote entries that need no further handling (see class docstring).
        self._last_checked_files: set[str] = set()
        self._on_download: Optional[Callable[[Path], None]] = None

        # Ensure the local folder exists.
        self.local_path.mkdir(parents=True, exist_ok=True)

    def set_callback(self, callback: Callable[[Path], None]) -> None:
        """Set the callback invoked with each downloaded local path."""
        self._on_download = callback

    def start(self) -> None:
        """Mark the watcher as running and perform the first scan."""
        if self._running:
            self.logger.warning("Remote watcher already running")
            return

        self._running = True
        self.logger.info(
            "Starting remote folder watcher: %s -> %s",
            self.remote_path,
            self.local_path,
        )

        # First scan.
        self._check_for_new_files()

    def stop(self) -> None:
        """Mark the watcher as stopped; later scans become no-ops."""
        self._running = False
        self.logger.info("Remote folder watcher stopped")

    def check_now(self) -> None:
        """Run a scan immediately (does nothing unless the watcher is running)."""
        self._check_for_new_files()

    def _check_for_new_files(self) -> None:
        """Scan the remote folder and download entries not seen before.

        Listing errors are logged and end the scan. Errors on an individual
        file are logged and only affect that file, which is retried on the
        next scan.
        """
        if not self._running:
            return

        try:
            remote_files = set(self.webdav.list_files(self.remote_path))
        except Exception as e:
            self.logger.exception("Error checking remote files: %s", e)
            return

        new_files = remote_files - self._last_checked_files
        if new_files:
            self.logger.info("Found %d new files", len(new_files))

        failed: set[str] = set()
        # Sorted for a deterministic processing order.
        for filename in sorted(new_files):
            if not filename.strip():  # Ignore empty names.
                continue
            try:
                succeeded = self._download_file(filename)
            except Exception as e:
                self.logger.exception(
                    "Error handling remote file %s: %s", filename, e
                )
                succeeded = False
            if not succeeded:
                failed.add(filename)

        # Remember everything listed except failures, so those are retried.
        self._last_checked_files = remote_files - failed

    def _download_file(self, filename: str) -> bool:
        """Download one remote file and notify the callback.

        Returns:
            True if the download succeeded (and the callback, if any, ran),
            False if the WebDAV service reported a failed download.
        """
        # Avoid a double slash when remote_path already ends with "/".
        remote_path = f"{self.remote_path.rstrip('/')}/{filename}"
        # NOTE(review): filename comes from the remote listing; an absolute
        # or "../" name would escape local_path -- confirm list_files output.
        local_path = self.local_path / filename

        self.logger.info("Downloading: %s", remote_path)

        if not self.webdav.download_file(remote_path, local_path):
            self.logger.error("Failed to download: %s", filename)
            return False

        if self._on_download:
            self._on_download(local_path)
        return True

    def get_status(self) -> dict:
        """Return the running flag, both paths and the tracked-file count."""
        return {
            "running": self._running,
            "remote_path": self.remote_path,
            "local_path": str(self.local_path),
            "last_checked_files": len(self._last_checked_files),
        }