""" Watcher de carpeta local. Monitorea una carpeta por archivos nuevos y los procesa. """ import logging import time from pathlib import Path from typing import Callable, Optional from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler, FileCreatedEvent from config import settings from services import WebDAVService class FileHandler(FileSystemEventHandler): """Manejador de eventos del sistema de archivos.""" def __init__( self, on_new_file: Callable[[Path], None], logger: Optional[logging.Logger] = None, ) -> None: super().__init__() self.on_new_file = on_new_file self.logger = logger or logging.getLogger(__name__) def on_created(self, event: FileCreatedEvent) -> None: """Se llama cuando se crea un nuevo archivo.""" if event.is_directory: return file_path = Path(event.src_path) self.logger.info(f"New file detected: {file_path}") # Ignorar archivos temporales if file_path.suffix in [".tmp", ".part", ".crdownload"]: return # Ignorar archivos ocultos if file_path.name.startswith("."): return # Esperar a que el archivo esté listo time.sleep(1) try: self.on_new_file(file_path) except Exception as e: self.logger.error(f"Error processing file {file_path}: {e}") class FolderWatcher: """Monitor de carpeta local.""" def __init__( self, watch_path: Optional[Path] = None, on_new_file: Optional[Callable[[Path], None]] = None, ) -> None: self.logger = logging.getLogger(__name__) self.watch_path = watch_path or settings.DOWNLOADS_DIR self.on_new_file_callback = on_new_file self._observer: Optional[Observer] = None self._running = False self._processed_files: set[str] = set() # Asegurar que la carpeta existe self.watch_path.mkdir(parents=True, exist_ok=True) def set_callback(self, callback: Callable[[Path], None]) -> None: """Establece el callback para nuevos archivos.""" self.on_new_file_callback = callback def start(self) -> None: """Inicia el watcher.""" if self._running: self.logger.warning("Watcher already running") return self.logger.info(f"Starting folder watcher on: {self.watch_path}") event_handler = FileHandler( on_new_file=self._handle_new_file, logger=self.logger, ) self._observer = Observer() self._observer.schedule(event_handler, str(self.watch_path), recursive=False) self._observer.start() self._running = True self.logger.info("Folder watcher started") def stop(self) -> None: """Detiene el watcher.""" if not self._running: return self.logger.info("Stopping folder watcher") if self._observer: self._observer.stop() self._observer.join() self._observer = None self._running = False self.logger.info("Folder watcher stopped") def _handle_new_file(self, file_path: Path) -> None: """Maneja un nuevo archivo detectado.""" file_key = str(file_path) # Evitar procesar el mismo archivo dos veces if file_key in self._processed_files: return self._processed_files.add(file_key) self.logger.info(f"Processing new file: {file_path}") if self.on_new_file_callback: self.on_new_file_callback(file_path) def get_status(self) -> dict: """Obtiene el estado del watcher.""" return { "running": self._running, "watch_path": str(self.watch_path), "processed_files_count": len(self._processed_files), } class RemoteFolderWatcher: """Watcher que descarga archivos desde Nextcloud.""" def __init__( self, webdav_service: WebDAVService, local_path: Optional[Path] = None, remote_path: Optional[str] = None, ) -> None: self.logger = logging.getLogger(__name__) self.webdav = webdav_service self.local_path = local_path or settings.DOWNLOADS_DIR self.remote_path = remote_path or settings.WATCHED_REMOTE_PATH self._running = False self._last_checked_files: set[str] = set() self._on_download: Optional[Callable[[Path], None]] = None # Asegurar que la carpeta local existe self.local_path.mkdir(parents=True, exist_ok=True) def set_callback(self, callback: Callable[[Path], None]) -> None: """Establece el callback para archivos descargados.""" self._on_download = callback def start(self) -> None: """Inicia el polling de archivos remotos.""" if self._running: self.logger.warning("Remote watcher already running") return self._running = True self.logger.info(f"Starting remote folder watcher: {self.remote_path} -> {self.local_path}") # Primer escaneo self._check_for_new_files() def stop(self) -> None: """Detiene el watcher.""" self._running = False self.logger.info("Remote folder watcher stopped") def check_now(self) -> None: """Fuerza una verificación inmediata.""" self._check_for_new_files() def _check_for_new_files(self) -> None: """Verifica si hay nuevos archivos en Nextcloud.""" if not self._running: return try: files = self.webdav.list_files(self.remote_path) current_files = set(files) - self._last_checked_files if current_files: self.logger.info(f"Found {len(current_files)} new files") for filename in current_files: if filename.strip(): # Ignorar nombres vacíos self._download_file(filename) self._last_checked_files = set(files) except Exception as e: self.logger.error(f"Error checking remote files: {e}") def _download_file(self, filename: str) -> None: """Descarga un archivo individual.""" remote_path = f"{self.remote_path}/{filename}" local_path = self.local_path / filename # Verificar si ya existe el archivo localmente if local_path.exists(): # Verificar si ya fue procesado (existe transcripción) stem = local_path.stem transcriptions_dir = self.local_path.parent / "transcriptions" processed = False if transcriptions_dir.exists(): for f in transcriptions_dir.iterdir(): if f.suffix == ".txt" and stem in f.stem: processed = True break if processed: self.logger.info(f"Skipping already processed file: {filename}") return else: # Existe pero no procesado, re-descargar self.logger.info(f"Re-downloading incomplete file: {filename}") self.logger.info(f"Downloading: {remote_path}") if self.webdav.download_file(remote_path, local_path): if self._on_download: self._on_download(local_path) else: self.logger.error(f"Failed to download: {filename}") def get_status(self) -> dict: """Obtiene el estado del watcher.""" return { "running": self._running, "remote_path": self.remote_path, "local_path": str(self.local_path), "last_checked_files": len(self._last_checked_files), }