Cambios principales:

## Nuevos archivos
- services/ai/parallel_provider.py: Ejecución paralela de múltiples proveedores AI
- services/ai/prompt_manager.py: Gestión centralizada de prompts (resumen.md como fuente)
- latex/resumen.md: Template del prompt para resúmenes académicos LaTeX

## Mejoras en generación LaTeX (document/generators.py)
- Nueva función _sanitize_latex(): Corrige automáticamente errores comunes de AI
- Agrega align=center a nodos TikZ con saltos de línea (\\)
- Previene errores 'Not allowed in LR mode' antes de compilar
- Soporte para procesamiento paralelo de proveedores AI
- Conversión DOCX en paralelo con generación PDF
- Uploads a Notion en background (non-blocking)
- Callbacks de notificación para progreso en Telegram

## Mejoras en proveedores AI
- claude_provider.py: fix_latex() con instrucciones específicas para errores TikZ
- gemini_provider.py: fix_latex() mejorado + rate limiting + circuit breaker
- provider_factory.py: Soporte para parallel provider

## Otros cambios
- config/settings.py: Nuevas configuraciones para Gemini models
- services/webdav_service.py: Mejoras en manejo de conexión
- .gitignore: Ignora archivos LaTeX auxiliares (.aux, .toc, .out, .pdf)

## Archivos de ejemplo
- latex/imperio_romano.tex, latex/clase_revolucion_rusa_crisis_30.tex
- resumen_curiosidades.tex (corregido y compilado exitosamente)
603 lines
24 KiB
Python
603 lines
24 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CBCFacil - Main Service Entry Point
|
|
Unified AI service for document processing (audio, PDF, text)
|
|
"""
|
|
|
|
import logging
|
|
import sys
|
|
import time
|
|
import fcntl
|
|
import os
|
|
import json
|
|
import threading
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
# Load environment variables from .env file.
from dotenv import load_dotenv

# Must run before any `from config import settings` below so that the
# settings module sees the .env values already present in os.environ.
load_dotenv()
|
|
|
|
|
|
# Configure logging with JSON formatter for production
|
|
class JSONFormatter(logging.Formatter):
    """JSON formatter for structured logging in production.

    Emits one JSON object per record with timestamp (UTC, trailing "Z"),
    level, message, module, function, and line number. When the record
    carries exception info, the formatted traceback is added under the
    "exception" key.
    """

    def format(self, record):
        """Return *record* serialized as a single-line JSON string."""
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # stripping tzinfo keeps the original "...Z" wire format (no
        # "+00:00" offset suffix), so downstream log parsers are unaffected.
        from datetime import timezone

        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        log_entry = {
            "timestamp": now_utc.isoformat() + "Z",
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        # Add exception info if present.
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_entry)
|
|
|
|
|
|
def setup_logging() -> logging.Logger:
    """Configure and return the module logger.

    Console output goes to stdout — JSON in production, human-readable
    otherwise. When settings.LOG_FILE is set, an additional JSON file
    handler is attached.
    """
    from config import settings

    log = logging.getLogger(__name__)
    log.setLevel(getattr(logging, settings.LOG_LEVEL.upper()))

    # Start from a clean slate so repeated calls don't stack handlers.
    log.handlers.clear()

    # Console handler: format depends on the deployment environment.
    stream = logging.StreamHandler(sys.stdout)
    if settings.is_production:
        fmt = JSONFormatter()
    else:
        fmt = logging.Formatter("%(asctime)s [%(levelname)s] - %(name)s - %(message)s")
    stream.setFormatter(fmt)
    log.addHandler(stream)

    # Optional persistent log file, always structured JSON.
    if settings.LOG_FILE:
        to_file = logging.FileHandler(settings.LOG_FILE)
        to_file.setFormatter(JSONFormatter())
        log.addHandler(to_file)

    return log
|
|
|
|
|
|
# Module-level logger, configured once at import time and shared by all
# functions below.
logger = setup_logging()
|
|
|
|
|
|
def acquire_lock():
    """Acquire the single-instance lock file.

    Returns:
        The open file object holding the exclusive flock. (The previous
        ``-> int`` annotation was wrong — a file object is returned.)
        Keep it alive for the process lifetime and hand it to
        release_lock() on shutdown.

    Raises:
        OSError (BlockingIOError): if another instance already holds the lock.
    """
    lock_file = (
        Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent)))
        / ".main_service.lock"
    )
    lock_file.parent.mkdir(parents=True, exist_ok=True)
    lock_fd = open(lock_file, "w")
    try:
        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        # Another instance holds the lock: don't leak the open handle.
        lock_fd.close()
        raise
    lock_fd.write(str(os.getpid()))
    lock_fd.flush()
    logger.info(f"Lock acquired. PID: {os.getpid()}")
    return lock_fd
|
|
|
|
|
|
def release_lock(lock_fd) -> None:
    """Best-effort release of the single-instance lock handle.

    Unlocks and closes *lock_fd*; any failure is logged as a warning
    rather than raised, since this runs on the shutdown path.
    """
    try:
        fd = lock_fd.fileno()
        fcntl.flock(fd, fcntl.LOCK_UN)
        lock_fd.close()
    except Exception as e:
        # Never let lock cleanup crash shutdown.
        logger.warning(f"Could not release lock: {e}")
|
|
|
|
|
|
def validate_configuration() -> None:
    """Validate environment configuration at startup.

    Improvement: each validation warning is now logged individually —
    previously only the warning *count* was logged, hiding the content.

    Raises:
        ConfigurationError: when the environment is invalid (re-raised
        after logging, so main() aborts startup).
    """
    from config.validators import validate_environment, ConfigurationError

    try:
        warnings = validate_environment()
        if warnings:
            logger.info(
                f"Configuration validation completed with {len(warnings)} warnings"
            )
            # Surface each individual warning instead of just the count.
            for warning in warnings:
                logger.warning(str(warning))
    except ConfigurationError as e:
        logger.error(f"Configuration validation failed: {e}")
        raise
|
|
|
|
|
|
def check_service_health() -> dict:
    """Check health of all external services.

    Returns:
        dict with "timestamp", overall "status" ("healthy"/"degraded"),
        and a per-service breakdown under "services". Only a WebDAV
        failure degrades the overall status; Telegram and VRAM problems
        are reported as "unavailable" but considered non-fatal.
    """
    from datetime import timezone

    from config import settings
    from services.webdav_service import webdav_service

    health_status = {
        # Timezone-aware replacement for deprecated datetime.utcnow();
        # tzinfo is stripped so the naive-ISO string format is unchanged.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "status": "healthy",
        "services": {},
    }

    # WebDAV: a cheap list() on the root proves connectivity and auth.
    try:
        if settings.has_webdav_config:
            webdav_service.list(".")
            health_status["services"]["webdav"] = {"status": "healthy"}
        else:
            health_status["services"]["webdav"] = {"status": "not_configured"}
    except Exception as e:
        health_status["services"]["webdav"] = {"status": "unhealthy", "error": str(e)}
        health_status["status"] = "degraded"

    # Telegram: configuration flag only — no network round-trip here.
    try:
        from services.telegram_service import telegram_service

        if telegram_service.is_configured:
            health_status["services"]["telegram"] = {"status": "healthy"}
        else:
            health_status["services"]["telegram"] = {"status": "not_configured"}
    except Exception as e:
        health_status["services"]["telegram"] = {
            "status": "unavailable",
            "error": str(e),
        }

    # VRAM manager: report free GPU memory in GiB when available.
    try:
        from services.vram_manager import vram_manager

        vram_info = vram_manager.get_vram_info()
        health_status["services"]["vram"] = {
            "status": "healthy",
            "available_gb": vram_info.get("free", 0) / (1024**3),
        }
    except Exception as e:
        health_status["services"]["vram"] = {"status": "unavailable", "error": str(e)}

    return health_status
|
|
|
|
|
|
def initialize_services() -> None:
    """Initialize all services, validating configuration first.

    Order matters: configuration is validated (fatal on error), Telegram
    is configured early so later failures can be notified, then Notion,
    WebDAV, the VRAM manager, and the processed-file registry follow.
    Individual service failures are logged but do NOT abort startup.

    Raises:
        ConfigurationError: propagated from validate_configuration().
    """
    from config import settings
    from services.webdav_service import webdav_service
    from services.vram_manager import vram_manager
    from services.telegram_service import telegram_service
    from storage.processed_registry import processed_registry

    logger.info("Initializing services...")

    # Hard validation: raises on fatal misconfiguration (see above).
    validate_configuration()

    # Warn if WebDAV not configured — the polling loop will skip file sync.
    if not settings.has_webdav_config:
        logger.warning("WebDAV not configured - file sync functionality disabled")

    # Warn if AI providers not configured — summaries will fail downstream.
    if not settings.has_ai_config:
        logger.warning("AI providers not configured - summary generation will not work")

    # Configure Telegram if credentials available; failure is non-fatal,
    # the service simply runs without notifications.
    if settings.TELEGRAM_TOKEN and settings.TELEGRAM_CHAT_ID:
        try:
            telegram_service.configure(
                settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID
            )
            telegram_service.send_start_notification()
            logger.info("Telegram notifications enabled")
        except Exception as e:
            logger.error(f"Failed to configure Telegram: {e}")

    # Configure Notion if credentials available (import deferred so a
    # missing notion dependency only disables this feature).
    if settings.has_notion_config:
        try:
            from services.notion_service import notion_service

            notion_service.configure(
                settings.NOTION_API_TOKEN, settings.NOTION_DATABASE_ID
            )
            logger.info("✅ Notion integration enabled")
        except Exception as e:
            logger.error(f"Failed to configure Notion: {e}")
    else:
        logger.info("Notion not configured - upload to Notion disabled")

    # Initialize WebDAV if configured; errors are logged with traceback
    # but startup continues.
    if settings.has_webdav_config:
        try:
            webdav_service.initialize()
            logger.info("WebDAV service initialized")
        except Exception as e:
            logger.error(f"Failed to initialize WebDAV: {e}")
            logger.exception("WebDAV initialization error details")
    else:
        logger.info("Skipping WebDAV initialization (not configured)")

    # Initialize VRAM manager (non-fatal — presumably absent GPU just
    # makes this unavailable; see check_service_health).
    try:
        vram_manager.initialize()
        logger.info("VRAM manager initialized")
    except Exception as e:
        logger.error(f"Failed to initialize VRAM manager: {e}")
        logger.exception("VRAM manager initialization error details")

    # Initialize the registry that tracks already-processed remote files.
    try:
        processed_registry.initialize()
        logger.info("Processed registry initialized")
    except Exception as e:
        logger.error(f"Failed to initialize processed registry: {e}")
        logger.exception("Registry initialization error details")

    # Run an initial health check and log the full JSON snapshot.
    health = check_service_health()
    logger.info(f"Initial health check: {json.dumps(health, indent=2)}")

    logger.info("All services initialized successfully")
|
|
|
|
|
|
def send_error_notification(error_type: str, error_message: str) -> None:
    """Best-effort Telegram error notification; never raises.

    Silently does nothing when Telegram is not configured; any failure
    (including the import itself) is downgraded to a warning log.
    """
    try:
        from services.telegram_service import telegram_service

        if not telegram_service.is_configured:
            return
        telegram_service.send_error_notification(error_type, error_message)
    except Exception as e:
        logger.warning(f"Failed to send error notification: {e}")
|
|
|
|
|
|
def run_dashboard_thread() -> None:
    """Run the Flask dashboard (blocking); intended as a daemon-thread target.

    Bug fix: the port was hard-coded to 5000 while start_dashboard()
    logged the DASHBOARD_PORT env value, so the logged and actual port
    could disagree. The server now honors DASHBOARD_PORT (default 5000).
    """
    try:
        from api.routes import create_app

        app = create_app()

        # Same env var (and default) that start_dashboard() reports.
        port = int(os.getenv("DASHBOARD_PORT", "5000"))

        # Run Flask in production mode with threaded=True.
        app.run(
            host="0.0.0.0",
            port=port,
            debug=False,
            threaded=True,
            use_reloader=False,  # Important: disable reloader in thread
        )
    except Exception as e:
        logger.error(f"Dashboard thread error: {e}")
        logger.exception("Dashboard thread exception details")
|
|
|
|
|
|
def start_dashboard() -> threading.Thread:
    """Launch the Flask dashboard in a background daemon thread.

    Returns the started Thread so the caller can inspect it; being a
    daemon, it never blocks interpreter shutdown.
    """
    dashboard_port = int(os.getenv("DASHBOARD_PORT", "5000"))
    logger.info(f"Starting dashboard on port {dashboard_port}...")

    thread = threading.Thread(
        target=run_dashboard_thread,
        name="DashboardThread",
        daemon=True,  # don't block shutdown
    )
    thread.start()

    logger.info(f"Dashboard thread started (Thread-ID: {thread.ident})")
    return thread
|
|
|
|
|
|
def run_main_loop() -> None:
    """Main processing loop: poll WebDAV forever for PDFs, audio, and text.

    Each cycle: reload the processed-file registry, then scan three
    remote folders (PDF, audio, text). New files are downloaded,
    processed, and marked processed; audio additionally goes through
    AI summary generation and batch upload of the outputs. Per-category
    errors are caught so one failing category cannot stop the others;
    after max_consecutive_errors whole-cycle failures a critical alert
    is sent, but the loop never exits — it backs off and retries.
    """
    from config import settings
    from services.webdav_service import webdav_service
    from storage.processed_registry import processed_registry
    from processors.audio_processor import AudioProcessor
    from processors.pdf_processor import PDFProcessor
    from processors.text_processor import TextProcessor

    # Processors are created once and reused across polling cycles.
    audio_processor = AudioProcessor()
    pdf_processor = PDFProcessor()
    text_processor = TextProcessor()

    # Consecutive whole-cycle failures; reset to 0 after any clean cycle.
    consecutive_errors = 0
    max_consecutive_errors = 5

    while True:
        try:
            logger.info("--- Polling for new files ---")
            # Refresh registry in case another process updated it.
            processed_registry.load()

            # --- Process PDFs -------------------------------------------
            if settings.has_webdav_config:
                try:
                    # NOTE(review): this branch uses mkdir() while the
                    # upload paths below use makedirs() — confirm both
                    # exist on webdav_service.
                    webdav_service.mkdir(settings.REMOTE_PDF_FOLDER)
                    pdf_files = webdav_service.list(settings.REMOTE_PDF_FOLDER)
                    for file_path in pdf_files:
                        if file_path.lower().endswith(".pdf"):
                            if not processed_registry.is_processed(file_path):
                                # NOTE(review): Path/unquote are already
                                # imported at module level; these inline
                                # imports are redundant but harmless.
                                from pathlib import Path
                                from urllib.parse import unquote
                                from services.telegram_service import telegram_service

                                # Remote names are URL-encoded; decode for
                                # the local filename.
                                local_filename = unquote(Path(file_path).name)
                                # NOTE(review): base_name is unused in the
                                # PDF branch (only the audio branch uses it).
                                base_name = Path(local_filename).stem
                                local_path = (
                                    settings.LOCAL_DOWNLOADS_PATH / local_filename
                                )
                                settings.LOCAL_DOWNLOADS_PATH.mkdir(
                                    parents=True, exist_ok=True
                                )

                                # Step 1: Notify and download
                                telegram_service.send_message(
                                    f"📄 Nuevo PDF detectado: {local_filename}\n"
                                    f"⬇️ Descargando..."
                                )
                                logger.info(
                                    f"Downloading PDF: {file_path} -> {local_path}"
                                )
                                webdav_service.download(file_path, local_path)

                                # Step 2: Process PDF (OCR)
                                telegram_service.send_message(
                                    f"🔍 Procesando PDF con OCR..."
                                )
                                pdf_processor.process(str(local_path))

                                # Mark done only after successful processing.
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing PDFs: {e}")
                    send_error_notification("pdf_processing", str(e))

            # --- Process Audio files ------------------------------------
            if settings.has_webdav_config:
                try:
                    audio_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
                    for file_path in audio_files:
                        if any(
                            file_path.lower().endswith(ext)
                            for ext in settings.AUDIO_EXTENSIONS
                        ):
                            if not processed_registry.is_processed(file_path):
                                from pathlib import Path
                                from urllib.parse import unquote
                                from document.generators import DocumentGenerator
                                from services.telegram_service import telegram_service

                                local_filename = unquote(Path(file_path).name)
                                # Stem is reused as the summary's base name.
                                base_name = Path(local_filename).stem
                                local_path = (
                                    settings.LOCAL_DOWNLOADS_PATH / local_filename
                                )
                                settings.LOCAL_DOWNLOADS_PATH.mkdir(
                                    parents=True, exist_ok=True
                                )

                                # Step 1: Notify and download
                                telegram_service.send_message(
                                    f"🎵 Nuevo audio detectado: {local_filename}\n"
                                    f"⬇️ Descargando..."
                                )
                                logger.info(
                                    f"Downloading audio: {file_path} -> {local_path}"
                                )
                                webdav_service.download(file_path, local_path)

                                # Step 2: Transcribe with Whisper
                                telegram_service.send_message(
                                    f"📝 Transcribiendo audio con Whisper..."
                                )
                                result = audio_processor.process(str(local_path))

                                # Only continue when transcription succeeded
                                # AND produced a file on disk.
                                if result.get("success") and result.get(
                                    "transcription_path"
                                ):
                                    transcription_file = Path(
                                        result["transcription_path"]
                                    )
                                    transcription_text = result.get("text", "")

                                    # Step 3: Generate AI summary and documents;
                                    # progress is relayed to Telegram via callback.
                                    telegram_service.send_message(
                                        f"🤖 Generando resumen académico LaTeX..."
                                    )
                                    doc_generator = DocumentGenerator(
                                        notification_callback=lambda msg: telegram_service.send_message(msg)
                                    )
                                    success, summary, output_files = (
                                        doc_generator.generate_summary(
                                            transcription_text, base_name
                                        )
                                    )

                                    # Step 4: Upload all files to Nextcloud
                                    if success and output_files:
                                        # Ensure destination folders exist
                                        # (best effort — may already exist).
                                        for folder in [
                                            settings.RESUMENES_FOLDER,
                                            settings.DOCX_FOLDER,
                                        ]:
                                            try:
                                                webdav_service.makedirs(folder)
                                            except Exception:
                                                pass

                                        # Collect (local, remote) pairs for a
                                        # single parallel batch upload.
                                        upload_tasks = []

                                        # Transcription TXT -> resumenes folder
                                        if transcription_file.exists():
                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
                                            upload_tasks.append((transcription_file, remote_txt))

                                        # DOCX -> docx folder
                                        docx_path = Path(
                                            output_files.get("docx_path", "")
                                        )
                                        if docx_path.exists():
                                            remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
                                            upload_tasks.append((docx_path, remote_docx))

                                        # PDF -> docx folder (same destination
                                        # as DOCX by design, apparently)
                                        pdf_path = Path(
                                            output_files.get("pdf_path", "")
                                        )
                                        if pdf_path.exists():
                                            remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
                                            upload_tasks.append((pdf_path, remote_pdf))

                                        # Markdown -> resumenes folder
                                        md_path = Path(
                                            output_files.get("markdown_path", "")
                                        )
                                        if md_path.exists():
                                            remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
                                            upload_tasks.append((md_path, remote_md))

                                        # Execute parallel uploads
                                        if upload_tasks:
                                            upload_results = webdav_service.upload_batch(
                                                upload_tasks, max_workers=4, timeout=120
                                            )
                                            logger.info(f"Parallel upload complete: {len(upload_results)} files")

                                        # Final success notification
                                        telegram_service.send_message(
                                            f"✅ Audio procesado: {local_filename}\n"
                                            f"📄 DOCX: {docx_path.name if docx_path.exists() else 'N/A'}\n"
                                            f"📑 PDF: {pdf_path.name if pdf_path.exists() else 'N/A'}\n"
                                            f"☁️ Subido a Nextcloud"
                                        )
                                    else:
                                        # Summary failed: still upload the raw
                                        # transcription so work isn't lost.
                                        if transcription_file.exists():
                                            try:
                                                webdav_service.makedirs(
                                                    settings.RESUMENES_FOLDER
                                                )
                                            except Exception:
                                                pass
                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
                                            webdav_service.upload(
                                                transcription_file, remote_txt
                                            )
                                            telegram_service.send_message(
                                                f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
                                            )

                                # Marked processed even when the summary step
                                # failed — the transcription was handled above.
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing audio: {e}")
                    send_error_notification("audio_processing", str(e))

            # --- Process Text files -------------------------------------
            if settings.has_webdav_config:
                try:
                    text_files = webdav_service.list(settings.REMOTE_TXT_FOLDER)
                    for file_path in text_files:
                        if any(
                            file_path.lower().endswith(ext)
                            for ext in settings.TXT_EXTENSIONS
                        ):
                            if not processed_registry.is_processed(file_path):
                                # Text processor handles its own download.
                                text_processor.process(file_path)
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing text: {e}")
                    send_error_notification("text_processing", str(e))

            # Reset error counter after a fully clean cycle.
            consecutive_errors = 0

        except Exception as e:
            # Whole-cycle failure: log full traceback, notify, back off.
            logger.exception(f"Critical error in main loop: {e}")

            send_error_notification("main_loop", str(e))

            consecutive_errors += 1

            if consecutive_errors >= max_consecutive_errors:
                logger.critical(
                    f"Too many consecutive errors ({consecutive_errors}). "
                    "Service may be unstable. Consider checking configuration."
                )
                send_error_notification(
                    "consecutive_errors",
                    f"Service has failed {consecutive_errors} consecutive times",
                )

            # Don't exit — continue with doubled-interval backoff.
            logger.info(f"Waiting {settings.POLL_INTERVAL * 2} seconds before retry...")
            time.sleep(settings.POLL_INTERVAL * 2)
            continue

        logger.info(f"Cycle completed. Waiting {settings.POLL_INTERVAL} seconds...")
        time.sleep(settings.POLL_INTERVAL)
|
|
|
|
|
|
def main():
    """Service entry point.

    Acquires the single-instance lock, initializes services, starts the
    dashboard daemon thread, then blocks in the polling loop. The lock
    is always released on the way out; fatal errors notify Telegram and
    exit with status 1.
    """
    lock_fd = None
    dashboard_thread = None
    try:
        logger.info("=== CBCFacil Service Started ===")
        logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
        env_name = (
            "production"
            if os.getenv("DEBUG", "false").lower() != "true"
            else "development"
        )
        logger.info(f"Environment: {env_name}")

        lock_fd = acquire_lock()
        initialize_services()

        # Dashboard runs as a daemon thread alongside the polling loop.
        dashboard_thread = start_dashboard()

        # Blocks forever (or until KeyboardInterrupt / fatal error).
        run_main_loop()

    except KeyboardInterrupt:
        logger.info("Shutdown requested by user")
    except Exception as e:
        logger.exception(f"Fatal error in main: {e}")
        send_error_notification("fatal_error", str(e))
        sys.exit(1)
    finally:
        if lock_fd:
            release_lock(lock_fd)
        logger.info("=== CBCFacil Service Stopped ===")
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI dispatch: `whisper <audio> <arg>`, `pdf <file> <arg>`, `health`,
    # or no arguments to run the full service.
    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command == "whisper" and len(sys.argv) == 4:
            from processors.audio_processor import AudioProcessor

            # NOTE(review): sys.argv[3] is required by the arity check but
            # never used — confirm whether it was meant as an output path.
            AudioProcessor().process(sys.argv[2])
        elif command == "pdf" and len(sys.argv) == 4:
            from processors.pdf_processor import PDFProcessor

            # NOTE(review): same unused sys.argv[3] as above.
            PDFProcessor().process(sys.argv[2])
        elif command == "health":
            # Fix: call the local function directly. The previous
            # `from main import check_service_health` re-imported this very
            # module, re-executing its top-level code (logging setup, etc.).
            health = check_service_health()
            print(json.dumps(health, indent=2))
        else:
            print("Usage: python main.py [whisper|pdf|health]")
            sys.exit(1)
    else:
        main()
|