Initial commit - cleaned for CV
This commit is contained in:
602
main.py
Normal file
602
main.py
Normal file
@@ -0,0 +1,602 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
CBCFacil - Main Service Entry Point
|
||||
Unified AI service for document processing (audio, PDF, text)
|
||||
"""
|
||||
|
||||
import fcntl
import json
import logging
import os
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, TextIO
|
||||
|
||||
# Load environment variables from .env file
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# Configure logging with JSON formatter for production
|
||||
class JSONFormatter(logging.Formatter):
    """JSON formatter for structured logging in production.

    Emits one JSON object per record containing a UTC timestamp, level,
    message, and the source location; a formatted traceback is added when
    exception info is attached to the record.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Render *record* as a single-line JSON string."""
        log_entry = {
            # datetime.utcnow() is deprecated since 3.12; build an aware UTC
            # timestamp and normalize "+00:00" to the same trailing "Z".
            "timestamp": datetime.now(timezone.utc)
            .isoformat()
            .replace("+00:00", "Z"),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }

        # Add exception info if present
        if record.exc_info:
            log_entry["exception"] = self.formatException(record.exc_info)

        return json.dumps(log_entry)
|
||||
|
||||
|
||||
def setup_logging() -> logging.Logger:
    """Build and return this module's logger from the project settings.

    Console output is JSON in production and human-readable otherwise; an
    additional JSON file handler is attached when LOG_FILE is set.
    """
    from config import settings

    log = logging.getLogger(__name__)
    log.setLevel(getattr(logging, settings.LOG_LEVEL.upper()))

    # Start from a clean slate so repeated calls never stack handlers.
    log.handlers.clear()

    # Console handler: structured JSON in production, plain text in dev.
    stream = logging.StreamHandler(sys.stdout)
    if settings.is_production:
        formatter: logging.Formatter = JSONFormatter()
    else:
        formatter = logging.Formatter(
            "%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
        )
    stream.setFormatter(formatter)
    log.addHandler(stream)

    # Optional file handler, always JSON for machine consumption.
    if settings.LOG_FILE:
        to_file = logging.FileHandler(settings.LOG_FILE)
        to_file.setFormatter(JSONFormatter())
        log.addHandler(to_file)

    return log
|
||||
|
||||
|
||||
# Module-level logger shared by every function in this file.
logger = setup_logging()
|
||||
|
||||
|
||||
def acquire_lock() -> TextIO:
    """Acquire the exclusive single-instance lock.

    Opens (and truncates) a lock file in LOCAL_STATE_DIR (or next to this
    script) and takes a non-blocking exclusive flock on it, then writes the
    current PID into it.  The returned file object must stay alive for the
    lifetime of the process — closing it releases the lock.

    Returns:
        The open lock file object; pass it to release_lock() on shutdown.
        (Previously annotated ``-> int``, but a file object is returned.)

    Raises:
        OSError: if another instance already holds the lock (LOCK_NB makes
            flock fail immediately instead of blocking).
    """
    lock_file = (
        Path(os.getenv("LOCAL_STATE_DIR", str(Path(__file__).parent)))
        / ".main_service.lock"
    )
    lock_file.parent.mkdir(parents=True, exist_ok=True)
    lock_fd = open(lock_file, "w")
    try:
        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError:
        # Don't leak the descriptor when the lock is already held.
        lock_fd.close()
        raise
    lock_fd.write(str(os.getpid()))
    lock_fd.flush()
    logger.info(f"Lock acquired. PID: {os.getpid()}")
    return lock_fd
|
||||
|
||||
|
||||
def release_lock(lock_fd) -> None:
    """Release the single-instance lock and close its file object.

    Best effort: failures are only logged, never raised — we are shutting
    down anyway, and the OS drops the flock when the process exits.
    """
    try:
        fcntl.flock(lock_fd.fileno(), fcntl.LOCK_UN)
        lock_fd.close()
    except (OSError, ValueError) as e:
        # OSError: unlock/close failure; ValueError: file already closed.
        # Narrowed from a blanket "except Exception" so real bugs surface.
        logger.warning(f"Could not release lock: {e}")
|
||||
|
||||
|
||||
def validate_configuration() -> None:
    """Validate the environment at startup.

    Raises:
        ConfigurationError: re-raised (after logging) when validation fails,
            so the caller can abort startup.
    """
    from config.validators import validate_environment, ConfigurationError

    try:
        warnings = validate_environment()
    except ConfigurationError as e:
        logger.error(f"Configuration validation failed: {e}")
        raise
    else:
        if warnings:
            logger.info(
                f"Configuration validation completed with {len(warnings)} warnings"
            )
|
||||
|
||||
|
||||
def check_service_health() -> dict:
    """
    Check health of all external services.

    Probes WebDAV with a cheap list operation, reports whether Telegram is
    configured, and queries the VRAM manager.  A WebDAV failure downgrades
    the overall status to "degraded"; Telegram/VRAM problems are reported
    per-service only.

    Returns:
        dict with "timestamp" (naive UTC ISO string), overall "status"
        ("healthy" or "degraded") and a per-service "services" mapping.
    """
    from config import settings
    from services.webdav_service import webdav_service

    health_status = {
        # datetime.utcnow() is deprecated since 3.12; derive the same naive
        # UTC value from an aware "now" to keep the timestamp format stable.
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "status": "healthy",
        "services": {},
    }

    # Check WebDAV
    try:
        if settings.has_webdav_config:
            # Try a simple operation
            webdav_service.list(".")
            health_status["services"]["webdav"] = {"status": "healthy"}
        else:
            health_status["services"]["webdav"] = {"status": "not_configured"}
    except Exception as e:
        health_status["services"]["webdav"] = {"status": "unhealthy", "error": str(e)}
        health_status["status"] = "degraded"

    # Check Telegram
    try:
        from services.telegram_service import telegram_service

        if telegram_service.is_configured:
            health_status["services"]["telegram"] = {"status": "healthy"}
        else:
            health_status["services"]["telegram"] = {"status": "not_configured"}
    except Exception as e:
        health_status["services"]["telegram"] = {
            "status": "unavailable",
            "error": str(e),
        }

    # Check VRAM manager
    try:
        from services.vram_manager import vram_manager

        vram_info = vram_manager.get_vram_info()
        health_status["services"]["vram"] = {
            "status": "healthy",
            "available_gb": vram_info.get("free", 0) / (1024**3),
        }
    except Exception as e:
        health_status["services"]["vram"] = {"status": "unavailable", "error": str(e)}

    return health_status
|
||||
|
||||
|
||||
def initialize_services() -> None:
    """Initialize all services with configuration validation.

    Runs in a fixed order: validate config, warn about missing optional
    config, configure Telegram and Notion, then initialize WebDAV, the VRAM
    manager and the processed registry, finishing with a health check.
    Individual service failures are logged but do not abort startup; only a
    ConfigurationError from validate_configuration() propagates.
    """
    from config import settings
    from services.webdav_service import webdav_service
    from services.vram_manager import vram_manager
    from services.telegram_service import telegram_service
    from storage.processed_registry import processed_registry

    logger.info("Initializing services...")

    # Validate configuration (raises ConfigurationError on fatal problems)
    validate_configuration()

    # Warn if WebDAV not configured
    if not settings.has_webdav_config:
        logger.warning("WebDAV not configured - file sync functionality disabled")

    # Warn if AI providers not configured
    if not settings.has_ai_config:
        logger.warning("AI providers not configured - summary generation will not work")

    # Configure Telegram if credentials available
    if settings.TELEGRAM_TOKEN and settings.TELEGRAM_CHAT_ID:
        try:
            telegram_service.configure(
                settings.TELEGRAM_TOKEN, settings.TELEGRAM_CHAT_ID
            )
            # Announce service start to the configured chat.
            telegram_service.send_start_notification()
            logger.info("Telegram notifications enabled")
        except Exception as e:
            logger.error(f"Failed to configure Telegram: {e}")

    # Configure Notion if credentials available
    if settings.has_notion_config:
        try:
            from services.notion_service import notion_service

            notion_service.configure(
                settings.NOTION_API_TOKEN, settings.NOTION_DATABASE_ID
            )
            logger.info("✅ Notion integration enabled")
        except Exception as e:
            logger.error(f"Failed to configure Notion: {e}")
    else:
        logger.info("Notion not configured - upload to Notion disabled")

    # Initialize WebDAV if configured
    if settings.has_webdav_config:
        try:
            webdav_service.initialize()
            logger.info("WebDAV service initialized")
        except Exception as e:
            logger.error(f"Failed to initialize WebDAV: {e}")
            logger.exception("WebDAV initialization error details")
    else:
        logger.info("Skipping WebDAV initialization (not configured)")

    # Initialize VRAM manager (best effort - failure is logged, not fatal)
    try:
        vram_manager.initialize()
        logger.info("VRAM manager initialized")
    except Exception as e:
        logger.error(f"Failed to initialize VRAM manager: {e}")
        logger.exception("VRAM manager initialization error details")

    # Initialize processed registry (best effort - failure is logged, not fatal)
    try:
        processed_registry.initialize()
        logger.info("Processed registry initialized")
    except Exception as e:
        logger.error(f"Failed to initialize processed registry: {e}")
        logger.exception("Registry initialization error details")

    # Run health check and log the full per-service breakdown.
    health = check_service_health()
    logger.info(f"Initial health check: {json.dumps(health, indent=2)}")

    logger.info("All services initialized successfully")
|
||||
|
||||
|
||||
def send_error_notification(error_type: str, error_message: str) -> None:
    """Send an error notification via Telegram (best effort, never raises)."""
    try:
        from services.telegram_service import telegram_service

        # Silently skip when Telegram was never configured.
        if not telegram_service.is_configured:
            return
        telegram_service.send_error_notification(error_type, error_message)
    except Exception as e:
        logger.warning(f"Failed to send error notification: {e}")
|
||||
|
||||
|
||||
def run_dashboard_thread() -> None:
    """Run the Flask dashboard (blocking) — intended as a thread target.

    Reads DASHBOARD_PORT from the environment (default 5000) so the port
    actually served matches the one start_dashboard() logs; it was
    previously hard-coded to 5000, silently ignoring the override.
    """
    try:
        from api.routes import create_app

        app = create_app()

        # Run Flask in production mode with threaded=True
        app.run(
            host="0.0.0.0",
            port=int(os.getenv("DASHBOARD_PORT", "5000")),
            debug=False,
            threaded=True,
            use_reloader=False,  # Important: disable reloader in thread
        )
    except Exception as e:
        logger.error(f"Dashboard thread error: {e}")
        logger.exception("Dashboard thread exception details")
|
||||
|
||||
|
||||
def start_dashboard() -> threading.Thread:
    """Launch the Flask dashboard in a background daemon thread.

    Returns:
        The started thread (daemon, so it never blocks shutdown).
    """
    dashboard_port = int(os.getenv("DASHBOARD_PORT", "5000"))
    logger.info(f"Starting dashboard on port {dashboard_port}...")

    # Daemon thread: must not keep the process alive during shutdown.
    thread = threading.Thread(
        target=run_dashboard_thread,
        name="DashboardThread",
        daemon=True,
    )
    thread.start()
    logger.info(f"Dashboard thread started (Thread-ID: {thread.ident})")
    return thread
|
||||
|
||||
|
||||
def run_main_loop() -> None:
    """Main processing loop with improved error handling.

    Polls the configured WebDAV folders forever for new PDF, audio and text
    files, routes each unprocessed file to its processor, and records it in
    the processed registry.  Each category has its own try/except so one bad
    file type cannot stall the others; a top-level failure increments a
    consecutive-error counter and triggers a doubled back-off sleep, but the
    loop itself never exits.
    """
    from config import settings
    from services.webdav_service import webdav_service
    from storage.processed_registry import processed_registry
    from processors.audio_processor import AudioProcessor
    from processors.pdf_processor import PDFProcessor
    from processors.text_processor import TextProcessor

    audio_processor = AudioProcessor()
    pdf_processor = PDFProcessor()
    text_processor = TextProcessor()

    consecutive_errors = 0
    max_consecutive_errors = 5

    while True:
        try:
            logger.info("--- Polling for new files ---")
            # Reload so entries written by other components are picked up.
            processed_registry.load()

            # Process PDFs
            if settings.has_webdav_config:
                try:
                    webdav_service.mkdir(settings.REMOTE_PDF_FOLDER)
                    pdf_files = webdav_service.list(settings.REMOTE_PDF_FOLDER)
                    for file_path in pdf_files:
                        if file_path.lower().endswith(".pdf"):
                            if not processed_registry.is_processed(file_path):
                                from pathlib import Path
                                from urllib.parse import unquote
                                from services.telegram_service import telegram_service

                                # Remote names are URL-encoded; decode before using locally.
                                local_filename = unquote(Path(file_path).name)
                                # NOTE(review): base_name is unused in this PDF branch.
                                base_name = Path(local_filename).stem
                                local_path = (
                                    settings.LOCAL_DOWNLOADS_PATH / local_filename
                                )
                                settings.LOCAL_DOWNLOADS_PATH.mkdir(
                                    parents=True, exist_ok=True
                                )

                                # Step 1: Notify and download
                                telegram_service.send_message(
                                    f"📄 Nuevo PDF detectado: {local_filename}\n"
                                    f"⬇️ Descargando..."
                                )
                                logger.info(
                                    f"Downloading PDF: {file_path} -> {local_path}"
                                )
                                webdav_service.download(file_path, local_path)

                                # Step 2: Process PDF
                                telegram_service.send_message(
                                    f"🔍 Procesando PDF con OCR..."
                                )
                                pdf_processor.process(str(local_path))

                                # Mark done so the next poll skips this file.
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing PDFs: {e}")
                    send_error_notification("pdf_processing", str(e))

            # Process Audio files
            if settings.has_webdav_config:
                try:
                    audio_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
                    for file_path in audio_files:
                        if any(
                            file_path.lower().endswith(ext)
                            for ext in settings.AUDIO_EXTENSIONS
                        ):
                            if not processed_registry.is_processed(file_path):
                                from pathlib import Path
                                from urllib.parse import unquote
                                from document.generators import DocumentGenerator
                                from services.telegram_service import telegram_service

                                local_filename = unquote(Path(file_path).name)
                                base_name = Path(local_filename).stem
                                local_path = (
                                    settings.LOCAL_DOWNLOADS_PATH / local_filename
                                )
                                settings.LOCAL_DOWNLOADS_PATH.mkdir(
                                    parents=True, exist_ok=True
                                )

                                # Step 1: Notify and download
                                telegram_service.send_message(
                                    f"🎵 Nuevo audio detectado: {local_filename}\n"
                                    f"⬇️ Descargando..."
                                )
                                logger.info(
                                    f"Downloading audio: {file_path} -> {local_path}"
                                )
                                webdav_service.download(file_path, local_path)

                                # Step 2: Transcribe
                                telegram_service.send_message(
                                    f"📝 Transcribiendo audio con Whisper..."
                                )
                                result = audio_processor.process(str(local_path))

                                if result.get("success") and result.get(
                                    "transcription_path"
                                ):
                                    transcription_file = Path(
                                        result["transcription_path"]
                                    )
                                    transcription_text = result.get("text", "")

                                    # Step 3: Generate AI summary and documents
                                    telegram_service.send_message(
                                        f"🤖 Generando resumen académico LaTeX..."
                                    )
                                    doc_generator = DocumentGenerator(
                                        notification_callback=lambda msg: telegram_service.send_message(msg)
                                    )
                                    # NOTE(review): summary is unused below; only
                                    # success and output_files are consumed.
                                    success, summary, output_files = (
                                        doc_generator.generate_summary(
                                            transcription_text, base_name
                                        )
                                    )

                                    # Step 4: Upload all files to Nextcloud
                                    if success and output_files:
                                        # Create folders (ignore "already exists")
                                        for folder in [
                                            settings.RESUMENES_FOLDER,
                                            settings.DOCX_FOLDER,
                                        ]:
                                            try:
                                                webdav_service.makedirs(folder)
                                            except Exception:
                                                pass

                                        # Upload all files in parallel using batch upload
                                        upload_tasks = []

                                        # Upload transcription TXT
                                        if transcription_file.exists():
                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
                                            upload_tasks.append((transcription_file, remote_txt))

                                        # Upload DOCX
                                        docx_path = Path(
                                            output_files.get("docx_path", "")
                                        )
                                        if docx_path.exists():
                                            remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
                                            upload_tasks.append((docx_path, remote_docx))

                                        # Upload PDF
                                        pdf_path = Path(
                                            output_files.get("pdf_path", "")
                                        )
                                        if pdf_path.exists():
                                            remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
                                            upload_tasks.append((pdf_path, remote_pdf))

                                        # Upload Markdown
                                        md_path = Path(
                                            output_files.get("markdown_path", "")
                                        )
                                        if md_path.exists():
                                            remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
                                            upload_tasks.append((md_path, remote_md))

                                        # Execute parallel uploads
                                        if upload_tasks:
                                            upload_results = webdav_service.upload_batch(
                                                upload_tasks, max_workers=4, timeout=120
                                            )
                                            logger.info(f"Parallel upload complete: {len(upload_results)} files")

                                        # Final notification
                                        telegram_service.send_message(
                                            f"✅ Audio procesado: {local_filename}\n"
                                            f"📄 DOCX: {docx_path.name if docx_path.exists() else 'N/A'}\n"
                                            f"📑 PDF: {pdf_path.name if pdf_path.exists() else 'N/A'}\n"
                                            f"☁️ Subido a Nextcloud"
                                        )
                                    else:
                                        # Just upload transcription if summary failed
                                        if transcription_file.exists():
                                            try:
                                                webdav_service.makedirs(
                                                    settings.RESUMENES_FOLDER
                                                )
                                            except Exception:
                                                pass
                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
                                            webdav_service.upload(
                                                transcription_file, remote_txt
                                            )
                                            telegram_service.send_message(
                                                f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
                                            )

                                # Marked processed even when the summary failed,
                                # so the file is not retried on the next poll.
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing audio: {e}")
                    send_error_notification("audio_processing", str(e))

            # Process Text files
            if settings.has_webdav_config:
                try:
                    text_files = webdav_service.list(settings.REMOTE_TXT_FOLDER)
                    for file_path in text_files:
                        if any(
                            file_path.lower().endswith(ext)
                            for ext in settings.TXT_EXTENSIONS
                        ):
                            if not processed_registry.is_processed(file_path):
                                text_processor.process(file_path)
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing text: {e}")
                    send_error_notification("text_processing", str(e))

            # Reset error counter on success
            consecutive_errors = 0

        except Exception as e:
            # Improved error logging with full traceback
            logger.exception(f"Critical error in main loop: {e}")

            # Send notification for critical errors
            send_error_notification("main_loop", str(e))

            # Track consecutive errors
            consecutive_errors += 1

            if consecutive_errors >= max_consecutive_errors:
                logger.critical(
                    f"Too many consecutive errors ({consecutive_errors}). "
                    "Service may be unstable. Consider checking configuration."
                )
                send_error_notification(
                    "consecutive_errors",
                    f"Service has failed {consecutive_errors} consecutive times",
                )

            # Don't exit, let the loop continue with backoff
            logger.info(f"Waiting {settings.POLL_INTERVAL * 2} seconds before retry...")
            time.sleep(settings.POLL_INTERVAL * 2)
            continue

        logger.info(f"Cycle completed. Waiting {settings.POLL_INTERVAL} seconds...")
        time.sleep(settings.POLL_INTERVAL)
|
||||
|
||||
|
||||
def main():
    """Service entry point: take the lock, init services, run the loop."""
    lock_fd = None
    dashboard_thread = None
    try:
        logger.info("=== CBCFacil Service Started ===")
        logger.info(f"Version: {os.getenv('APP_VERSION', '8.0')}")
        is_prod = os.getenv("DEBUG", "false").lower() != "true"
        logger.info(f"Environment: {'production' if is_prod else 'development'}")

        # Refuse to run two instances at once.
        lock_fd = acquire_lock()
        initialize_services()

        # Start dashboard in background thread
        dashboard_thread = start_dashboard()

        # Blocks until interrupted.
        run_main_loop()

    except KeyboardInterrupt:
        logger.info("Shutdown requested by user")
    except Exception as e:
        logger.exception(f"Fatal error in main: {e}")
        send_error_notification("fatal_error", str(e))
        sys.exit(1)
    finally:
        if lock_fd:
            release_lock(lock_fd)
        logger.info("=== CBCFacil Service Stopped ===")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Handle CLI commands
    if len(sys.argv) > 1:
        command = sys.argv[1]
        # Accept "python main.py whisper <file>"; only argv[2] is consumed,
        # so the previous "== 4" check wrongly rejected 3-argument calls.
        if command == "whisper" and len(sys.argv) >= 3:
            from processors.audio_processor import AudioProcessor

            AudioProcessor().process(sys.argv[2])
        elif command == "pdf" and len(sys.argv) >= 3:
            from processors.pdf_processor import PDFProcessor

            PDFProcessor().process(sys.argv[2])
        elif command == "health":
            # check_service_health is defined in this module already; the
            # previous "from main import ..." re-executed the whole module
            # top level a second time under the name "main".
            health = check_service_health()
            print(json.dumps(health, indent=2))
        else:
            print("Usage: python main.py [whisper|pdf|health]")
            sys.exit(1)
    else:
        main()
|
||||
Reference in New Issue
Block a user