Restore full pipeline: 3-step summarization, formatting, PDF/DOCX generation

2026-01-09 17:01:22 -03:00
parent b017504c52
commit e6a01d08d4
20 changed files with 260 additions and 43 deletions
--- a/main.py
+++ b/main.py
@@ -276,7 +276,95 @@ def run_main_loop() -> None:
                    for file_path in audio_files:
                        if any(file_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
                            if not processed_registry.is_processed(file_path):
-                                audio_processor.process(file_path)
+                                from pathlib import Path
+                                from urllib.parse import unquote
+                                from document.generators import DocumentGenerator
+                                from services.telegram_service import telegram_service
+                                
+                                local_filename = unquote(Path(file_path).name)
+                                base_name = Path(local_filename).stem
+                                local_path = settings.LOCAL_DOWNLOADS_PATH / local_filename
+                                settings.LOCAL_DOWNLOADS_PATH.mkdir(parents=True, exist_ok=True)
+                                
+                                # Step 1: Notify and download
+                                telegram_service.send_message(
+                                    f"🎵 Nuevo audio detectado: {local_filename}\n"
+                                    f"⬇️ Descargando..."
+                                )
+                                logger.info(f"Downloading audio: {file_path} -> {local_path}")
+                                webdav_service.download(file_path, local_path)
+                                
+                                # Step 2: Transcribe
+                                telegram_service.send_message(f"📝 Transcribiendo audio con Whisper...")
+                                result = audio_processor.process(str(local_path))
+                                
+                                if result.get("success") and result.get("transcription_path"):
+                                    transcription_file = Path(result["transcription_path"])
+                                    transcription_text = result.get("text", "")
+                                    
+                                    # Step 3: Generate AI summary and documents
+                                    telegram_service.send_message(f"🤖 Generando resumen con IA...")
+                                    doc_generator = DocumentGenerator()
+                                    success, summary, output_files = doc_generator.generate_summary(
+                                        transcription_text, base_name
+                                    )
+                                    
+                                    # Step 4: Upload all files to Nextcloud
+                                    if success and output_files:
+                                        # Create folders
+                                        for folder in [settings.RESUMENES_FOLDER, settings.DOCX_FOLDER]:
+                                            try:
+                                                webdav_service.makedirs(folder)
+                                            except Exception:
+                                                pass
+                                        
+                                        # Upload transcription TXT
+                                        if transcription_file.exists():
+                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
+                                            webdav_service.upload(transcription_file, remote_txt)
+                                            logger.info(f"Uploaded: {remote_txt}")
+                                        
+                                        # Upload DOCX
+                                        docx_path = Path(output_files.get('docx_path', ''))
+                                        if docx_path.exists():
+                                            remote_docx = f"{settings.DOCX_FOLDER}/{docx_path.name}"
+                                            webdav_service.upload(docx_path, remote_docx)
+                                            logger.info(f"Uploaded: {remote_docx}")
+                                        
+                                        # Upload PDF
+                                        pdf_path = Path(output_files.get('pdf_path', ''))
+                                        if pdf_path.exists():
+                                            remote_pdf = f"{settings.DOCX_FOLDER}/{pdf_path.name}"
+                                            webdav_service.upload(pdf_path, remote_pdf)
+                                            logger.info(f"Uploaded: {remote_pdf}")
+                                        
+                                        # Upload Markdown
+                                        md_path = Path(output_files.get('markdown_path', ''))
+                                        if md_path.exists():
+                                            remote_md = f"{settings.RESUMENES_FOLDER}/{md_path.name}"
+                                            webdav_service.upload(md_path, remote_md)
+                                            logger.info(f"Uploaded: {remote_md}")
+                                        
+                                        # Final notification
+                                        telegram_service.send_message(
+                                            f"✅ Audio procesado: {local_filename}\n"
+                                            f"📄 DOCX: {docx_path.name if docx_path.exists() else 'N/A'}\n"
+                                            f"📑 PDF: {pdf_path.name if pdf_path.exists() else 'N/A'}\n"
+                                            f"☁️ Subido a Nextcloud"
+                                        )
+                                    else:
+                                        # Just upload transcription if summary failed
+                                        if transcription_file.exists():
+                                            try:
+                                                webdav_service.makedirs(settings.RESUMENES_FOLDER)
+                                            except Exception:
+                                                pass
+                                            remote_txt = f"{settings.RESUMENES_FOLDER}/{transcription_file.name}"
+                                            webdav_service.upload(transcription_file, remote_txt)
+                                            telegram_service.send_message(
+                                                f"⚠️ Resumen fallido, solo transcripción subida:\n{transcription_file.name}"
+                                            )
+                                
                                processed_registry.save(file_path)
                except Exception as e:
                    logger.exception(f"Error processing audio: {e}")