diff --git a/api/routes.py b/api/routes.py index ff7cf1d..b01fc2f 100644 --- a/api/routes.py +++ b/api/routes.py @@ -2,6 +2,7 @@ Flask API routes for CBCFacil dashboard """ import os +import time from datetime import datetime from pathlib import Path from typing import Dict, Any, List @@ -9,14 +10,20 @@ from flask import Flask, render_template, request, jsonify, send_from_directory from flask_cors import CORS from config import settings -from storage import processed_registry +from storage.processed_registry import processed_registry from services.webdav_service import webdav_service from services import vram_manager +from document.generators import DocumentGenerator def create_app() -> Flask: """Create and configure Flask application""" - app = Flask(__name__) + # Get the project root directory (parent of api/) + current_dir = Path(__file__).parent + project_root = current_dir.parent + template_dir = project_root / 'templates' + + app = Flask(__name__, template_folder=str(template_dir)) CORS(app) # Configure app @@ -157,6 +164,60 @@ def create_app() -> Flask: app.logger.error(f"Error downloading file: {e}") return jsonify({'error': 'File not found'}), 404 + @app.route('/downloads/find-file') + def find_and_download_file(): + """Find and download file with various name variants""" + try: + filename = request.args.get('filename', '') + ext = request.args.get('ext', 'txt') + + if not filename: + return jsonify({'error': 'Filename required'}), 400 + + # Validate to prevent path traversal + if '..' 
in filename or filename.startswith('/'): + return jsonify({'error': 'Invalid filename'}), 400 + + # Try various name variants + base_name = filename.replace('_unificado', '').replace('_unified', '') + name_variants = [ + f"{base_name}.{ext}", + f"{base_name}_unificado.{ext}", + f"{base_name}_unified.{ext}", + f"{base_name.replace(' ', '_')}.{ext}", + f"{base_name.replace(' ', '_')}_unificado.{ext}", + ] + + # Directories to search + directories = [ + settings.LOCAL_DOWNLOADS_PATH, + settings.LOCAL_DOCX + ] + + # Search for file + for directory in directories: + if not directory.exists(): + continue + for variant in name_variants: + file_path = directory / variant + if file_path.exists(): + # Determinar mimetype para que se abra en el navegador + mimetype = None + if ext == 'pdf': + mimetype = 'application/pdf' + elif ext == 'md': + mimetype = 'text/markdown' + elif ext == 'txt': + mimetype = 'text/plain' + # as_attachment=False para abrir en navegador, no descargar + return send_from_directory(str(directory), variant, as_attachment=False, mimetype=mimetype) + + return jsonify({'error': f'File not found: {filename}.{ext}'}), 404 + + except Exception as e: + app.logger.error(f"Error finding file: {e}") + return jsonify({'error': 'File not found'}), 404 + @app.route('/health') def health_check(): """Health check endpoint""" @@ -174,11 +235,316 @@ def create_app() -> Flask: } }) + @app.route('/api/transcription/<filename>') + def get_transcription(filename: str): + """Get transcription content for a specific file""" + try: + # Validate filename to prevent path traversal + if '..' in filename or filename.startswith('/'): + return jsonify({ + 'success': False, + 'message': 'Invalid filename' + }), 400 + + # Extract base name without extension (handle .mp3, .wav, .txt, etc.) 
+ base_name = Path(filename).stem + + # Construct file path for transcription + file_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt" + + # Check if file exists + if not file_path.exists(): + return jsonify({ + 'success': False, + 'message': f'Transcription file not found: {base_name}.txt' + }), 404 + + # Read file content + with open(file_path, 'r', encoding='utf-8') as f: + transcription_text = f.read() + + # Calculate statistics + word_count = len(transcription_text.split()) + char_count = len(transcription_text) + + return jsonify({ + 'success': True, + 'filename': filename, + 'transcription': transcription_text, + 'file_path': str(file_path), + 'word_count': word_count, + 'char_count': char_count + }) + + except Exception as e: + app.logger.error(f"Error reading transcription: {e}") + return jsonify({ + 'success': False, + 'message': f"Error reading transcription: {str(e)}" + }), 500 + + @app.route('/api/summary/<filename>') + def get_summary(filename: str): + """Get summary content for a specific file""" + try: + # Validate filename to prevent path traversal + if '..' in filename or filename.startswith('/'): + return jsonify({ + 'success': False, + 'message': 'Invalid filename' + }), 400 + + # Extract base name without extension (handle .mp3, .wav, etc.) 
+ base_name = Path(filename).stem + # Also remove _unificado/_unified suffixes if present + base_name = base_name.replace('_unificado', '').replace('_unified', '') + + # Try different file path variants + possible_paths = [ + settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md", + settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md", + settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md", + ] + + file_path = None + for path in possible_paths: + if path.exists(): + file_path = path + break + + if not file_path: + return jsonify({ + 'success': False, + 'message': f'Summary file not found for: {filename}' + }), 404 + + # Read file content + with open(file_path, 'r', encoding='utf-8') as f: + summary_text = f.read() + + # Get available formats + formats_available = get_available_formats(base_name) + + return jsonify({ + 'success': True, + 'filename': base_name, + 'summary': summary_text, + 'file_path': str(file_path), + 'formats_available': formats_available + }) + + except Exception as e: + app.logger.error(f"Error reading summary: {e}") + return jsonify({ + 'success': False, + 'message': f"Error reading summary: {str(e)}" + }), 500 + + @app.route('/api/versions/<filename>') + def get_versions(filename: str): + """Get all summary versions for a file""" + try: + # Validate filename + if '..' 
in filename or filename.startswith('/'): + return jsonify({'success': False, 'message': 'Invalid filename'}), 400 + + # Extract base name + base_name = Path(filename).stem + base_name = base_name.replace('_unificado', '').replace('_unified', '') + + versions = [] + downloads_path = settings.LOCAL_DOWNLOADS_PATH + docx_path = settings.LOCAL_DOCX + + # Check for transcription (original) + txt_path = downloads_path / f"{base_name}.txt" + if txt_path.exists(): + stat = txt_path.stat() + versions.append({ + 'type': 'transcription', + 'label': '📝 Transcripción Original', + 'filename': txt_path.name, + 'path': f"/downloads/find-file?filename={base_name}&ext=txt", + 'date': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M'), + 'size': f"{stat.st_size / 1024:.1f} KB" + }) + + # Check for summary versions (md, docx, pdf) + summary_patterns = [ + (f"{base_name}_unificado.md", "📋 Resumen MD"), + (f"{base_name}_unificado.docx", "📄 Documento DOCX"), + (f"{base_name}_unificado.pdf", "📑 PDF"), + ] + + for pattern, label in summary_patterns: + # Check downloads path + file_path = downloads_path / pattern + if not file_path.exists(): + file_path = docx_path / pattern + + if file_path.exists(): + stat = file_path.stat() + ext = file_path.suffix[1:] # Remove the dot + versions.append({ + 'type': 'summary', + 'label': label, + 'filename': pattern, + 'path': f"/downloads/find-file?filename={base_name}&ext={ext}", + 'date': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M'), + 'size': f"{stat.st_size / 1024:.1f} KB" + }) + + # Sort by date descending + versions.sort(key=lambda x: x['date'], reverse=True) + + return jsonify({ + 'success': True, + 'base_name': base_name, + 'versions': versions, + 'count': len(versions) + }) + + except Exception as e: + app.logger.error(f"Error getting versions: {e}") + return jsonify({'success': False, 'message': str(e)}), 500 + + @app.route('/api/regenerate-summary', methods=['POST']) + def regenerate_summary(): + """Regenerate 
summary from existing transcription""" + start_time = time.time() + + try: + data = request.get_json() + filename = data.get('filename') + custom_prompt = data.get('custom_prompt') + + if not filename: + return jsonify({ + 'success': False, + 'message': 'Filename is required' + }), 400 + + # Validate filename to prevent path traversal + if '..' in filename or filename.startswith('/'): + return jsonify({ + 'success': False, + 'message': 'Invalid filename' + }), 400 + + # Get base name (remove extension if present) + base_name = Path(filename).stem + + # Read transcription from .txt file + transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt" + + if not transcription_path.exists(): + # Try without .txt extension if already included + transcription_path = settings.LOCAL_DOWNLOADS_PATH / filename + if not transcription_path.exists(): + return jsonify({ + 'success': False, + 'message': f'Transcription file not found for: {filename}' + }), 404 + + with open(transcription_path, 'r', encoding='utf-8') as f: + transcription_text = f.read() + + # Generate new summary using DocumentGenerator + doc_generator = DocumentGenerator() + success, new_summary, metadata = doc_generator.generate_summary( + transcription_text, + base_name + ) + + if not success: + return jsonify({ + 'success': False, + 'message': 'Failed to generate summary' + }), 500 + + # Upload to WebDAV if configured + files_updated = [] + if settings.has_webdav_config: + try: + # Upload markdown + if 'markdown_path' in metadata: + md_path = Path(metadata['markdown_path']) + if md_path.exists(): + remote_md_path = f"{settings.REMOTE_TXT_FOLDER}/{md_path.name}" + webdav_service.upload(str(md_path), remote_md_path) + files_updated.append(remote_md_path) + + # Upload DOCX + if 'docx_path' in metadata: + docx_path = Path(metadata['docx_path']) + if docx_path.exists(): + remote_docx_path = f"{settings.DOCX_FOLDER}/{docx_path.name}" + webdav_service.upload(str(docx_path), remote_docx_path) + 
files_updated.append(remote_docx_path) + + # Upload PDF if available + if 'pdf_path' in metadata: + pdf_path = Path(metadata['pdf_path']) + if pdf_path.exists(): + remote_pdf_path = f"{settings.REMOTE_PDF_FOLDER}/{pdf_path.name}" + webdav_service.upload(str(pdf_path), remote_pdf_path) + files_updated.append(remote_pdf_path) + + except Exception as e: + app.logger.warning(f"WebDAV upload failed: {e}") + # Continue even if upload fails + + processing_time = time.time() - start_time + + return jsonify({ + 'success': True, + 'message': 'Summary regenerated successfully', + 'new_summary': new_summary, + 'files_updated': files_updated, + 'processing_time': f"{processing_time:.2f}s", + 'metadata': metadata + }) + + except Exception as e: + app.logger.error(f"Error regenerating summary: {e}") + return jsonify({ + 'success': False, + 'message': f"Error regenerating summary: {str(e)}" + }), 500 + + @app.route('/api/files-detailed') + def get_files_detailed(): + """Get detailed list of files with transcription and summary info""" + try: + files = get_audio_files_detailed() + + # Calculate statistics + total = len(files) + with_transcription = sum(1 for f in files if f['has_transcription']) + with_summary = sum(1 for f in files if f['has_summary']) + + return jsonify({ + 'success': True, + 'files': files, + 'total': total, + 'with_transcription': with_transcription, + 'with_summary': with_summary + }) + + except Exception as e: + app.logger.error(f"Error getting detailed files: {e}") + return jsonify({ + 'success': False, + 'message': f"Error: {str(e)}" + }), 500 + return app def get_audio_files() -> List[Dict[str, Any]]: """Get list of audio files from WebDAV and local""" + import logging + logger = logging.getLogger(__name__) files = [] # Get files from WebDAV @@ -202,7 +568,7 @@ def get_audio_files() -> List[Dict[str, Any]]: 'available_formats': get_available_formats(base_name) }) except Exception as e: - app.logger.error(f"Error getting WebDAV files: {e}") + 
logger.warning(f"Error getting WebDAV files: {e}") # Get local files try: @@ -222,13 +588,113 @@ def get_audio_files() -> List[Dict[str, Any]]: 'available_formats': get_available_formats(file_path.name) }) except Exception as e: - app.logger.error(f"Error getting local files: {e}") + logger.error(f"Error getting local files: {e}") - # Remove duplicates (WebDAV takes precedence) + # Remove duplicates (keep both local and webdav - distinguish by source) unique_files = {} for file in files: - key = file['filename'] - if key not in unique_files or file['source'] == 'webdav': + # Use (filename, source) as key to keep both local and webdav files + key = (file['filename'], file['source']) + unique_files[key] = file + + return sorted(unique_files.values(), key=lambda x: (x['source'], x['filename'])) + + +def get_audio_files_detailed() -> List[Dict[str, Any]]: + """Get detailed list of audio files with transcription and summary information""" + files = [] + + # Get local audio files only for detailed view + try: + if settings.LOCAL_DOWNLOADS_PATH.exists(): + for ext in settings.AUDIO_EXTENSIONS: + for file_path in settings.LOCAL_DOWNLOADS_PATH.glob(f"*{ext}"): + stat = file_path.stat() + filename = file_path.name + base_name = file_path.stem + + # Check for transcription + transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt" + has_transcription = transcription_path.exists() + transcription_words = 0 + + if has_transcription: + try: + with open(transcription_path, 'r', encoding='utf-8') as f: + transcription_text = f.read() + transcription_words = len(transcription_text.split()) + except Exception: + pass + + # Check for summary and formats + formats = get_available_formats(filename) + has_summary = formats.get('md', False) + + # Get summary path + summary_path = None + if has_summary: + summary_variants = [ + settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md", + settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md", + 
settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md", + ] + for variant in summary_variants: + if variant.exists(): + summary_path = str(variant) + break + + files.append({ + 'filename': filename, + 'base_name': base_name, + 'audio_path': str(file_path), + 'has_transcription': has_transcription, + 'transcription_path': str(transcription_path) if has_transcription else None, + 'transcription_words': transcription_words, + 'has_summary': has_summary, + 'summary_path': summary_path, + 'formats': formats, + 'processed': processed_registry.is_processed(filename), + 'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'), + 'size': format_size(stat.st_size) + }) + except Exception as e: + pass # Error logged in endpoint + + # Get WebDAV files + if settings.has_webdav_config: + try: + webdav_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER) + for file_path in webdav_files: + normalized_path = webdav_service.normalize_path(file_path) + base_name = Path(normalized_path).stem + + if any(normalized_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS): + # Check if already in local files + if not any(f['base_name'] == base_name for f in files): + formats = get_available_formats(base_name) + + files.append({ + 'filename': Path(normalized_path).name, + 'base_name': base_name, + 'audio_path': normalized_path, + 'has_transcription': formats.get('txt', False), + 'transcription_path': None, + 'transcription_words': 0, + 'has_summary': formats.get('md', False), + 'summary_path': None, + 'formats': formats, + 'processed': processed_registry.is_processed(normalized_path), + 'last_modified': 'Unknown', + 'size': 'Unknown' + }) + except Exception as e: + pass # Error logged in endpoint + + # Remove duplicates and sort + unique_files = {} + for file in files: + key = file['base_name'] + if key not in unique_files: unique_files[key] = file return sorted(unique_files.values(), key=lambda x: x['filename']) diff --git a/templates/index.html 
b/templates/index.html index 241eae0..9cc9430 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,10 +1,12 @@ + Dashboard de Gestión de Audio - + +
@@ -1074,7 +1613,8 @@
@@ -1084,7 +1624,8 @@ 🔤 Nombre
- +
@@ -1093,12 +1634,86 @@
+ +
+
+
+
+
Regenerando resumen...
+
+ +
+

Vista Previa

+ +
+ +
+ + + +
+ +
+
+
+
+
Palabras
+
-
+
+
+
Caracteres
+
-
+
+
+
+ Cargando transcripción... +
+
+ +
+
+
+
Palabras
+
-
+
+
+
Caracteres
+
-
+
+
+
+ Cargando resumen... +
+
+ +
+
+

📁 Archivos Generados

+

Haz clic para abrir en nueva pestaña

+
+
+ Cargando versiones... +
+
+
+ +
+ + +
+
+ + \ No newline at end of file