feat(dashboard): agregar panel de versiones y corregir carga de transcripciones

- Corregir endpoints /api/transcription y /api/summary para manejar filenames con extensión
- Agregar endpoint /api/versions para listar archivos generados
- Agregar tab 'Versiones' en panel lateral con lista de archivos
- Mejorar modal de progreso con barra animada y estados
- Cambiar archivos para que se abran en pestaña en lugar de descargarse
- Agregar botón 'Regenerar' en lista de archivos procesados
This commit is contained in:
renato97
2026-01-10 19:18:14 +00:00
parent 312e303563
commit 75ef0afcb1
2 changed files with 1608 additions and 46 deletions

View File

@@ -2,6 +2,7 @@
Flask API routes for CBCFacil dashboard Flask API routes for CBCFacil dashboard
""" """
import os import os
import time
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List from typing import Dict, Any, List
@@ -9,14 +10,20 @@ from flask import Flask, render_template, request, jsonify, send_from_directory
from flask_cors import CORS from flask_cors import CORS
from config import settings from config import settings
from storage import processed_registry from storage.processed_registry import processed_registry
from services.webdav_service import webdav_service from services.webdav_service import webdav_service
from services import vram_manager from services import vram_manager
from document.generators import DocumentGenerator
def create_app() -> Flask: def create_app() -> Flask:
"""Create and configure Flask application""" """Create and configure Flask application"""
app = Flask(__name__) # Get the project root directory (parent of api/)
current_dir = Path(__file__).parent
project_root = current_dir.parent
template_dir = project_root / 'templates'
app = Flask(__name__, template_folder=str(template_dir))
CORS(app) CORS(app)
# Configure app # Configure app
@@ -157,6 +164,60 @@ def create_app() -> Flask:
app.logger.error(f"Error downloading file: {e}") app.logger.error(f"Error downloading file: {e}")
return jsonify({'error': 'File not found'}), 404 return jsonify({'error': 'File not found'}), 404
@app.route('/downloads/find-file')
def find_and_download_file():
    """Locate a generated file by trying several name variants and serve it.

    Query params:
        filename: base name; a trailing ``_unificado``/``_unified`` suffix
            is stripped before matching.
        ext: file extension to look for (default ``txt``).

    Searches the downloads and DOCX directories and serves the first match
    inline (``as_attachment=False``) so the browser renders it instead of
    downloading. Returns a JSON 404 when no variant exists.
    """
    try:
        filename = request.args.get('filename', '')
        ext = request.args.get('ext', 'txt')

        if not filename:
            return jsonify({'error': 'Filename required'}), 400

        # Validate to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({'error': 'Invalid filename'}), 400

        # Try various name variants (with/without unifier suffix, spaces vs underscores)
        base_name = filename.replace('_unificado', '').replace('_unified', '')
        name_variants = [
            f"{base_name}.{ext}",
            f"{base_name}_unificado.{ext}",
            f"{base_name}_unified.{ext}",
            f"{base_name.replace(' ', '_')}.{ext}",
            f"{base_name.replace(' ', '_')}_unificado.{ext}",
        ]

        # Directories to search, in priority order
        directories = [
            settings.LOCAL_DOWNLOADS_PATH,
            settings.LOCAL_DOCX
        ]

        # Explicit mimetypes so the browser opens the file instead of saving it;
        # unknown extensions fall back to Flask's default guess (None).
        mimetype_by_ext = {
            'pdf': 'application/pdf',
            'md': 'text/markdown',
            'txt': 'text/plain',
        }

        for directory in directories:
            if not directory.exists():
                continue
            for variant in name_variants:
                file_path = directory / variant
                if file_path.exists():
                    # as_attachment=False: render inline in the browser, don't download
                    return send_from_directory(
                        str(directory),
                        variant,
                        as_attachment=False,
                        mimetype=mimetype_by_ext.get(ext)
                    )

        # BUGFIX: the 404 message previously contained the literal "(unknown)"
        # instead of the requested name.
        return jsonify({'error': f'File not found: {filename}.{ext}'}), 404
    except Exception as e:
        app.logger.error(f"Error finding file: {e}")
        return jsonify({'error': 'File not found'}), 404
@app.route('/health') @app.route('/health')
def health_check(): def health_check():
"""Health check endpoint""" """Health check endpoint"""
@@ -174,11 +235,316 @@ def create_app() -> Flask:
} }
}) })
@app.route('/api/transcription/<filename>')
def get_transcription(filename: str):
    """Return the plain-text transcription for *filename* plus word/char stats."""
    try:
        # Reject anything that could escape the downloads directory
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Strip whatever extension came in (.mp3, .wav, .txt, ...)
        base_name = Path(filename).stem
        txt_file = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"

        if not txt_file.exists():
            return jsonify({
                'success': False,
                'message': f'Transcription file not found: {base_name}.txt'
            }), 404

        # Load the transcription text
        transcription_text = txt_file.read_text(encoding='utf-8')

        return jsonify({
            'success': True,
            'filename': filename,
            'transcription': transcription_text,
            'file_path': str(txt_file),
            'word_count': len(transcription_text.split()),
            'char_count': len(transcription_text)
        })
    except Exception as e:
        app.logger.error(f"Error reading transcription: {e}")
        return jsonify({
            'success': False,
            'message': f"Error reading transcription: {str(e)}"
        }), 500
@app.route('/api/summary/<filename>')
def get_summary(filename: str):
    """Return the markdown summary for *filename* plus its available formats.

    Accepts the audio filename (any extension) or a summary name carrying a
    ``_unificado``/``_unified`` suffix; all are normalized to the base name.
    """
    try:
        # Validate filename to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Extract base name without extension (handle .mp3, .wav, etc.)
        base_name = Path(filename).stem
        # Also remove _unificado/_unified suffixes if present
        base_name = base_name.replace('_unificado', '').replace('_unified', '')

        # Try different file path variants, preferred suffix first
        possible_paths = [
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
        ]
        file_path = next((p for p in possible_paths if p.exists()), None)

        if not file_path:
            # BUGFIX: the 404 message previously contained the literal
            # "(unknown)" instead of the searched base name.
            return jsonify({
                'success': False,
                'message': f'Summary file not found for: {base_name}'
            }), 404

        # Read file content
        with open(file_path, 'r', encoding='utf-8') as f:
            summary_text = f.read()

        # Get available formats (md/docx/pdf flags)
        formats_available = get_available_formats(base_name)

        return jsonify({
            'success': True,
            'filename': base_name,
            'summary': summary_text,
            'file_path': str(file_path),
            'formats_available': formats_available
        })
    except Exception as e:
        app.logger.error(f"Error reading summary: {e}")
        return jsonify({
            'success': False,
            'message': f"Error reading summary: {str(e)}"
        }), 500
@app.route('/api/versions/<filename>')
def get_versions(filename: str):
    """List every generated artifact (transcription + summary formats) for a file.

    Each entry carries a label, a ``/downloads/find-file`` link, and the
    file's modification date/size; entries are sorted newest-first.
    """
    try:
        # Validate filename to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({'success': False, 'message': 'Invalid filename'}), 400

        # Normalize to the bare base name
        base_name = Path(filename).stem
        base_name = base_name.replace('_unificado', '').replace('_unified', '')

        versions = []
        downloads_path = settings.LOCAL_DOWNLOADS_PATH
        docx_path = settings.LOCAL_DOCX

        def _entry(kind, label, name, ext, stat):
            # Shared record shape for every listed version
            return {
                'type': kind,
                'label': label,
                'filename': name,
                'path': f"/downloads/find-file?filename={base_name}&ext={ext}",
                'date': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M'),
                'size': f"{stat.st_size / 1024:.1f} KB"
            }

        # Original transcription (.txt)
        txt_path = downloads_path / f"{base_name}.txt"
        if txt_path.exists():
            versions.append(_entry('transcription', '📝 Transcripción Original',
                                   txt_path.name, 'txt', txt_path.stat()))

        # Summary artifacts per extension. BUGFIX: also accept the _unified
        # and plain name variants, consistent with get_summary and
        # /downloads/find-file (previously only *_unificado.* could match).
        summary_formats = [
            ('md', '📋 Resumen MD'),
            ('docx', '📄 Documento DOCX'),
            ('pdf', '📑 PDF'),
        ]
        for ext, label in summary_formats:
            for suffix in ('_unificado', '_unified', ''):
                candidate = f"{base_name}{suffix}.{ext}"
                # Check downloads path first, then the DOCX directory
                file_path = downloads_path / candidate
                if not file_path.exists():
                    file_path = docx_path / candidate
                if file_path.exists():
                    versions.append(_entry('summary', label, candidate, ext,
                                           file_path.stat()))
                    break  # first matching variant wins

        # Newest first ('%Y-%m-%d %H:%M' strings sort lexicographically)
        versions.sort(key=lambda x: x['date'], reverse=True)

        return jsonify({
            'success': True,
            'base_name': base_name,
            'versions': versions,
            'count': len(versions)
        })
    except Exception as e:
        app.logger.error(f"Error getting versions: {e}")
        return jsonify({'success': False, 'message': str(e)}), 500
@app.route('/api/regenerate-summary', methods=['POST'])
def regenerate_summary():
    """Regenerate the summary documents from an existing transcription.

    JSON body:
        filename: audio/transcription name (extension optional; required).
        custom_prompt: optional prompt override — currently unused, see NOTE.

    Runs DocumentGenerator on the stored .txt transcription and best-effort
    uploads the resulting md/docx/pdf to WebDAV when configured.
    """
    start_time = time.time()
    try:
        # Robustness: tolerate a missing or non-JSON body instead of letting
        # `None.get(...)` raise and surface as a confusing 500.
        data = request.get_json(silent=True) or {}
        filename = data.get('filename')
        custom_prompt = data.get('custom_prompt')
        # NOTE(review): custom_prompt is accepted but never forwarded to
        # DocumentGenerator.generate_summary — confirm intended behavior.

        if not filename:
            return jsonify({
                'success': False,
                'message': 'Filename is required'
            }), 400

        # Validate filename to prevent path traversal
        if '..' in filename or filename.startswith('/'):
            return jsonify({
                'success': False,
                'message': 'Invalid filename'
            }), 400

        # Get base name (remove extension if present)
        base_name = Path(filename).stem

        # Read transcription from .txt file
        transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
        if not transcription_path.exists():
            # Fall back to the raw name in case it already carried .txt
            transcription_path = settings.LOCAL_DOWNLOADS_PATH / filename
            if not transcription_path.exists():
                # BUGFIX: the 404 message previously contained the literal
                # "(unknown)" instead of the requested filename.
                return jsonify({
                    'success': False,
                    'message': f'Transcription file not found for: {filename}'
                }), 404

        with open(transcription_path, 'r', encoding='utf-8') as f:
            transcription_text = f.read()

        # Generate new summary using DocumentGenerator
        doc_generator = DocumentGenerator()
        success, new_summary, metadata = doc_generator.generate_summary(
            transcription_text,
            base_name
        )

        if not success:
            return jsonify({
                'success': False,
                'message': 'Failed to generate summary'
            }), 500

        # Best-effort upload of each generated artifact to WebDAV
        files_updated = []
        if settings.has_webdav_config:
            try:
                # (metadata key of the local file, remote destination folder)
                upload_targets = [
                    ('markdown_path', settings.REMOTE_TXT_FOLDER),
                    ('docx_path', settings.DOCX_FOLDER),
                    ('pdf_path', settings.REMOTE_PDF_FOLDER),
                ]
                for key, remote_folder in upload_targets:
                    if key not in metadata:
                        continue
                    local_path = Path(metadata[key])
                    if local_path.exists():
                        remote_path = f"{remote_folder}/{local_path.name}"
                        webdav_service.upload(str(local_path), remote_path)
                        files_updated.append(remote_path)
            except Exception as e:
                # Continue even if upload fails — regeneration succeeded locally
                app.logger.warning(f"WebDAV upload failed: {e}")

        processing_time = time.time() - start_time
        return jsonify({
            'success': True,
            'message': 'Summary regenerated successfully',
            'new_summary': new_summary,
            'files_updated': files_updated,
            'processing_time': f"{processing_time:.2f}s",
            'metadata': metadata
        })
    except Exception as e:
        app.logger.error(f"Error regenerating summary: {e}")
        return jsonify({
            'success': False,
            'message': f"Error regenerating summary: {str(e)}"
        }), 500
@app.route('/api/files-detailed')
def get_files_detailed():
    """Return the detailed file listing together with aggregate counts."""
    try:
        files = get_audio_files_detailed()

        # Aggregate counters over the detailed records
        transcribed = [f for f in files if f['has_transcription']]
        summarized = [f for f in files if f['has_summary']]

        return jsonify({
            'success': True,
            'files': files,
            'total': len(files),
            'with_transcription': len(transcribed),
            'with_summary': len(summarized)
        })
    except Exception as e:
        app.logger.error(f"Error getting detailed files: {e}")
        return jsonify({
            'success': False,
            'message': f"Error: {str(e)}"
        }), 500
return app return app
def get_audio_files() -> List[Dict[str, Any]]: def get_audio_files() -> List[Dict[str, Any]]:
"""Get list of audio files from WebDAV and local""" """Get list of audio files from WebDAV and local"""
import logging
logger = logging.getLogger(__name__)
files = [] files = []
# Get files from WebDAV # Get files from WebDAV
@@ -202,7 +568,7 @@ def get_audio_files() -> List[Dict[str, Any]]:
'available_formats': get_available_formats(base_name) 'available_formats': get_available_formats(base_name)
}) })
except Exception as e: except Exception as e:
app.logger.error(f"Error getting WebDAV files: {e}") logger.warning(f"Error getting WebDAV files: {e}")
# Get local files # Get local files
try: try:
@@ -222,13 +588,113 @@ def get_audio_files() -> List[Dict[str, Any]]:
'available_formats': get_available_formats(file_path.name) 'available_formats': get_available_formats(file_path.name)
}) })
except Exception as e: except Exception as e:
app.logger.error(f"Error getting local files: {e}") logger.error(f"Error getting local files: {e}")
# Remove duplicates (WebDAV takes precedence) # Remove duplicates (keep both local and webdav - distinguish by source)
unique_files = {} unique_files = {}
for file in files: for file in files:
key = file['filename'] # Use (filename, source) as key to keep both local and webdav files
if key not in unique_files or file['source'] == 'webdav': key = (file['filename'], file['source'])
unique_files[key] = file
return sorted(unique_files.values(), key=lambda x: (x['source'], x['filename']))
def get_audio_files_detailed() -> List[Dict[str, Any]]:
    """Get detailed list of audio files with transcription and summary information.

    Scans the local downloads directory first, then merges in WebDAV audio
    files that have no local counterpart. Each source is best-effort: a
    failure there is logged and that source is skipped.
    """
    import logging
    logger = logging.getLogger(__name__)

    files = []

    # Get local audio files
    try:
        if settings.LOCAL_DOWNLOADS_PATH.exists():
            for ext in settings.AUDIO_EXTENSIONS:
                for file_path in settings.LOCAL_DOWNLOADS_PATH.glob(f"*{ext}"):
                    stat = file_path.stat()
                    filename = file_path.name
                    base_name = file_path.stem

                    # Check for transcription
                    transcription_path = settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.txt"
                    has_transcription = transcription_path.exists()
                    transcription_words = 0
                    if has_transcription:
                        try:
                            with open(transcription_path, 'r', encoding='utf-8') as f:
                                transcription_text = f.read()
                            transcription_words = len(transcription_text.split())
                        except Exception:
                            # Unreadable transcription: keep word count at 0
                            pass

                    # Check for summary and formats
                    formats = get_available_formats(filename)
                    has_summary = formats.get('md', False)

                    # Resolve the summary path among the known name variants
                    summary_path = None
                    if has_summary:
                        summary_variants = [
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unificado.md",
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}_unified.md",
                            settings.LOCAL_DOWNLOADS_PATH / f"{base_name}.md",
                        ]
                        for variant in summary_variants:
                            if variant.exists():
                                summary_path = str(variant)
                                break

                    files.append({
                        'filename': filename,
                        'base_name': base_name,
                        'audio_path': str(file_path),
                        'has_transcription': has_transcription,
                        'transcription_path': str(transcription_path) if has_transcription else None,
                        'transcription_words': transcription_words,
                        'has_summary': has_summary,
                        'summary_path': summary_path,
                        'formats': formats,
                        'processed': processed_registry.is_processed(filename),
                        'last_modified': datetime.fromtimestamp(stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
                        'size': format_size(stat.st_size)
                    })
    except Exception as e:
        # BUGFIX: was `pass` with a comment claiming "Error logged in endpoint",
        # but the endpoint can never observe an exception swallowed here.
        logger.error(f"Error getting local audio files: {e}")

    # Get WebDAV files (metadata not available remotely -> placeholders)
    if settings.has_webdav_config:
        try:
            webdav_files = webdav_service.list(settings.REMOTE_AUDIOS_FOLDER)
            for file_path in webdav_files:
                normalized_path = webdav_service.normalize_path(file_path)
                base_name = Path(normalized_path).stem

                if any(normalized_path.lower().endswith(ext) for ext in settings.AUDIO_EXTENSIONS):
                    # Skip files already discovered locally
                    if not any(f['base_name'] == base_name for f in files):
                        formats = get_available_formats(base_name)
                        files.append({
                            'filename': Path(normalized_path).name,
                            'base_name': base_name,
                            'audio_path': normalized_path,
                            'has_transcription': formats.get('txt', False),
                            'transcription_path': None,
                            'transcription_words': 0,
                            'has_summary': formats.get('md', False),
                            'summary_path': None,
                            'formats': formats,
                            'processed': processed_registry.is_processed(normalized_path),
                            'last_modified': 'Unknown',
                            'size': 'Unknown'
                        })
        except Exception as e:
            # BUGFIX: was silently swallowed; WebDAV failures are non-fatal
            logger.warning(f"Error getting WebDAV files: {e}")

    # Remove duplicates (first occurrence wins, i.e. local takes precedence)
    unique_files = {}
    for file in files:
        key = file['base_name']
        if key not in unique_files:
            unique_files[key] = file

    return sorted(unique_files.values(), key=lambda x: x['filename'])

File diff suppressed because it is too large Load Diff