feat: implement 33 nice-to-have features + fix 37 code review bugs

5 SDD batches archived:
- Batch 1: UI Polish (10 features, 14 tasks)
- Batch 2: Study System (8 features, 23 tasks)
- Batch 3: Infrastructure (5 features, 22 tasks)
- Batch 4: AI Advanced (5 features, 30 tasks) — RAG with @xenova/transformers
- Batch 5: Core Features (5 features, 19 tasks)

37 bugs fixed from comprehensive code review (11 CRITICAL, 12 HIGH, 14 MEDIUM/LOW):
- SSE streaming now works (event.token check)
- API keys no longer exposed via GET /api/models
- FTS5 injection sanitized
- DB backup/restore with admin auth
- Buddy mode wired (buddy_meta column)
- Exam auto-submit stale closure fixed
- CSS variables aligned with design tokens
- Progress data corruption fixed
- WebSocket protocol auto-detection
- Test infrastructure completed (vitest + node:test)
This commit is contained in:
renato97
2026-06-08 18:18:47 -03:00
parent b7d1e7319f
commit 4ff4302a8c
79 changed files with 13667 additions and 389 deletions

View File

@@ -3,6 +3,8 @@ const multer = require('multer');
const path = require('path');
const fs = require('fs');
const db = require('../db');
const { chunkText } = require('../lib/rag');
const embeddings = require('../lib/embeddings');
const router = express.Router();
const uploadDir = path.resolve(__dirname, '..', '..', 'data', 'uploads');
@@ -45,7 +47,8 @@ async function vlmExtract(filePath, vlmConfig) {
const base64 = buffer.toString('base64');
const mimeType = 'application/pdf';
const resp = await fetch(`${vlmConfig.endpoint}/chat/completions`, {
const baseUrl = vlmConfig.endpoint.replace(/\/+$/, '');
const resp = await fetch(`${baseUrl}/chat/completions`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${vlmConfig.api_key}`,
@@ -90,9 +93,10 @@ router.post('/upload', upload.single('file'), async (req, res) => {
let usedVlm = false;
// 1. Try VLM first
const vlmEndpoint = db.prepare("SELECT value FROM config WHERE key = 'vlm_endpoint'").get();
const vlmApiKey = db.prepare("SELECT value FROM config WHERE key = 'vlm_api_key'").get();
const vlmModel = db.prepare("SELECT value FROM config WHERE key = 'vlm_model'").get();
const configs = db.prepare("SELECT key, value FROM config WHERE key IN ('vlm_endpoint','vlm_api_key','vlm_model')").all();
const vlmEndpoint = configs.find(c => c.key === 'vlm_endpoint');
const vlmApiKey = configs.find(c => c.key === 'vlm_api_key');
const vlmModel = configs.find(c => c.key === 'vlm_model');
if (vlmEndpoint?.value && vlmApiKey?.value) {
const vlmConfig = {
@@ -101,7 +105,16 @@ router.post('/upload', upload.single('file'), async (req, res) => {
model: vlmModel?.value || 'glm-4.6v',
};
text = await vlmExtract(filePath, vlmConfig);
if (text) usedVlm = true;
if (text) {
usedVlm = true;
// VLM succeeded — still get page count from PDF metadata
try {
const pdfjsLib = await import('pdfjs-dist/legacy/build/pdf.mjs');
const data = new Uint8Array(fs.readFileSync(filePath));
const doc = await pdfjsLib.getDocument({ data }).promise;
pages = doc.numPages;
} catch { /* keep pages = 0 if metadata read fails */ }
}
}
// 2. Fallback to pdfjs-dist
@@ -119,7 +132,26 @@ router.post('/upload', upload.single('file'), async (req, res) => {
VALUES (?, ?, ?, ?, ?)
`).run(req.file.filename, req.file.originalname, text || '', pages || 0, reorderIndex);
const row = db.prepare('SELECT * FROM pdfs WHERE id = ?').get(info.lastInsertRowid);
const pdfId = info.lastInsertRowid;
// Generate embeddings for the PDF text
if (text && text.trim().length > 0) {
try {
const chunks = chunkText(text, 500, 50);
const vectors = await embeddings.embedBatch(chunks);
for (let i = 0; i < chunks.length; i++) {
const vec = vectors[i];
const blob = Buffer.from(vec.buffer);
db.prepare(
'INSERT INTO embeddings (pdf_id, chunk_index, vector, content) VALUES (?, ?, ?, ?)'
).run(pdfId, i, blob, chunks[i]);
}
} catch (embErr) {
console.warn('[pdfs] embedding generation failed:', embErr.message);
}
}
const row = db.prepare('SELECT * FROM pdfs WHERE id = ?').get(pdfId);
res.status(201).json({ ...row, used_vlm: usedVlm });
} catch (err) {
console.error('[pdfs] upload error:', err.message);
@@ -196,10 +228,10 @@ router.delete('/:id', (req, res) => {
// Delete file from disk
const filePath = path.join(uploadDir, row.filename);
if (fs.existsSync(filePath)) {
fs.unlinkSync(filePath);
}
try { fs.unlinkSync(filePath); } catch {}
// Delete embeddings first
db.prepare('DELETE FROM embeddings WHERE pdf_id = ?').run(id);
db.prepare('DELETE FROM pdfs WHERE id = ?').run(id);
res.json({ deleted: true });
} catch (err) {
@@ -208,4 +240,26 @@ router.delete('/:id', (req, res) => {
}
});
// GET /api/pdfs/:id/embeddings — list embeddings for a PDF (debug)
// Responds with { pdf_id, count, chunks }; rows are ordered by chunk_index and
// the vector column is not part of the SELECT.
router.get('/:id/embeddings', (req, res) => {
  const pdfId = parseInt(req.params.id, 10);
  if (Number.isNaN(pdfId)) {
    return res.status(400).json({ error: 'Invalid pdf id' });
  }

  try {
    // Look up the parent PDF first so an unknown id yields 404 instead of an empty list.
    const owner = db.prepare('SELECT id FROM pdfs WHERE id = ?').get(pdfId);
    if (!owner) {
      return res.status(404).json({ error: 'PDF not found' });
    }

    const listStmt = db.prepare('SELECT id, pdf_id, chunk_index, content, created_at FROM embeddings WHERE pdf_id = ? ORDER BY chunk_index');
    const chunks = listStmt.all(pdfId);
    res.json({ pdf_id: pdfId, count: chunks.length, chunks });
  } catch (err) {
    // Any database failure is logged and surfaced to the client as a 500.
    console.error('[pdfs] embeddings error:', err.message);
    res.status(500).json({ error: err.message });
  }
});
module.exports = router;