/*
 * 5 SDD batches archived:
 *   - Batch 1: UI Polish (10 features, 14 tasks)
 *   - Batch 2: Study System (8 features, 23 tasks)
 *   - Batch 3: Infrastructure (5 features, 22 tasks)
 *   - Batch 4: AI Advanced (5 features, 30 tasks) — RAG with @xenova/transformers
 *   - Batch 5: Core Features (5 features, 19 tasks)
 *
 * 37 bugs fixed from comprehensive code review (11 CRITICAL, 12 HIGH, 14 MEDIUM/LOW):
 *   - SSE streaming now works (event.token check)
 *   - API keys no longer exposed via GET /api/models
 *   - FTS5 injection sanitized
 *   - DB backup/restore with admin auth
 *   - Buddy mode wired (buddy_meta column)
 *   - Exam auto-submit stale closure fixed
 *   - CSS variables aligned with design tokens
 *   - Progress data corruption fixed
 *   - WebSocket protocol auto-detection
 *   - Tests infrastructure completed (vitest + node:test)
 *
 * 143 lines · 3.8 KiB · JavaScript
 */
const crypto = require('crypto');
const path = require('path');

// Memoized promise for the feature-extraction pipeline; null until a load has been requested.
let pipelinePromise = null;
// Holds the dynamically imported @xenova/transformers module once it has loaded.
let _transformers = null;

// LRU cache: sha1(text) -> Float32Array, capped at 256.
// The Map's insertion order doubles as the recency order: the first key is the
// least recently used entry and the last key is the most recently used one.
const lru = new Map();
const LRU_MAX = 256;

/**
 * Derive the cache key for a piece of text.
 *
 * @param {string} text - Text to hash.
 * @returns {string} Hex-encoded SHA-1 digest of `text`.
 */
function _lruKey(text) {
  return crypto.createHash('sha1').update(text).digest('hex');
}

/**
 * Look up a cached value and mark it as the most recently used entry.
 *
 * @param {string} key - Cache key.
 * @returns {*} The cached value, or `undefined` when the key is not cached.
 */
function _lruGet(key) {
  const hit = lru.get(key);
  if (hit === undefined) {
    return undefined;
  }

  // Re-insert so the key moves to the back of the Map (most recently used).
  lru.delete(key);
  lru.set(key, hit);
  return hit;
}

/**
 * Store a value under `key` as the most recently used entry.
 *
 * An existing entry for the same key is replaced. Otherwise, when the cache
 * already holds `LRU_MAX` entries, the least recently used one is evicted first.
 *
 * @param {string} key - Cache key.
 * @param {*} vec - Value to cache.
 * @returns {void}
 */
function _lruSet(key, vec) {
  if (lru.has(key)) {
    // Remove first so the re-insert below lands at the back of the Map.
    lru.delete(key);
  } else if (lru.size >= LRU_MAX) {
    // The first key in iteration order is the least recently used one.
    const oldestKey = lru.keys().next().value;
    lru.delete(oldestKey);
  }
  lru.set(key, vec);
}

/**
 * Lazily load the feature-extraction pipeline and memoize the in-flight or
 * completed load in `pipelinePromise`, so concurrent callers share one load.
 *
 * A failed load is not memoized: the memo is cleared before the error
 * propagates, so a later call starts a fresh attempt.
 *
 * @returns {Promise<Function>} Resolves to the pipeline returned by `mod.pipeline(...)`.
 * @throws Rejects with the underlying error when the module or model cannot be loaded.
 */
async function _getPipeline() {
  if (pipelinePromise) return pipelinePromise;

  pipelinePromise = (async () => {
    try {
      const mod = await import('@xenova/transformers');
      _transformers = mod;
      mod.env.cacheDir = path.join(__dirname, '..', '..', 'node_modules', '.cache', 'transformers');

      // Try webgpu first (DirectML on Windows/AMD), fallback to wasm
      let pipe;
      try {
        pipe = await mod.pipeline('feature-extraction', 'Xenova/multilingual-e5-small', {
          device: 'webgpu',
        });
        console.log('[embeddings] pipeline loaded with device=webgpu');
      } catch (gpuErr) {
        console.warn('[embeddings] webgpu failed, falling back to wasm:', gpuErr.message);
        pipe = await mod.pipeline('feature-extraction', 'Xenova/multilingual-e5-small', {
          device: 'wasm',
        });
        console.log('[embeddings] pipeline loaded with device=wasm');
      }
      return pipe;
    } catch (err) {
      console.error('[embeddings] failed to load pipeline:', err.message);
      // Clear the memo so the rejection is not cached forever; without this,
      // every later call would receive the same rejected promise and the model
      // could never be loaded again. This handler only runs after the first
      // `await`, i.e. after the assignment to `pipelinePromise` has completed.
      pipelinePromise = null;
      throw err;
    }
  })();

  return pipelinePromise;
}

/**
 * Start loading the embedding pipeline ahead of first use.
 *
 * Best-effort: a load failure is logged as a warning and swallowed, so the
 * returned promise resolves either way.
 *
 * @returns {Promise<void>}
 */
async function warmup() {
  try {
    await _getPipeline();
  } catch (err) {
    console.warn('[embeddings] warmup failed (model will retry on first use):', err.message);
  }
}

/**
 * Embed a single string, reusing the cached vector when the same text was
 * embedded before.
 *
 * The pipeline is invoked with mean pooling and normalization enabled. The
 * returned array is the same object that is stored in the cache, so callers
 * should treat it as read-only.
 *
 * @param {string} text - Non-empty text to embed.
 * @returns {Promise<Float32Array>} The embedding vector.
 * @throws {Error} When `text` is empty or not a string, or when the pipeline fails to load.
 */
async function embed(text) {
  if (!text || typeof text !== 'string') {
    throw new Error('embed() requires a non-empty string');
  }

  const key = _lruKey(text);
  const cached = _lruGet(key);
  if (cached) return cached;

  const pipe = await _getPipeline();
  const output = await pipe(text, { pooling: 'mean', normalize: true });
  // Cache a Float32Array regardless of what array type the pipeline handed back.
  const vec = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
  _lruSet(key, vec);
  return vec;
}

/**
 * Embed many strings, serving cached texts from the LRU cache and sending the
 * rest through the pipeline in chunks of 32.
 *
 * Texts that occur more than once in `texts` are embedded only once; every
 * occurrence receives the same vector object, just as repeated cache hits do.
 * Returned vectors are also the objects stored in the cache, so callers should
 * treat them as read-only.
 *
 * @param {string[]} texts - Texts to embed.
 * @returns {Promise<Float32Array[]>} One vector per input, in input order.
 *   Resolves to `[]` when `texts` is not an array or is empty.
 * @throws {TypeError} When an element of `texts` is not a string.
 */
async function embedBatch(texts) {
  if (!Array.isArray(texts) || texts.length === 0) {
    return [];
  }

  const results = new Array(texts.length);
  // Cache key -> { key, text, positions } for each distinct text missing from the cache.
  const pending = new Map();

  for (let i = 0; i < texts.length; i++) {
    const text = texts[i];
    if (typeof text !== 'string') {
      // Fail with the offending index instead of an opaque error from the hash call.
      throw new TypeError(`embedBatch() requires an array of strings (got ${typeof text} at index ${i})`);
    }

    const key = _lruKey(text);
    const queued = pending.get(key);
    if (queued) {
      // Same text seen earlier in this call: reuse its slot in the model batch.
      queued.positions.push(i);
      continue;
    }

    const cached = _lruGet(key);
    if (cached) {
      results[i] = cached;
    } else {
      pending.set(key, { key, text, positions: [i] });
    }
  }

  if (pending.size === 0) {
    return results;
  }

  const pipe = await _getPipeline();
  const queue = [...pending.values()];
  const BATCH_SIZE = 32;

  for (let start = 0; start < queue.length; start += BATCH_SIZE) {
    const chunk = queue.slice(start, start + BATCH_SIZE);
    const output = await pipe(
      chunk.map((entry) => entry.text),
      { pooling: 'mean', normalize: true },
    );
    // output.data is treated as one flat array holding every vector of the chunk
    // back to back, so the per-text dimension is the total length divided by the
    // number of texts in the chunk.
    const dim = Math.floor(output.data.length / chunk.length);

    chunk.forEach((entry, b) => {
      const offset = b * dim;
      // Copy the slice so each cached vector owns its own storage.
      const vec = new Float32Array(output.data.slice(offset, offset + dim));
      for (const position of entry.positions) {
        results[position] = vec;
      }
      _lruSet(entry.key, vec);
    });
  }

  return results;
}

module.exports = {
|
|
warmup,
|
|
embed,
|
|
embedBatch,
|
|
};
|