mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-18 22:41:26 +02:00
fix(local-llm): allow jsdelivr in CSP + aggregate transformers.js progress
Two issues hit while loading Gemma 4 E2B in /llm-test for the first
time on a local dev server.
1. CSP script-src blocked cdn.jsdelivr.net.
@huggingface/transformers v4 lazy-loads the onnxruntime-web WASM
loader shim via a runtime dynamic `import()` from
cdn.jsdelivr.net/npm/onnxruntime-web@... at backend selection time
(the package itself is bundled, but the WASM-loader is fetched on
demand so the static bundle stays small). With the previous CSP the
import was blocked and "no available backend found" was the only
downstream error. Allowlist cdn.jsdelivr.net in the shared CSP
script-src so every Mana web app picks this up automatically.
2. Loading bar oscillated wildly during the model download.
transformers.js downloads many shards in parallel (config.json,
tokenizer.json, generation_config.json, model.onnx, model_data.bin,
…) and fires the progress callback per file. The previous engine
code reported the latest event verbatim, so the bar bounced
between whichever file happened to be progressing fastest.
Replace per-file reporting with a Map<file, {loaded, total}>
accumulator and emit an aggregated total on every event. The
denominator can grow as new files are discovered (causing brief
small dips), but both numerator and denominator are individually
monotonic, so the aggregate is much smoother. Also include a
human-readable byte count and file count in the status text:
Downloading model (47%, 240 MB / 510 MB, 8 files)
Pin completed files to 100% on the 'done' event so the final
aggregate visibly hits 100% before the loading→ready transition.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
243c09d97c
commit
b50a5c9ac7
2 changed files with 67 additions and 19 deletions
|
|
@ -105,8 +105,45 @@ export class LocalLLMEngine {
|
|||
|
||||
// transformers.js progress callback shape:
|
||||
// { status: 'initiate'|'download'|'progress'|'done'|'ready',
|
||||
// name?: string, file?: string, progress?: number, loaded?: number, total?: number }
|
||||
// We collapse it into our LoadingStatus union.
|
||||
// name?: string, file?: string, progress?: number,
|
||||
// loaded?: number, total?: number }
|
||||
//
|
||||
// The callback fires per-file, and the library downloads many
|
||||
// shards in parallel (config.json, tokenizer.json, several
|
||||
// onnx weight files, …). If we naively report the latest event
|
||||
// the bar bounces wildly between files. Instead we keep a
|
||||
// per-file byte-accounting map and emit an aggregated total
|
||||
// every time anything moves. The denominator can grow as new
|
||||
// files are discovered (causing brief dips), but both
|
||||
// numerator and denominator are individually monotonic, so the
|
||||
// dips are small and brief — much smoother than per-file.
|
||||
const fileProgress = new Map<string, { loaded: number; total: number }>();
|
||||
|
||||
// Render a byte count as a short human-readable string using 1024-based
// units: whole numbers for B / KB / MB, two decimals for GB.
//
// The unit is chosen from the *rounded* value, so an amount that would
// round up to 1024 of a unit is promoted to the next unit instead
// (1 MiB - 1 byte prints as "1 MB", never "1024 KB").
//
// Non-finite or negative input has no meaningful size and prints "0 B".
const formatBytes = (bytes: number): string => {
	const KIB = 1024;
	const MIB = KIB * 1024;
	const GIB = MIB * 1024;

	if (!Number.isFinite(bytes) || bytes < 0) return '0 B';
	if (bytes < KIB) return `${bytes} B`;

	// Round first, then decide whether the rounded figure still fits the unit.
	const wholeKb = (bytes / KIB).toFixed(0);
	if (Number(wholeKb) < KIB) return `${wholeKb} KB`;

	const wholeMb = (bytes / MIB).toFixed(0);
	if (Number(wholeMb) < KIB) return `${wholeMb} MB`;

	return `${(bytes / GIB).toFixed(2)} GB`;
};
|
||||
|
||||
// Sum the per-file counters held in `fileProgress` and publish a single
// combined 'downloading' status via `this.setStatus`.
//
// `progress` is reported as a fraction. While no file has announced a
// size yet (total of 0) the fraction is 0 and the text only lists how
// many files are queued.
const emitAggregate = () => {
	let totalLoaded = 0;
	let totalSize = 0;
	for (const { loaded, total } of fileProgress.values()) {
		totalLoaded += loaded;
		totalSize += total;
	}

	// A file can report loaded bytes before (or without) reporting its
	// total, which would push the ratio past 1. Clamp so the bar and the
	// percentage never exceed 100%.
	const pct = totalSize > 0 ? Math.min(1, totalLoaded / totalSize) : 0;

	this.setStatus({
		state: 'downloading',
		progress: pct,
		text:
			totalSize > 0
				? `Downloading model (${(pct * 100).toFixed(0)}%, ${formatBytes(totalLoaded)} / ${formatBytes(totalSize)}, ${fileProgress.size} files)`
				: `Downloading model (${fileProgress.size} files queued)`,
	});
};
|
||||
|
||||
const progressCallback = (report: {
|
||||
status: string;
|
||||
file?: string;
|
||||
|
|
@ -115,20 +152,23 @@ export class LocalLLMEngine {
|
|||
loaded?: number;
|
||||
total?: number;
|
||||
}) => {
|
||||
const label = report.file ?? report.name ?? '';
|
||||
if (report.status === 'progress' || report.status === 'download') {
|
||||
const pct = typeof report.progress === 'number' ? report.progress : 0;
|
||||
this.setStatus({
|
||||
state: 'downloading',
|
||||
progress: pct / 100,
|
||||
text: label
|
||||
? `Downloading ${label} (${pct.toFixed(0)}%)`
|
||||
: `Downloading (${pct.toFixed(0)}%)`,
|
||||
const file = report.file ?? report.name ?? '_unknown';
|
||||
if (report.status === 'initiate') {
|
||||
if (!fileProgress.has(file)) fileProgress.set(file, { loaded: 0, total: 0 });
|
||||
emitAggregate();
|
||||
} else if (report.status === 'download' || report.status === 'progress') {
|
||||
fileProgress.set(file, {
|
||||
loaded: report.loaded ?? 0,
|
||||
total: report.total ?? fileProgress.get(file)?.total ?? 0,
|
||||
});
|
||||
} else if (report.status === 'initiate') {
|
||||
this.setStatus({ state: 'downloading', progress: 0, text: `Starting ${label}` });
|
||||
emitAggregate();
|
||||
} else if (report.status === 'done') {
|
||||
this.setStatus({ state: 'loading', text: label ? `Loaded ${label}` : 'Loaded shard' });
|
||||
// Pin the file to 100% so a final emit shows it complete
|
||||
const existing = fileProgress.get(file);
|
||||
if (existing && existing.total > 0) {
|
||||
fileProgress.set(file, { loaded: existing.total, total: existing.total });
|
||||
}
|
||||
emitAggregate();
|
||||
}
|
||||
// 'ready' is handled below after both processor + model finish
|
||||
};
|
||||
|
|
|
|||
|
|
@ -61,11 +61,19 @@ export function setSecurityHeaders(response: Response, options: SecurityHeadersO
|
|||
// Content Security Policy
|
||||
const cspDirectives = [
|
||||
"default-src 'self'",
|
||||
// 'wasm-unsafe-eval' is required by @mana/local-llm (WebLLM) to
|
||||
// instantiate the MLC WebGPU runtime. It only permits WebAssembly
|
||||
// compilation, NOT eval()/new Function() — much narrower than the
|
||||
// legacy 'unsafe-eval' source. Supported by all evergreen browsers.
|
||||
`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how ${scriptSrc.join(' ')}`.trim(),
|
||||
// 'wasm-unsafe-eval' is required by @mana/local-llm to instantiate
|
||||
// browser inference WebGPU runtimes (both the old WebLLM/MLC path
|
||||
// and the current transformers.js/ONNX path). It only permits
|
||||
// WebAssembly compilation, NOT eval()/new Function() — much narrower
|
||||
// than the legacy 'unsafe-eval' source. Supported by all evergreen
|
||||
// browsers.
|
||||
//
|
||||
// cdn.jsdelivr.net is allowlisted because @huggingface/transformers
|
||||
// loads onnxruntime-web via a runtime dynamic `import()` from
|
||||
// jsDelivr (the package itself is bundled, but the WASM-loader
|
||||
// shim is fetched lazily so transformers.js v4 can pick the
|
||||
// right backend without bloating the static bundle).
|
||||
`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how https://cdn.jsdelivr.net ${scriptSrc.join(' ')}`.trim(),
|
||||
"style-src 'self' 'unsafe-inline'",
|
||||
`img-src 'self' data: blob: https: ${imgSrc.join(' ')}`.trim(),
|
||||
`connect-src 'self' https://stats.mana.how https://glitchtip.mana.how ${connectSrc.join(' ')}`.trim(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue