fix(local-llm): allow jsdelivr in CSP + aggregate transformers.js progress

Two issues hit while loading Gemma 4 E2B in /llm-test for the first time on a local dev server.

1. CSP script-src blocked cdn.jsdelivr.net.

   @huggingface/transformers v4 lazy-loads the onnxruntime-web WASM loader shim via a runtime dynamic `import()` from cdn.jsdelivr.net/npm/onnxruntime-web@... at backend selection time (the package itself is bundled, but the WASM-loader is fetched on demand so the static bundle stays small). With the previous CSP the import was blocked and "no available backend found" was the only downstream error.

   Allowlist cdn.jsdelivr.net in the shared CSP script-src so every Mana web app picks this up automatically.

2. Loading bar oscillated wildly during the model download.

   transformers.js downloads many shards in parallel (config.json, tokenizer.json, generation_config.json, model.onnx, model_data.bin, …) and fires the progress callback per file. The previous engine code reported the latest event verbatim, so the bar bounced between whichever file happened to be progressing fastest.

   Replace per-file reporting with a Map<file, {loaded, total}> accumulator and emit an aggregated total on every event. The denominator can grow as new files are discovered (causing brief small dips), but both numerator and denominator are individually monotonic, so the aggregate is much smoother.

   Also include a human-readable byte count and file count in the status text:

       Downloading model (47%, 240 MB / 510 MB, 8 files)

   Pin completed files to 100% on the 'done' event so the final aggregate visibly hits 100% before the loading→ready transition.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-18 22:41:26 +02:00 · 2026-04-08 22:56:52 +02:00 · 2026-04-08 22:56:52 +02:00 · b50a5c9ac7
commit b50a5c9ac7
parent 243c09d97c
2 changed files with 67 additions and 19 deletions
--- a/packages/local-llm/src/engine.ts
+++ b/packages/local-llm/src/engine.ts
@@ -105,8 +105,45 @@ export class LocalLLMEngine {

 			// transformers.js progress callback shape:
 			//   { status: 'initiate'|'download'|'progress'|'done'|'ready',
-			//     name?: string, file?: string, progress?: number, loaded?: number, total?: number }
-			// We collapse it into our LoadingStatus union.
+			//     name?: string, file?: string, progress?: number,
+			//     loaded?: number, total?: number }
+			//
+			// The callback fires per-file, and the library downloads many
+			// shards in parallel (config.json, tokenizer.json, several
+			// onnx weight files, …). If we naively report the latest event
+			// the bar bounces wildly between files. Instead we keep a
+			// per-file byte-accounting map and emit an aggregated total
+			// every time anything moves. The denominator can grow as new
+			// files are discovered (causing brief dips), but both
+			// numerator and denominator are individually monotonic, so the
+			// dips are small and brief — much smoother than per-file.
+			const fileProgress = new Map<string, { loaded: number; total: number }>();
+
+			const formatBytes = (bytes: number): string => {
+				if (bytes < 1024) return `${bytes} B`;
+				if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
+				if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`;
+				return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
+			};
+
+			const emitAggregate = () => {
+				let totalLoaded = 0;
+				let totalSize = 0;
+				for (const { loaded, total } of fileProgress.values()) {
+					totalLoaded += loaded;
+					totalSize += total;
+				}
+				const pct = totalSize > 0 ? totalLoaded / totalSize : 0;
+				this.setStatus({
+					state: 'downloading',
+					progress: pct,
+					text:
+						totalSize > 0
+							? `Downloading model (${(pct * 100).toFixed(0)}%, ${formatBytes(totalLoaded)} / ${formatBytes(totalSize)}, ${fileProgress.size} files)`
+							: `Downloading model (${fileProgress.size} files queued)`,
+				});
+			};
+
 			const progressCallback = (report: {
 				status: string;
 				file?: string;
@@ -115,20 +152,23 @@ export class LocalLLMEngine {
 				loaded?: number;
 				total?: number;
 			}) => {
-				const label = report.file ?? report.name ?? '';
-				if (report.status === 'progress' || report.status === 'download') {
-					const pct = typeof report.progress === 'number' ? report.progress : 0;
-					this.setStatus({
-						state: 'downloading',
-						progress: pct / 100,
-						text: label
-							? `Downloading ${label} (${pct.toFixed(0)}%)`
-							: `Downloading (${pct.toFixed(0)}%)`,
+				const file = report.file ?? report.name ?? '_unknown';
+				if (report.status === 'initiate') {
+					if (!fileProgress.has(file)) fileProgress.set(file, { loaded: 0, total: 0 });
+					emitAggregate();
+				} else if (report.status === 'download' || report.status === 'progress') {
+					fileProgress.set(file, {
+						loaded: report.loaded ?? 0,
+						total: report.total ?? fileProgress.get(file)?.total ?? 0,
 					});
-				} else if (report.status === 'initiate') {
-					this.setStatus({ state: 'downloading', progress: 0, text: `Starting ${label}` });
+					emitAggregate();
 				} else if (report.status === 'done') {
-					this.setStatus({ state: 'loading', text: label ? `Loaded ${label}` : 'Loaded shard' });
+					// Pin the file to 100% so a final emit shows it complete
+					const existing = fileProgress.get(file);
+					if (existing && existing.total > 0) {
+						fileProgress.set(file, { loaded: existing.total, total: existing.total });
+					}
+					emitAggregate();
 				}
 				// 'ready' is handled below after both processor + model finish
 			};
--- a/packages/shared-utils/src/security-headers.ts
+++ b/packages/shared-utils/src/security-headers.ts
@@ -61,11 +61,19 @@ export function setSecurityHeaders(response: Response, options: SecurityHeadersO
 	// Content Security Policy
 	const cspDirectives = [
 		"default-src 'self'",
-		// 'wasm-unsafe-eval' is required by @mana/local-llm (WebLLM) to
-		// instantiate the MLC WebGPU runtime. It only permits WebAssembly
-		// compilation, NOT eval()/new Function() — much narrower than the
-		// legacy 'unsafe-eval' source. Supported by all evergreen browsers.
-		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how ${scriptSrc.join(' ')}`.trim(),
+		// 'wasm-unsafe-eval' is required by @mana/local-llm to instantiate
+		// its in-browser WebGPU inference runtimes (both the old WebLLM/MLC
+		// path and the current transformers.js/ONNX path). It only permits
+		// WebAssembly compilation, NOT eval()/new Function() — much narrower
+		// than the legacy 'unsafe-eval' source. Supported by all evergreen
+		// browsers.
+		//
+		// cdn.jsdelivr.net is allowlisted because @huggingface/transformers
+		// v4 lazy-loads the onnxruntime-web WASM-loader shim via a runtime
+		// dynamic `import()` from jsDelivr. The package itself is bundled;
+		// only the shim is fetched on demand, so transformers.js can pick
+		// the right backend without bloating the static bundle.
+		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how https://cdn.jsdelivr.net ${scriptSrc.join(' ')}`.trim(),
 		"style-src 'self' 'unsafe-inline'",
 		`img-src 'self' data: blob: https: ${imgSrc.join(' ')}`.trim(),
 		`connect-src 'self' https://stats.mana.how https://glitchtip.mana.how ${connectSrc.join(' ')}`.trim(),