From b50a5c9ac7c57f9442e9adba71ce865de3007e71 Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Wed, 8 Apr 2026 22:56:52 +0200
Subject: [PATCH] fix(local-llm): allow jsdelivr in CSP + aggregate
 transformers.js progress
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues hit while loading Gemma 4 E2B in /llm-test for the first
time on a local dev server.

1. CSP script-src blocked cdn.jsdelivr.net.
   @huggingface/transformers v4 lazy-loads the onnxruntime-web WASM
   loader shim via a runtime dynamic `import()` from
   cdn.jsdelivr.net/npm/onnxruntime-web@... at backend selection time
   (the package itself is bundled, but the WASM-loader is fetched on
   demand so the static bundle stays small). With the previous CSP the
   import was blocked and "no available backend found" was the only
   downstream error. Allowlist cdn.jsdelivr.net in the shared CSP
   script-src so every Mana web app picks this up automatically.

2. Loading bar oscillated wildly during the model download.
   transformers.js downloads many files in parallel (config.json,
   tokenizer.json, generation_config.json, model.onnx, model_data.bin,
   …) and fires the progress callback separately for each file. The
   previous engine code reported the latest event verbatim, so the bar
   jumped to the percentage of whichever file had reported last.

   Replace per-file reporting with a Map<file, {loaded, total}>
   accumulator and emit an aggregated total on every event. The
   denominator can grow as new files are discovered (causing brief
   small dips), but both numerator and denominator are individually
   monotonic, so the aggregate is much smoother. Also include a
   human-readable byte count and file count in the status text:
       Downloading model (47%, 240 MB / 510 MB, 8 files)

   Pin completed files to 100% on the 'done' event so the final
   aggregate visibly hits 100% before the loading→ready transition.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/local-llm/src/engine.ts              | 68 +++++++++++++++----
 packages/shared-utils/src/security-headers.ts | 18 +++--
 2 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/packages/local-llm/src/engine.ts b/packages/local-llm/src/engine.ts
index 60a7360c0..c2c9021e3 100644
--- a/packages/local-llm/src/engine.ts
+++ b/packages/local-llm/src/engine.ts
@@ -105,8 +105,45 @@ export class LocalLLMEngine {
 
 			// transformers.js progress callback shape:
 			//   { status: 'initiate'|'download'|'progress'|'done'|'ready',
-			//     name?: string, file?: string, progress?: number, loaded?: number, total?: number }
-			// We collapse it into our LoadingStatus union.
+			//     name?: string, file?: string, progress?: number,
+			//     loaded?: number, total?: number }
+			//
+			// The callback fires per-file, and the library downloads many
+			// files in parallel (config.json, tokenizer.json, several
+			// onnx weight files, …). If we naively report the latest event
+			// the bar jumps to whichever file reported last. Instead we
+			// keep a per-file byte-accounting map and emit an aggregated
+			// total every time anything moves. The denominator can grow as
+			// new files are discovered (causing brief dips), but both
+			// numerator and denominator are individually monotonic, so the
+			// dips are small — much smoother than per-file reporting.
+			const fileProgress = new Map<string, { loaded: number; total: number }>();
+
+			const formatBytes = (bytes: number): string => {
+				if (bytes < 1024) return `${bytes} B`;
+				if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
+				if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`;
+				return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
+			};
+
+			const emitAggregate = () => {
+				let totalLoaded = 0;
+				let totalSize = 0;
+				for (const { loaded, total } of fileProgress.values()) {
+					totalLoaded += loaded;
+					totalSize += total;
+				}
+				const pct = totalSize > 0 ? totalLoaded / totalSize : 0;
+				this.setStatus({
+					state: 'downloading',
+					progress: pct,
+					text:
+						totalSize > 0
+							? `Downloading model (${(pct * 100).toFixed(0)}%, ${formatBytes(totalLoaded)} / ${formatBytes(totalSize)}, ${fileProgress.size} files)`
+							: `Downloading model (${fileProgress.size} files queued)`,
+				});
+			};
+
 			const progressCallback = (report: {
 				status: string;
 				file?: string;
@@ -115,20 +152,23 @@ export class LocalLLMEngine {
 				loaded?: number;
 				total?: number;
 			}) => {
-				const label = report.file ?? report.name ?? '';
-				if (report.status === 'progress' || report.status === 'download') {
-					const pct = typeof report.progress === 'number' ? report.progress : 0;
-					this.setStatus({
-						state: 'downloading',
-						progress: pct / 100,
-						text: label
-							? `Downloading ${label} (${pct.toFixed(0)}%)`
-							: `Downloading (${pct.toFixed(0)}%)`,
+				const file = report.file ?? report.name ?? '_unknown';
+				if (report.status === 'initiate') {
+					if (!fileProgress.has(file)) fileProgress.set(file, { loaded: 0, total: 0 });
+					emitAggregate();
+				} else if (report.status === 'download' || report.status === 'progress') {
+					fileProgress.set(file, {
+						loaded: report.loaded ?? 0,
+						total: report.total ?? fileProgress.get(file)?.total ?? 0,
 					});
-				} else if (report.status === 'initiate') {
-					this.setStatus({ state: 'downloading', progress: 0, text: `Starting ${label}` });
+					emitAggregate();
 				} else if (report.status === 'done') {
-					this.setStatus({ state: 'loading', text: label ? `Loaded ${label}` : 'Loaded shard' });
+					// Pin the file to 100% so a final emit shows it complete
+					const existing = fileProgress.get(file);
+					if (existing && existing.total > 0) {
+						fileProgress.set(file, { loaded: existing.total, total: existing.total });
+					}
+					emitAggregate();
 				}
 				// 'ready' is handled below after both processor + model finish
 			};
diff --git a/packages/shared-utils/src/security-headers.ts b/packages/shared-utils/src/security-headers.ts
index 074bb32e0..ba4d8b9c7 100644
--- a/packages/shared-utils/src/security-headers.ts
+++ b/packages/shared-utils/src/security-headers.ts
@@ -61,11 +61,19 @@ export function setSecurityHeaders(response: Response, options: SecurityHeadersO
 	// Content Security Policy
 	const cspDirectives = [
 		"default-src 'self'",
-		// 'wasm-unsafe-eval' is required by @mana/local-llm (WebLLM) to
-		// instantiate the MLC WebGPU runtime. It only permits WebAssembly
-		// compilation, NOT eval()/new Function() — much narrower than the
-		// legacy 'unsafe-eval' source. Supported by all evergreen browsers.
-		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how ${scriptSrc.join(' ')}`.trim(),
+		// 'wasm-unsafe-eval' is required by @mana/local-llm to compile the
+		// WebAssembly modules of its in-browser (WebGPU) inference runtimes
+		// (both the old WebLLM/MLC path and the current transformers.js/ONNX
+		// path). It only permits WebAssembly compilation, NOT eval()/new
+		// Function() — much narrower than the legacy 'unsafe-eval' source.
+		// Supported by all evergreen browsers.
+		//
+		// cdn.jsdelivr.net is allowlisted because @huggingface/transformers
+		// v4 fetches the onnxruntime-web WASM-loader shim from jsDelivr via
+		// a runtime dynamic `import()` at backend selection time. The
+		// onnxruntime-web package itself is bundled; only the shim is loaded
+		// lazily, which keeps the static bundle small.
+		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how https://cdn.jsdelivr.net ${scriptSrc.join(' ')}`.trim(),
 		"style-src 'self' 'unsafe-inline'",
 		`img-src 'self' data: blob: https: ${imgSrc.join(' ')}`.trim(),
 		`connect-src 'self' https://stats.mana.how https://glitchtip.mana.how ${connectSrc.join(' ')}`.trim(),