diff --git a/packages/local-llm/src/engine.ts b/packages/local-llm/src/engine.ts
index 60a7360c0..c2c9021e3 100644
--- a/packages/local-llm/src/engine.ts
+++ b/packages/local-llm/src/engine.ts
@@ -105,8 +105,45 @@ export class LocalLLMEngine {
 
 		// transformers.js progress callback shape:
 		//   { status: 'initiate'|'download'|'progress'|'done'|'ready',
-		//     name?: string, file?: string, progress?: number, loaded?: number, total?: number }
-		// We collapse it into our LoadingStatus union.
+		//     name?: string, file?: string, progress?: number,
+		//     loaded?: number, total?: number }
+		//
+		// The callback fires per-file, and the library downloads many
+		// shards in parallel (config.json, tokenizer.json, several
+		// onnx weight files, …). If we naively report the latest event
+		// the bar bounces wildly between files. Instead we keep a
+		// per-file byte-accounting map and emit an aggregated total
+		// every time anything moves. The denominator can grow as new
+		// files are discovered (causing brief dips), but both
+		// numerator and denominator are individually monotonic, so the
+		// dips are small and brief — much smoother than per-file.
+		const fileProgress = new Map<string, { loaded: number; total: number }>();
+
+		const formatBytes = (bytes: number): string => {
+			if (bytes < 1024) return `${bytes} B`;
+			if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(0)} KB`;
+			if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(0)} MB`;
+			return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
+		};
+
+		const emitAggregate = () => {
+			let totalLoaded = 0;
+			let totalSize = 0;
+			for (const { loaded, total } of fileProgress.values()) {
+				totalLoaded += loaded;
+				totalSize += total;
+			}
+			const pct = totalSize > 0 ? totalLoaded / totalSize : 0;
+			this.setStatus({
+				state: 'downloading',
+				progress: pct,
+				text:
+					totalSize > 0
+						? `Downloading model (${(pct * 100).toFixed(0)}%, ${formatBytes(totalLoaded)} / ${formatBytes(totalSize)}, ${fileProgress.size} files)`
+						: `Downloading model (${fileProgress.size} files queued)`,
+			});
+		};
+
 		const progressCallback = (report: {
 			status: string;
 			file?: string;
@@ -115,20 +152,26 @@ export class LocalLLMEngine {
 			loaded?: number;
 			total?: number;
 		}) => {
-			const label = report.file ?? report.name ?? '';
-			if (report.status === 'progress' || report.status === 'download') {
-				const pct = typeof report.progress === 'number' ? report.progress : 0;
-				this.setStatus({
-					state: 'downloading',
-					progress: pct / 100,
-					text: label
-						? `Downloading ${label} (${pct.toFixed(0)}%)`
-						: `Downloading (${pct.toFixed(0)}%)`,
+			const file = report.file ?? report.name ?? '_unknown';
+			if (report.status === 'initiate') {
+				if (!fileProgress.has(file)) fileProgress.set(file, { loaded: 0, total: 0 });
+				emitAggregate();
+			} else if (report.status === 'download' || report.status === 'progress') {
+				// Events may omit byte counts (both fields are optional); fall back to the
+				// last known values rather than 0 so such an event cannot reset progress.
+				const previous = fileProgress.get(file);
+				fileProgress.set(file, {
+					loaded: report.loaded ?? previous?.loaded ?? 0,
+					total: report.total ?? previous?.total ?? 0,
 				});
-			} else if (report.status === 'initiate') {
-				this.setStatus({ state: 'downloading', progress: 0, text: `Starting ${label}` });
+				emitAggregate();
 			} else if (report.status === 'done') {
-				this.setStatus({ state: 'loading', text: label ? `Loaded ${label}` : 'Loaded shard' });
+				// Pin the file to 100% so a final emit shows it complete
+				const existing = fileProgress.get(file);
+				if (existing && existing.total > 0) {
+					fileProgress.set(file, { loaded: existing.total, total: existing.total });
+				}
+				emitAggregate();
 			}
 			// 'ready' is handled below after both processor + model finish
 		};
diff --git a/packages/shared-utils/src/security-headers.ts b/packages/shared-utils/src/security-headers.ts
index 074bb32e0..ba4d8b9c7 100644
--- a/packages/shared-utils/src/security-headers.ts
+++ b/packages/shared-utils/src/security-headers.ts
@@ -61,11 +61,19 @@ export function setSecurityHeaders(response: Response, options: SecurityHeadersO
 	// Content Security Policy
 	const cspDirectives = [
 		"default-src 'self'",
-		// 'wasm-unsafe-eval' is required by @mana/local-llm (WebLLM) to
-		// instantiate the MLC WebGPU runtime. It only permits WebAssembly
-		// compilation, NOT eval()/new Function() — much narrower than the
-		// legacy 'unsafe-eval' source. Supported by all evergreen browsers.
-		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how ${scriptSrc.join(' ')}`.trim(),
+		// 'wasm-unsafe-eval' is required by @mana/local-llm to instantiate
+		// browser inference WebGPU runtimes (both the old WebLLM/MLC path
+		// and the current transformers.js/ONNX path). It only permits
+		// WebAssembly compilation, NOT eval()/new Function() — much narrower
+		// than the legacy 'unsafe-eval' source. Supported by all evergreen
+		// browsers.
+		//
+		// cdn.jsdelivr.net is allowlisted because @huggingface/transformers
+		// loads onnxruntime-web via a runtime dynamic `import()` from
+		// jsDelivr (the package itself is bundled, but the WASM-loader
+		// shim is fetched lazily so transformers.js v4 can pick the
+		// right backend without bloating the static bundle).
+		`script-src 'self' 'unsafe-inline' 'wasm-unsafe-eval' https://stats.mana.how https://glitchtip.mana.how https://cdn.jsdelivr.net ${scriptSrc.join(' ')}`.trim(),
 		"style-src 'self' 'unsafe-inline'",
 		`img-src 'self' data: blob: https: ${imgSrc.join(' ')}`.trim(),
 		`connect-src 'self' https://stats.mana.how https://glitchtip.mana.how ${connectSrc.join(' ')}`.trim(),