From 4fd5ff31995fe31ab2ccb6663cfcd038d3cafdb5 Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Wed, 8 Apr 2026 18:00:57 +0200
Subject: [PATCH] feat(local-llm): add Gemma 2 + allow HF/MLC hosts in CSP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

WebLLM was blocked by the CSP connect-src directive: model config and
weight shards live on huggingface.co (+ cdn-lfs.* for LFS), and the
WebGPU model_lib WASM comes from raw.githubusercontent.com
(binary-mlc-llm-libs). Also wires Gemma 2 2B/9B into the model registry
so /llm-test picks them up.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/mana/apps/web/src/hooks.server.ts |  8 ++++++++
 packages/local-llm/src/models.ts       | 14 ++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/apps/mana/apps/web/src/hooks.server.ts b/apps/mana/apps/web/src/hooks.server.ts
index ae1c86f50..d1f46cb49 100644
--- a/apps/mana/apps/web/src/hooks.server.ts
+++ b/apps/mana/apps/web/src/hooks.server.ts
@@ -108,6 +108,14 @@ window.__PUBLIC_GLITCHTIP_DSN__ = ${JSON.stringify(PUBLIC_GLITCHTIP_DSN)};
 			PUBLIC_MANA_LLM_URL_CLIENT,
 			PUBLIC_MANA_EVENTS_URL_CLIENT,
 			'wss://sync.mana.how',
+			// @mana/local-llm (WebLLM) fetches model config + weight shards from
+			// the mlc-ai HuggingFace repos (cdn-lfs.* hosts serve the LFS blobs)
+			// and the WebGPU model library WASM from raw.githubusercontent.com.
+			'https://huggingface.co',
+			'https://*.huggingface.co',
+			'https://cdn-lfs.huggingface.co',
+			'https://cdn-lfs-us-1.huggingface.co',
+			'https://raw.githubusercontent.com',
 			// Allow all localhost ports in development
 			...(isDev ? ['http://localhost:*', 'ws://localhost:*'] : []),
 		].filter(Boolean),
diff --git a/packages/local-llm/src/models.ts b/packages/local-llm/src/models.ts
index 812a1d8ec..200219a1a 100644
--- a/packages/local-llm/src/models.ts
+++ b/packages/local-llm/src/models.ts
@@ -20,6 +20,20 @@ export const MODELS = {
 		downloadSizeMb: 400,
 		ramUsageMb: 800,
 	},
+	/** Google Gemma 2 — strong general-purpose model, similar size class to Qwen 1.5B */
+	'gemma-2-2b': {
+		modelId: 'gemma-2-2b-it-q4f16_1-MLC',
+		displayName: 'Gemma 2 2B',
+		downloadSizeMb: 1400,
+		ramUsageMb: 2200,
+	},
+	/** Google Gemma 2 9B — much higher quality, needs a beefy GPU (~6.5GB VRAM) */
+	'gemma-2-9b': {
+		modelId: 'gemma-2-9b-it-q4f16_1-MLC',
+		displayName: 'Gemma 2 9B',
+		downloadSizeMb: 5300,
+		ramUsageMb: 6500,
+	},
 } as const satisfies Record<string, ModelConfig>;
 
 export type ModelKey = keyof typeof MODELS;