mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
fix(mana/web): unwrap $state proxy in workbench-scenes Dexie writes
Adding an app to a workbench scene threw DataCloneError. scenesState is a $state array, so current.openApps was a Svelte 5 proxy and spreading it into a new array left proxy entries inside; IndexedDB's structured clone refuses to serialise those. Snapshot before handing the array to patchScene / createScene so Dexie sees plain objects. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
761851847f
commit
56065c8537
38 changed files with 1415 additions and 1665 deletions
|
|
@ -15,6 +15,7 @@ COPY apps/calc/packages/shared ./apps/calc/packages/shared
|
|||
COPY apps/zitare/packages/content ./apps/zitare/packages/content
|
||||
COPY packages/shared-uload ./packages/shared-uload
|
||||
COPY packages/local-llm ./packages/local-llm
|
||||
COPY packages/shared-llm ./packages/shared-llm
|
||||
|
||||
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \
|
||||
pnpm install --no-frozen-lockfile --ignore-scripts
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@
|
|||
"@mana/shared-i18n": "workspace:*",
|
||||
"@mana/shared-icons": "workspace:*",
|
||||
"@mana/shared-links": "workspace:*",
|
||||
"@mana/shared-llm": "workspace:*",
|
||||
"@mana/shared-stores": "workspace:*",
|
||||
"@mana/shared-tags": "workspace:*",
|
||||
"@mana/shared-tailwind": "workspace:*",
|
||||
|
|
|
|||
119
apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts
Normal file
119
apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
/**
|
||||
* extractDateTask — pulls an ISO date out of a free-form German/English
|
||||
* string. Used by Quick-Add features that want to recognize phrases like
|
||||
* "morgen 14 Uhr" or "next Tuesday".
|
||||
*
|
||||
* Has a runRules() fallback so it works even on Tier 0 (no AI) — the
|
||||
* fallback uses a hand-rolled regex set covering the most common
|
||||
* shortcuts. It's intentionally narrow: it only catches the patterns it
|
||||
* KNOWS, and returns null otherwise. This is the right semantic for
|
||||
* Tier 0 — "I'm not certain enough to guess" is a valid answer when
|
||||
* the user has explicitly opted out of LLM use.
|
||||
*
|
||||
* For production-grade NL date parsing without an LLM, replacing the
|
||||
* regex stub with chrono-node would be a one-line change in runRules().
|
||||
*/
|
||||
|
||||
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
|
||||
|
||||
/** Input for the extractDateTask. */
export interface ExtractDateInput {
  /** Free-form German/English text to scan for a date reference. */
  text: string;
  /** Reference date for relative parsing ("morgen", "next week"). Defaults to now. */
  now?: Date;
}

/** The extracted date, or null when no date reference was recognized. */
export type ExtractDateOutput = Date | null;
|
||||
|
||||
export const extractDateTask: LlmTask<ExtractDateInput, ExtractDateOutput> = {
|
||||
name: 'common.extractDate',
|
||||
minTier: 'none', // works on Tier 0 thanks to the regex fallback
|
||||
contentClass: 'personal',
|
||||
displayLabel: 'Datum aus Text erkennen',
|
||||
|
||||
async runLlm(input, backend: LlmBackend): Promise<ExtractDateOutput> {
|
||||
const refIso = (input.now ?? new Date()).toISOString();
|
||||
const result = await backend.generate({
|
||||
taskName: extractDateTask.name,
|
||||
contentClass: extractDateTask.contentClass,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content:
|
||||
'You extract date+time references from short user input. Always respond with strict JSON of the form {"iso":"YYYY-MM-DDTHH:MM:SSZ"} or {"iso":null}. No prose, no markdown.',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `Reference time: ${refIso}\nUser input: ${input.text}`,
|
||||
},
|
||||
],
|
||||
temperature: 0,
|
||||
maxTokens: 80,
|
||||
});
|
||||
|
||||
try {
|
||||
// Strip markdown fences if a less-disciplined model added them
|
||||
const cleaned = result.content.replace(/```(?:json)?|```/g, '').trim();
|
||||
const parsed = JSON.parse(cleaned) as { iso: string | null };
|
||||
return parsed.iso ? new Date(parsed.iso) : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
},
|
||||
|
||||
async runRules(input): Promise<ExtractDateOutput> {
|
||||
const text = input.text.toLowerCase().trim();
|
||||
const now = input.now ?? new Date();
|
||||
|
||||
// "heute" / "today"
|
||||
if (/\b(heute|today)\b/.test(text)) {
|
||||
return withTime(new Date(now), text);
|
||||
}
|
||||
|
||||
// "morgen" / "tomorrow"
|
||||
if (/\b(morgen|tomorrow)\b/.test(text)) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + 1);
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// "übermorgen" / "day after tomorrow"
|
||||
if (/\b(übermorgen|day after tomorrow)\b/.test(text)) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + 2);
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// "in N tagen" / "in N days"
|
||||
const inDays = text.match(/\bin (\d+) (tagen|days?)\b/);
|
||||
if (inDays) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + parseInt(inDays[1], 10));
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// Explicit ISO date "2026-04-09" or "2026-04-09T14:00"
|
||||
const iso = text.match(/(\d{4}-\d{2}-\d{2}(?:t\d{2}:\d{2}(?::\d{2})?)?)/);
|
||||
if (iso) {
|
||||
const d = new Date(iso[1]);
|
||||
if (!Number.isNaN(d.getTime())) return d;
|
||||
}
|
||||
|
||||
return null;
|
||||
},
|
||||
};
|
||||
|
||||
/** Apply a "HH:MM" or "HH Uhr" time hint to a date if found in the text. */
|
||||
function withTime(date: Date, text: string): Date {
|
||||
const hhmm = text.match(/\b(\d{1,2}):(\d{2})\b/);
|
||||
if (hhmm) {
|
||||
date.setHours(parseInt(hhmm[1], 10), parseInt(hhmm[2], 10), 0, 0);
|
||||
return date;
|
||||
}
|
||||
const hhUhr = text.match(/\b(\d{1,2})\s*uhr\b/);
|
||||
if (hhUhr) {
|
||||
date.setHours(parseInt(hhUhr[1], 10), 0, 0, 0);
|
||||
return date;
|
||||
}
|
||||
// No time hint — keep the original time-of-day
|
||||
return date;
|
||||
}
|
||||
57
apps/mana/apps/web/src/lib/llm-tasks/summarize.ts
Normal file
57
apps/mana/apps/web/src/lib/llm-tasks/summarize.ts
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* summarizeTextTask — produces a short summary of a longer piece of
|
||||
* text. Used for things like dream entries, voice memo transcripts,
|
||||
* meeting notes.
|
||||
*
|
||||
* Has NO runRules() implementation: a meaningful summary genuinely
|
||||
* requires an LLM, and a fake "first sentence + ellipsis" fallback
|
||||
* would mislead the user. Tasks without a runRules forces the user
|
||||
* to actually pick a higher tier in settings — and the orchestrator's
|
||||
* canRun() will return false for them when they're on Tier 0.
|
||||
*
|
||||
* minTier is set to 'browser' rather than 'mana-server' because Gemma
|
||||
* 4 E2B handles short summarization tasks well in the browser. For
|
||||
* very long inputs (>4k tokens) the task could escalate to
|
||||
* mana-server via a per-task override.
|
||||
*/
|
||||
|
||||
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
|
||||
|
||||
/** Input for the summarizeTextTask. */
export interface SummarizeInput {
  /** The text to summarize (e.g. a transcript, note, or dream entry). */
  text: string;
  /** Approximate target length in sentences. Default 3. */
  sentences?: number;
}

/** The plain-text summary produced by the task. */
export type SummarizeOutput = string;
|
||||
|
||||
export const summarizeTextTask: LlmTask<SummarizeInput, SummarizeOutput> = {
|
||||
name: 'common.summarize',
|
||||
minTier: 'browser', // genuinely needs an LLM — no rules-based equivalent
|
||||
contentClass: 'personal',
|
||||
displayLabel: 'Text zusammenfassen',
|
||||
|
||||
async runLlm(input, backend: LlmBackend): Promise<SummarizeOutput> {
|
||||
const sentences = input.sentences ?? 3;
|
||||
const result = await backend.generate({
|
||||
taskName: summarizeTextTask.name,
|
||||
contentClass: summarizeTextTask.contentClass,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: `Du fasst Text in ${sentences} prägnanten Sätzen zusammen. Behalte die wichtigsten Fakten und Beschlüsse, lasse Füller weg. Kein Markdown, keine Aufzählungen, keine Vorrede — nur die Zusammenfassung.`,
|
||||
},
|
||||
{ role: 'user', content: input.text },
|
||||
],
|
||||
temperature: 0.3,
|
||||
maxTokens: 500,
|
||||
});
|
||||
|
||||
return result.content.trim();
|
||||
},
|
||||
|
||||
// No runRules — this task is impossible without an LLM. The
|
||||
// orchestrator's canRun() will return false for users on Tier 0,
|
||||
// and modules using this task should hide their summarize button
|
||||
// when canRun() is false.
|
||||
};
|
||||
|
|
@ -99,10 +99,9 @@ async function patchScene(
|
|||
id: string,
|
||||
patch: Partial<Pick<LocalWorkbenchScene, 'name' | 'icon' | 'openApps' | 'order'>>
|
||||
) {
|
||||
await db.table<LocalWorkbenchScene>(TABLE).update(id, {
|
||||
...patch,
|
||||
updatedAt: nowIso(),
|
||||
});
|
||||
// Strip Svelte 5 $state proxies — IndexedDB's structured clone can't serialize them.
|
||||
const clean = $state.snapshot({ ...patch, updatedAt: nowIso() });
|
||||
await db.table<LocalWorkbenchScene>(TABLE).update(id, clean);
|
||||
}
|
||||
|
||||
async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchSceneApp[]) {
|
||||
|
|
@ -110,7 +109,9 @@ async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchScen
|
|||
if (!id) return;
|
||||
const current = scenesState.find((s) => s.id === id);
|
||||
if (!current) return;
|
||||
await patchScene(id, { openApps: fn(current.openApps) });
|
||||
// Snapshot before handing to the mutator so callers operate on plain objects.
|
||||
const plainApps = $state.snapshot(current.openApps) as WorkbenchSceneApp[];
|
||||
await patchScene(id, { openApps: fn(plainApps) });
|
||||
}
|
||||
|
||||
// ─── Public store ─────────────────────────────────────────────
|
||||
|
|
@ -191,7 +192,7 @@ export const workbenchScenesStore = {
|
|||
id,
|
||||
name: opts.name.trim() || 'Neue Szene',
|
||||
icon: opts.icon,
|
||||
openApps: opts.seedApps ? structuredClone(opts.seedApps) : [],
|
||||
openApps: opts.seedApps ? ($state.snapshot(opts.seedApps) as WorkbenchSceneApp[]) : [],
|
||||
order: maxOrder + 1,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,16 @@
|
|||
type ModelKey,
|
||||
} from '@mana/local-llm';
|
||||
import { hasModelInCache } from '@mana/local-llm';
|
||||
import {
|
||||
llmOrchestrator,
|
||||
llmSettingsState,
|
||||
updateLlmSettings,
|
||||
ALL_TIERS,
|
||||
tierLabel,
|
||||
type LlmTier,
|
||||
} from '@mana/shared-llm';
|
||||
import { extractDateTask } from '$lib/llm-tasks/extract-date';
|
||||
import { summarizeTextTask } from '$lib/llm-tasks/summarize';
|
||||
import { marked } from 'marked';
|
||||
import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@mana/shared-icons';
|
||||
|
||||
|
|
@ -43,7 +53,47 @@
|
|||
|
||||
// --- State ---
|
||||
let selectedModel: ModelKey = $state('gemma-4-e2b');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' | 'router' =
|
||||
$state('chat');
|
||||
|
||||
// --- Router tab state ---
|
||||
const settings = $derived(llmSettingsState.current);
|
||||
let routerInput = $state('Treffen mit Sara morgen 14:30');
|
||||
let routerRunning = $state(false);
|
||||
let routerResult = $state<{
|
||||
value: unknown;
|
||||
source: string;
|
||||
latencyMs: number;
|
||||
attempted: string[];
|
||||
} | null>(null);
|
||||
let routerError = $state<string | null>(null);
|
||||
|
||||
function toggleAllowedTier(tier: LlmTier) {
|
||||
const current = settings.allowedTiers;
|
||||
const next = current.includes(tier) ? current.filter((t) => t !== tier) : [...current, tier];
|
||||
updateLlmSettings({ allowedTiers: next });
|
||||
}
|
||||
|
||||
async function runRouterTask(task: typeof extractDateTask | typeof summarizeTextTask) {
|
||||
routerRunning = true;
|
||||
routerResult = null;
|
||||
routerError = null;
|
||||
try {
|
||||
const input = task === extractDateTask ? { text: routerInput } : { text: routerInput };
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const result = await llmOrchestrator.run(task as any, input);
|
||||
routerResult = {
|
||||
value: result.value,
|
||||
source: result.source,
|
||||
latencyMs: result.latencyMs,
|
||||
attempted: result.attempted,
|
||||
};
|
||||
} catch (err) {
|
||||
routerError = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
|
||||
} finally {
|
||||
routerRunning = false;
|
||||
}
|
||||
}
|
||||
const supported = isLocalLlmSupported();
|
||||
const status = getLocalLlmStatus();
|
||||
|
||||
|
|
@ -581,7 +631,7 @@
|
|||
|
||||
<!-- Tabs -->
|
||||
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab}
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }, { id: 'router', label: 'Router' }] as tab}
|
||||
<button
|
||||
onclick={() => (activeTab = tab.id as typeof activeTab)}
|
||||
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
|
||||
|
|
@ -1134,5 +1184,100 @@
|
|||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Router Tab — exercises the @mana/shared-llm tiered orchestrator -->
|
||||
{#if activeTab === 'router'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Smoke-Test für den tiered LLM-Router. Wähle welche Tiers der Orchestrator benutzen darf
|
||||
— der Router wählt dann pro Task die erste passende Schicht aus deiner Liste.
|
||||
</p>
|
||||
|
||||
<div class="mb-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Erlaubte Tiers</div>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{#each ALL_TIERS as tier}
|
||||
{@const enabled = settings.allowedTiers.includes(tier)}
|
||||
<button
|
||||
onclick={() => toggleAllowedTier(tier)}
|
||||
class="rounded-lg border px-3 py-1.5 text-xs font-medium transition-colors {enabled
|
||||
? 'border-primary bg-primary/20 text-primary'
|
||||
: 'border-border bg-background text-muted-foreground hover:text-foreground'}"
|
||||
>
|
||||
{tierLabel(tier)}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
<div class="mt-2 text-xs text-muted-foreground">
|
||||
Aktuell: {settings.allowedTiers.length === 0
|
||||
? 'keine LLM-Tiers — nur Tier 0 (Regeln)'
|
||||
: settings.allowedTiers.map(tierLabel).join(' → ')}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<input
|
||||
type="text"
|
||||
bind:value={routerInput}
|
||||
placeholder="Eingabetext für den Task..."
|
||||
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<div class="flex flex-wrap gap-2">
|
||||
<button
|
||||
onclick={() => runRouterTask(extractDateTask)}
|
||||
disabled={routerRunning || !routerInput.trim()}
|
||||
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
extractDate (hat T0-Fallback)
|
||||
</button>
|
||||
<button
|
||||
onclick={() => runRouterTask(summarizeTextTask)}
|
||||
disabled={routerRunning || !routerInput.trim()}
|
||||
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
summarize (kein T0)
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="mt-2 text-xs text-muted-foreground">
|
||||
extractDate.canRun: {llmOrchestrator.canRun(extractDateTask)} · summarize.canRun: {llmOrchestrator.canRun(
|
||||
summarizeTextTask
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if routerError}
|
||||
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-4">
|
||||
<div class="text-sm font-medium text-red-400">Task fehlgeschlagen</div>
|
||||
<div class="mt-1 font-mono text-xs text-red-300">{routerError}</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
{#if routerResult}
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 flex items-center gap-2">
|
||||
<span
|
||||
class="rounded-full border border-primary/40 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary"
|
||||
>
|
||||
{tierLabel(routerResult.source as LlmTier)}
|
||||
</span>
|
||||
<span class="text-xs text-muted-foreground">{routerResult.latencyMs} ms</span>
|
||||
{#if routerResult.attempted.length > 1}
|
||||
<span class="text-xs text-muted-foreground"
|
||||
>(versucht: {routerResult.attempted.join(' → ')})</span
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
<pre
|
||||
class="overflow-x-auto rounded-lg bg-background p-3 font-mono text-xs text-foreground">{JSON.stringify(
|
||||
routerResult.value,
|
||||
null,
|
||||
2
|
||||
)}</pre>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ COPY packages/credits ./packages/credits
|
|||
COPY packages/spiral-db ./packages/spiral-db
|
||||
COPY packages/wallpaper-generator ./packages/wallpaper-generator
|
||||
COPY packages/local-llm ./packages/local-llm
|
||||
COPY packages/shared-llm ./packages/shared-llm
|
||||
|
||||
# Install dependencies (shared packages only - app deps added later)
|
||||
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \
|
||||
|
|
|
|||
|
|
@ -1,47 +1,26 @@
|
|||
{
|
||||
"name": "@mana/shared-llm",
|
||||
"version": "1.0.0",
|
||||
"version": "2.0.0",
|
||||
"private": true,
|
||||
"description": "Unified LLM client for all Mana backends via mana-llm service",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"description": "Tiered LLM orchestrator for Mana — routes tasks across rules / browser-edge / mana-server / cloud backends with explicit user-controlled privacy tiers",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js",
|
||||
"require": "./dist/index.js"
|
||||
},
|
||||
"./standalone": {
|
||||
"types": "./dist/standalone.d.ts",
|
||||
"import": "./dist/standalone.js",
|
||||
"require": "./dist/standalone.js"
|
||||
}
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "tsc --watch",
|
||||
"clean": "rm -rf dist",
|
||||
"type-check": "tsc --noEmit",
|
||||
"test": "vitest run"
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"dependencies": {
|
||||
"@nestjs/common": "^10.0.0 || ^11.0.0",
|
||||
"@nestjs/config": "^3.0.0 || ^4.0.0",
|
||||
"@nestjs/core": "^10.0.0 || ^11.0.0",
|
||||
"reflect-metadata": "^0.1.13 || ^0.2.0",
|
||||
"rxjs": "^7.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@nestjs/common": "^10.0.0 || ^11.0.0",
|
||||
"@nestjs/config": "^3.0.0 || ^4.0.0",
|
||||
"@nestjs/core": "^10.0.0 || ^11.0.0"
|
||||
"@mana/local-llm": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.0.0",
|
||||
"typescript": "^5.0.0",
|
||||
"vitest": "^4.1.2"
|
||||
"@types/node": "^24.10.1",
|
||||
"svelte": "^5.0.0",
|
||||
"typescript": "^5.9.3"
|
||||
},
|
||||
"files": [
|
||||
"dist"
|
||||
]
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.0.0"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,119 +0,0 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
import { extractJson } from '../utils/json-extractor';
|
||||
|
||||
describe('extractJson', () => {
|
||||
it('parses direct JSON object', () => {
|
||||
const result = extractJson('{"name": "test", "value": 42}');
|
||||
expect(result).toEqual({ name: 'test', value: 42 });
|
||||
});
|
||||
|
||||
it('parses direct JSON array', () => {
|
||||
const result = extractJson('[1, 2, 3]');
|
||||
expect(result).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('strips markdown json code fence', () => {
|
||||
const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ category: 'bug', title: 'Fix login' });
|
||||
});
|
||||
|
||||
it('strips markdown code fence without json label', () => {
|
||||
const input = '```\n{"key": "value"}\n```';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('extracts JSON from surrounding text', () => {
|
||||
const input =
|
||||
'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
|
||||
});
|
||||
|
||||
it('extracts JSON array from surrounding text', () => {
|
||||
const input = 'The items are: [1, 2, 3] as requested.';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('handles nested JSON objects', () => {
|
||||
const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
|
||||
});
|
||||
|
||||
it('handles JSON with escaped quotes in strings', () => {
|
||||
const input = '{"text": "He said \\"hello\\""}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ text: 'He said "hello"' });
|
||||
});
|
||||
|
||||
it('handles JSON with braces inside strings', () => {
|
||||
const input = 'Result: {"code": "if (x) { return }"}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ code: 'if (x) { return }' });
|
||||
});
|
||||
|
||||
it('trims whitespace before parsing', () => {
|
||||
const input = ' \n {"key": "value"} \n ';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('applies validation function on success', () => {
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name: string };
|
||||
if (!obj.name) throw new Error('missing name');
|
||||
return obj;
|
||||
};
|
||||
const result = extractJson('{"name": "test"}', validate);
|
||||
expect(result).toEqual({ name: 'test' });
|
||||
});
|
||||
|
||||
it('throws when validation fails', () => {
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name?: string };
|
||||
if (!obj.name) throw new Error('missing name');
|
||||
return obj;
|
||||
};
|
||||
expect(() => extractJson('{"value": 123}', validate)).toThrow();
|
||||
});
|
||||
|
||||
it('throws on completely invalid input', () => {
|
||||
expect(() => extractJson('This is just plain text with no JSON')).toThrow(
|
||||
'Failed to extract JSON'
|
||||
);
|
||||
});
|
||||
|
||||
it('throws on empty input', () => {
|
||||
expect(() => extractJson('')).toThrow('Failed to extract JSON');
|
||||
});
|
||||
|
||||
it('handles real-world LLM response with preamble', () => {
|
||||
const input = `Based on my analysis, here is the result:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"foods": [
|
||||
{"name": "Apple", "calories": 95, "protein": 0.5}
|
||||
],
|
||||
"totalCalories": 95,
|
||||
"confidence": 0.9
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
This analysis is based on the image provided.`;
|
||||
|
||||
const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
|
||||
expect(result.totalCalories).toBe(95);
|
||||
expect(result.foods).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('prefers object over array when both exist', () => {
|
||||
// Direct parse fails, fence fails, tries object first
|
||||
const input = 'Some text {"key": "val"} and [1, 2, 3]';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'val' });
|
||||
});
|
||||
});
|
||||
|
|
@ -1,277 +0,0 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { LlmClient } from '../llm-client';
|
||||
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
|
||||
import type { ChatCompletionResponse } from '../types/openai-compat.types';
|
||||
|
||||
const mockFetch = vi.fn();
|
||||
vi.stubGlobal('fetch', mockFetch);
|
||||
|
||||
const DEFAULT_OPTIONS: ResolvedLlmOptions = {
|
||||
manaLlmUrl: 'http://localhost:3025',
|
||||
defaultModel: 'ollama/gemma3:4b',
|
||||
defaultVisionModel: 'ollama/llava:7b',
|
||||
timeout: 30_000,
|
||||
maxRetries: 0, // No retries in tests for simplicity
|
||||
debug: false,
|
||||
};
|
||||
|
||||
function mockCompletionResponse(
|
||||
content: string,
|
||||
model = 'ollama/gemma3:4b'
|
||||
): ChatCompletionResponse {
|
||||
return {
|
||||
id: 'chatcmpl-test123',
|
||||
object: 'chat.completion',
|
||||
created: Date.now(),
|
||||
model,
|
||||
choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
|
||||
usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
|
||||
};
|
||||
}
|
||||
|
||||
function mockFetchOk(body: unknown): void {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: () => Promise.resolve(body),
|
||||
text: () => Promise.resolve(JSON.stringify(body)),
|
||||
} as unknown as Response);
|
||||
}
|
||||
|
||||
function mockFetchError(status: number, body = ''): void {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status,
|
||||
statusText: `Error ${status}`,
|
||||
json: () => Promise.resolve({}),
|
||||
text: () => Promise.resolve(body),
|
||||
} as unknown as Response);
|
||||
}
|
||||
|
||||
describe('LlmClient', () => {
|
||||
let client: LlmClient;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
client = new LlmClient(DEFAULT_OPTIONS);
|
||||
});
|
||||
|
||||
describe('chat', () => {
|
||||
it('sends correct request body', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Hello!'));
|
||||
|
||||
await client.chat('Hi there');
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledTimes(1);
|
||||
const [url, init] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('http://localhost:3025/v1/chat/completions');
|
||||
|
||||
const body = JSON.parse(init.body);
|
||||
expect(body.model).toBe('ollama/gemma3:4b');
|
||||
expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
|
||||
expect(body.stream).toBe(false);
|
||||
});
|
||||
|
||||
it('includes system prompt when provided', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Response'));
|
||||
|
||||
await client.chat('Question', { systemPrompt: 'You are helpful.' });
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.messages).toEqual([
|
||||
{ role: 'system', content: 'You are helpful.' },
|
||||
{ role: 'user', content: 'Question' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses custom model and temperature', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Response'));
|
||||
|
||||
await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('openrouter/gpt-4o');
|
||||
expect(body.temperature).toBe(0.3);
|
||||
});
|
||||
|
||||
it('returns ChatResult with content and usage', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Generated text'));
|
||||
|
||||
const result = await client.chat('Prompt');
|
||||
|
||||
expect(result.content).toBe('Generated text');
|
||||
expect(result.model).toBe('ollama/gemma3:4b');
|
||||
expect(result.usage.total_tokens).toBe(30);
|
||||
expect(result.latencyMs).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('throws on error response', async () => {
|
||||
mockFetchError(500, 'Internal Server Error');
|
||||
|
||||
await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
|
||||
});
|
||||
});
|
||||
|
||||
describe('json', () => {
|
||||
it('extracts JSON from response', async () => {
|
||||
mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));
|
||||
|
||||
const result = await client.json<{ category: string; title: string }>('Analyze this');
|
||||
|
||||
expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
|
||||
expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
|
||||
});
|
||||
|
||||
it('extracts JSON from markdown-wrapped response', async () => {
|
||||
mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));
|
||||
|
||||
const result = await client.json('Parse this');
|
||||
expect(result.data).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('applies validation function', async () => {
|
||||
mockFetchOk(mockCompletionResponse('{"name": "test"}'));
|
||||
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name: string };
|
||||
if (typeof obj.name !== 'string') throw new Error('invalid');
|
||||
return obj;
|
||||
};
|
||||
|
||||
const result = await client.json('Prompt', { validate });
|
||||
expect(result.data.name).toBe('test');
|
||||
});
|
||||
|
||||
it('retries JSON extraction on parse failure', async () => {
|
||||
// First attempt returns bad JSON, second returns good
|
||||
mockFetchOk(mockCompletionResponse('not json at all'));
|
||||
mockFetchOk(mockCompletionResponse('{"valid": true}'));
|
||||
|
||||
const result = await client.json('Prompt', { jsonRetries: 1 });
|
||||
expect(result.data).toEqual({ valid: true });
|
||||
expect(mockFetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('vision', () => {
|
||||
it('builds multimodal message with base64 image', async () => {
|
||||
mockFetchOk(mockCompletionResponse('A rose'));
|
||||
|
||||
await client.vision('What is this?', 'abc123base64', 'image/jpeg');
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('ollama/llava:7b');
|
||||
expect(body.messages[0].content).toEqual([
|
||||
{ type: 'text', text: 'What is this?' },
|
||||
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses data URL as-is if already formatted', async () => {
|
||||
mockFetchOk(mockCompletionResponse('A cat'));
|
||||
|
||||
await client.vision('What?', 'data:image/png;base64,xyz');
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
const imageUrl = body.messages[0].content[1].image_url.url;
|
||||
expect(imageUrl).toBe('data:image/png;base64,xyz');
|
||||
});
|
||||
|
||||
it('uses custom vision model when specified', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Result'));
|
||||
|
||||
await client.vision('Prompt', 'img', 'image/jpeg', {
|
||||
visionModel: 'ollama/qwen3-vl:4b',
|
||||
});
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('ollama/qwen3-vl:4b');
|
||||
});
|
||||
});
|
||||
|
||||
// visionJson(): vision call whose reply is parsed as JSON — the fixture
// wraps the payload in a markdown ```json fence to prove it is stripped.
describe('visionJson', () => {
  it('extracts JSON from vision response', async () => {
    mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));

    const result = await client.visionJson<{ species: string }>(
      'Identify plant',
      'imgdata',
      'image/jpeg'
    );

    expect(result.data.species).toBe('Rose');
  });
});

// health(): liveness probe. Must report 'unhealthy' on network failure
// rather than throw.
describe('health', () => {
  it('returns health status', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          status: 'healthy',
          providers: { ollama: { status: 'healthy' } },
        }),
    } as unknown as Response);

    const health = await client.health();
    expect(health.status).toBe('healthy');
  });

  it('returns unhealthy on network error', async () => {
    mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));

    const health = await client.health();
    expect(health.status).toBe('unhealthy');
  });
});

// listModels(): unwraps the OpenAI-style `{ data: [...] }` envelope.
describe('listModels', () => {
  it('returns model list', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
        }),
    } as unknown as Response);

    const models = await client.listModels();
    expect(models).toHaveLength(1);
    expect(models[0].id).toBe('ollama/gemma3:4b');
  });
});

// chatMessages(): a multi-turn history must be forwarded verbatim.
describe('chatMessages', () => {
  it('sends full message history', async () => {
    mockFetchOk(mockCompletionResponse('Answer'));

    await client.chatMessages([
      { role: 'system', content: 'Be brief.' },
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi!' },
      { role: 'user', content: 'How are you?' },
    ]);

    const body = JSON.parse(mockFetch.mock.calls[0][1].body);
    expect(body.messages).toHaveLength(4);
  });
});

// embed(): embeddings come back in the OpenAI list format and are
// unwrapped into plain number[] vectors.
describe('embed', () => {
  it('sends embedding request', async () => {
    mockFetchOk({
      object: 'list',
      data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
      model: 'ollama/gemma3:4b',
      usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
    });

    const result = await client.embed('Hello world');
    expect(result.embeddings).toHaveLength(1);
    expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
  });
});
|
||||
});
|
||||
|
|
@ -1,118 +0,0 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { retryFetch } from '../utils/retry';
|
||||
|
||||
// Replace the global fetch with a vitest mock so each test can script
// the network (queued resolved responses / rejections, call counting).
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
|
||||
|
||||
function mockResponse(status: number, body = ''): Response {
|
||||
return {
|
||||
ok: status >= 200 && status < 300,
|
||||
status,
|
||||
statusText: `Status ${status}`,
|
||||
text: () => Promise.resolve(body),
|
||||
json: () => Promise.resolve(JSON.parse(body || '{}')),
|
||||
headers: new Headers(),
|
||||
} as unknown as Response;
|
||||
}
|
||||
|
||||
describe('retryFetch', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns on first successful attempt', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  // Transient server errors (503, 429) must be retried.
  it('retries on 503 and succeeds', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on 429 rate limit', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(429))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // Thrown (network-level) errors are retryable too.
  it('retries on network error and succeeds', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // Client errors (4xx other than 429) are permanent — the response is
  // returned as-is after a single attempt.
  it('does NOT retry on 400 client error', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(400);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 401 unauthorized', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(401));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(401);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 404 not found', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(404));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(404);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  // After the retry budget is spent the last failure surfaces as a throw.
  it('throws after exhausting all retries', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
      'HTTP 503'
    );

    expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
  });

  it('throws after exhausting retries on network errors', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockRejectedValueOnce(new Error('ECONNREFUSED'));

    await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
      'ECONNREFUSED'
    );

    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // maxRetries: 0 means exactly one attempt, failure throws immediately.
  it('works with maxRetries: 0 (no retries)', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();

    expect(mockFetch).toHaveBeenCalledTimes(1);
  });
});
|
||||
62
packages/shared-llm/src/backends/browser.ts
Normal file
62
packages/shared-llm/src/backends/browser.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Browser-edge backend — wraps @mana/local-llm.
|
||||
*
|
||||
* Inference happens 100% on the user's device via WebGPU. The model
|
||||
* (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the
|
||||
* browser. We do NOT auto-load on backend creation; the user has to
|
||||
* explicitly trigger a load via the settings page or by using a feature
|
||||
* that calls `ensureLoaded()`. This avoids surprising 500 MB downloads.
|
||||
*/
|
||||
|
||||
import {
|
||||
localLLM,
|
||||
LocalLLMEngine,
|
||||
loadLocalLlm,
|
||||
type ChatMessage as LocalChatMessage,
|
||||
} from '@mana/local-llm';
|
||||
import { EdgeLoadFailedError } from '../errors';
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
|
||||
export class BrowserBackend implements LlmBackend {
|
||||
readonly tier = 'browser' as const;
|
||||
|
||||
isAvailable(): boolean {
|
||||
return LocalLLMEngine.isSupported();
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return localLLM.isReady;
|
||||
}
|
||||
|
||||
/** Trigger the one-time model download + WebGPU initialization.
|
||||
* Idempotent — safe to call repeatedly. Throws EdgeLoadFailedError
|
||||
* on failure (model corrupt, WebGPU OOM, etc.). */
|
||||
async ensureLoaded(): Promise<void> {
|
||||
try {
|
||||
await loadLocalLlm();
|
||||
} catch (err) {
|
||||
throw new EdgeLoadFailedError(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
await this.ensureLoaded();
|
||||
|
||||
const result = await localLLM.generate({
|
||||
messages: req.messages as LocalChatMessage[],
|
||||
temperature: req.temperature,
|
||||
maxTokens: req.maxTokens,
|
||||
onToken: req.onToken,
|
||||
});
|
||||
|
||||
return {
|
||||
content: result.content,
|
||||
usage: {
|
||||
promptTokens: result.usage.prompt_tokens,
|
||||
completionTokens: result.usage.completion_tokens,
|
||||
totalTokens: result.usage.total_tokens,
|
||||
},
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
}
|
||||
}
|
||||
44
packages/shared-llm/src/backends/cloud.ts
Normal file
44
packages/shared-llm/src/backends/cloud.ts
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Cloud backend — calls services/mana-llm with a `google/...` model
|
||||
* string. mana-llm's ProviderRouter recognizes the `google/` prefix
|
||||
* and routes to its Google Gemini provider, which holds the API key
|
||||
* server-side (we never expose the key to the browser).
|
||||
*
|
||||
* Default model is google/gemini-2.0-flash. The mana-llm google.py
|
||||
* provider also supports gemini-2.5-pro for higher-quality calls but
|
||||
* 2.0-flash is the right default — fast, cheap, multimodal, plenty
|
||||
* good for the kind of structured-output tasks Mana modules need.
|
||||
*
|
||||
* Cloud is gated by `cloudConsentGiven` in LlmSettings — even if a
|
||||
* user has 'cloud' in their allowedTiers, the orchestrator will skip
|
||||
* this backend until they've ticked the consent checkbox once.
|
||||
*/
|
||||
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
|
||||
|
||||
export interface CloudBackendOptions {
|
||||
/** Gemini model to send. Default 'google/gemini-2.0-flash'. */
|
||||
defaultModel?: string;
|
||||
}
|
||||
|
||||
export class CloudBackend implements LlmBackend {
|
||||
readonly tier = 'cloud' as const;
|
||||
private readonly defaultModel: string;
|
||||
|
||||
constructor(opts: CloudBackendOptions = {}) {
|
||||
this.defaultModel = opts.defaultModel ?? 'google/gemini-2.0-flash';
|
||||
}
|
||||
|
||||
isAvailable(): boolean {
|
||||
return resolveLlmBaseUrl().length > 0;
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return this.isAvailable();
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
return callManaLlmStreaming(this.tier, this.defaultModel, req);
|
||||
}
|
||||
}
|
||||
43
packages/shared-llm/src/backends/mana-server.ts
Normal file
43
packages/shared-llm/src/backends/mana-server.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Mana-server backend — calls services/mana-llm with an Ollama model
|
||||
* string. mana-llm's ProviderRouter recognizes plain Ollama model names
|
||||
* (no provider prefix) and routes them to the local Ollama instance on
|
||||
* the Mac Mini, with automatic Gemini fallback if Ollama is overloaded.
|
||||
*
|
||||
* The default model is gemma3:4b — same model family as the browser
|
||||
* tier (Gemma 4 E2B is the smaller sibling), so prompts behave
|
||||
* consistently when a task auto-falls between tiers.
|
||||
*/
|
||||
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
|
||||
|
||||
export interface ManaServerBackendOptions {
|
||||
/** Ollama model name to send to mana-llm. Default 'gemma3:4b'. */
|
||||
defaultModel?: string;
|
||||
}
|
||||
|
||||
export class ManaServerBackend implements LlmBackend {
|
||||
readonly tier = 'mana-server' as const;
|
||||
private readonly defaultModel: string;
|
||||
|
||||
constructor(opts: ManaServerBackendOptions = {}) {
|
||||
this.defaultModel = opts.defaultModel ?? 'gemma3:4b';
|
||||
}
|
||||
|
||||
isAvailable(): boolean {
|
||||
// Available if we have a base URL configured at all. We don't
|
||||
// ping /health here — that adds latency to every isAvailable()
|
||||
// check. The first real call will fail loudly if mana-llm is down.
|
||||
return resolveLlmBaseUrl().length > 0;
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
// Stateless from our side — assume ready if available.
|
||||
return this.isAvailable();
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
return callManaLlmStreaming(this.tier, this.defaultModel, req);
|
||||
}
|
||||
}
|
||||
135
packages/shared-llm/src/backends/remote.ts
Normal file
135
packages/shared-llm/src/backends/remote.ts
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* Shared HTTP transport for the mana-server and cloud backends.
|
||||
*
|
||||
* Both tiers POST to the same OpenAI-compatible endpoint on
|
||||
* services/mana-llm — they only differ in the `model:` string they
|
||||
* send (which selects which provider mana-llm internally routes to).
|
||||
*
|
||||
* The endpoint is `/v1/chat/completions` and the wire format is
|
||||
* straight OpenAI SSE: `data: {…}\n\n` lines, terminated by
|
||||
* `data: [DONE]`. The hand-rolled parser is the same shape as the
|
||||
* existing playground client (apps/mana/apps/web/src/lib/modules/
|
||||
* playground/llm.ts) so the two consumers stay aligned and can be
|
||||
* unified later if we want.
|
||||
*/
|
||||
|
||||
import { BackendUnreachableError, ProviderBlockedError } from '../errors';
|
||||
import type { LlmTier } from '../tiers';
|
||||
import type { GenerateResult, LlmTaskRequest } from '../types';
|
||||
|
||||
const DEFAULT_LLM_URL = 'http://localhost:3025';
|
||||
|
||||
/** Resolve the mana-llm base URL from the window-injected env, falling
|
||||
* back to localhost. Mirrors the playground client pattern. */
|
||||
export function resolveLlmBaseUrl(): string {
|
||||
if (typeof window !== 'undefined') {
|
||||
const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string })
|
||||
.__PUBLIC_MANA_LLM_URL__;
|
||||
if (fromWindow) return fromWindow.replace(/\/$/, '');
|
||||
}
|
||||
return DEFAULT_LLM_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a chat completion to mana-llm and yield streaming token deltas.
|
||||
* The caller is responsible for assembling the final string and tracking
|
||||
* latency.
|
||||
*
|
||||
* `tier` is only used for error tagging — both 'mana-server' and 'cloud'
|
||||
* call the same endpoint with different model strings.
|
||||
*/
|
||||
export async function callManaLlmStreaming(
|
||||
tier: Exclude<LlmTier, 'none' | 'browser'>,
|
||||
model: string,
|
||||
req: LlmTaskRequest
|
||||
): Promise<GenerateResult> {
|
||||
const url = `${resolveLlmBaseUrl()}/v1/chat/completions`;
|
||||
const start = performance.now();
|
||||
|
||||
let res: Response;
|
||||
try {
|
||||
res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
credentials: 'include', // forwards the Mana auth cookie if present
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages: req.messages,
|
||||
temperature: req.temperature ?? 0.7,
|
||||
max_tokens: req.maxTokens ?? 1024,
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
} catch (err) {
|
||||
// Network failure — DNS, refused connection, CORS preflight, etc.
|
||||
throw new BackendUnreachableError(
|
||||
tier,
|
||||
undefined,
|
||||
err instanceof Error ? err.message : String(err)
|
||||
);
|
||||
}
|
||||
|
||||
if (!res.ok || !res.body) {
|
||||
const text = await res.text().catch(() => '');
|
||||
// 451 = upstream blocked content (we use this convention; Gemini
|
||||
// safety blocks are mapped to 451 in mana-llm's google provider).
|
||||
// Other 4xx/5xx are generic server errors.
|
||||
if (res.status === 451 || /safety|blocked|filter/i.test(text)) {
|
||||
throw new ProviderBlockedError(tier, text || `HTTP ${res.status}`);
|
||||
}
|
||||
throw new BackendUnreachableError(tier, res.status, text);
|
||||
}
|
||||
|
||||
const reader = res.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let collected = '';
|
||||
let promptTokens = 0;
|
||||
let completionTokens = 0;
|
||||
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
// SSE frames are separated by blank lines.
|
||||
let sep: number;
|
||||
while ((sep = buffer.indexOf('\n\n')) !== -1) {
|
||||
const frame = buffer.slice(0, sep);
|
||||
buffer = buffer.slice(sep + 2);
|
||||
|
||||
for (const line of frame.split('\n')) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
const data = line.slice(5).trim();
|
||||
if (!data || data === '[DONE]') continue;
|
||||
try {
|
||||
const json = JSON.parse(data) as {
|
||||
choices?: Array<{ delta?: { content?: string } }>;
|
||||
usage?: { prompt_tokens?: number; completion_tokens?: number };
|
||||
};
|
||||
const delta = json.choices?.[0]?.delta?.content;
|
||||
if (delta) {
|
||||
collected += delta;
|
||||
req.onToken?.(delta);
|
||||
}
|
||||
if (json.usage) {
|
||||
promptTokens = json.usage.prompt_tokens ?? promptTokens;
|
||||
completionTokens = json.usage.completion_tokens ?? completionTokens;
|
||||
}
|
||||
} catch {
|
||||
// Malformed frame — keepalive comment, skip silently.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: collected,
|
||||
usage: {
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
totalTokens: promptTokens + completionTokens,
|
||||
},
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
};
|
||||
}
|
||||
80
packages/shared-llm/src/errors.ts
Normal file
80
packages/shared-llm/src/errors.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Typed error classes for the LLM orchestrator. UI code can `instanceof`
|
||||
* these to render task-appropriate failure states (retry button, switch
|
||||
* tier prompt, "blocked by safety filter" notice, etc.).
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
|
||||
// Root of the error hierarchy: `err instanceof LlmError` catches every
// failure raised by the orchestrator and its backends.
export class LlmError extends Error {
  constructor(message: string) {
    super(message);
    // Overwrite the inherited 'Error' so logs show the concrete class.
    this.name = 'LlmError';
  }
}
|
||||
|
||||
/** No tier from the user's preference list was able to run the task. */
|
||||
export class NoTierAvailableError extends LlmError {
|
||||
constructor(
|
||||
public readonly taskName: string,
|
||||
public readonly attempted: LlmTier[]
|
||||
) {
|
||||
super(`No tier could run task '${taskName}' (attempted: ${attempted.join(', ') || 'none'})`);
|
||||
this.name = 'NoTierAvailableError';
|
||||
}
|
||||
}
|
||||
|
||||
/** The user's chosen tier is below the task's declared minimum tier. */
|
||||
export class TierTooLowError extends LlmError {
|
||||
constructor(
|
||||
public readonly taskName: string,
|
||||
public readonly requiredTier: LlmTier,
|
||||
public readonly userTier: LlmTier
|
||||
) {
|
||||
super(
|
||||
`Task '${taskName}' requires tier '${requiredTier}' but user is on '${userTier}'. Activate the higher tier in settings.`
|
||||
);
|
||||
this.name = 'TierTooLowError';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The upstream provider blocked the content (e.g. Gemini safety filter,
|
||||
* OpenAI moderation). The UI should offer "retry" + "switch to another
|
||||
* provider" options to the user — this is NOT auto-recoverable because
|
||||
* a different provider might allow the same content (or might not).
|
||||
*/
|
||||
export class ProviderBlockedError extends LlmError {
  constructor(
    // Tier whose provider refused the request (used for UI error routing).
    public readonly tier: LlmTier,
    // Raw upstream message, surfaced so the user can see why it was blocked.
    public readonly providerMessage: string
  ) {
    super(`Provider '${tier}' blocked the request: ${providerMessage}`);
    this.name = 'ProviderBlockedError';
  }
}
|
||||
|
||||
/** Network/server error from a remote tier (mana-server, cloud). */
|
||||
export class BackendUnreachableError extends LlmError {
|
||||
constructor(
|
||||
public readonly tier: LlmTier,
|
||||
public readonly httpStatus?: number,
|
||||
details?: string
|
||||
) {
|
||||
super(
|
||||
`Backend '${tier}' is unreachable${httpStatus ? ` (HTTP ${httpStatus})` : ''}${details ? `: ${details}` : ''}`
|
||||
);
|
||||
this.name = 'BackendUnreachableError';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The browser tier specifically failed to load — model download
|
||||
* interrupted, WebGPU adapter request failed, OOM, etc.
|
||||
*/
|
||||
export class EdgeLoadFailedError extends LlmError {
  // `cause` is the stringified underlying failure (download interruption,
  // WebGPU adapter failure, OOM, …).
  // NOTE(review): this shadows the ES2022 `Error#cause` property (which
  // holds an unknown, not a string) — consider renaming to avoid confusion.
  constructor(public readonly cause: string) {
    super(`Edge LLM failed to load: ${cause}`);
    this.name = 'EdgeLoadFailedError';
  }
}
|
||||
|
|
@ -1,39 +1,44 @@
|
|||
// Module
|
||||
export { LlmModule } from './llm.module';
|
||||
export { LlmClientService } from './llm-client.service';
|
||||
export { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
|
||||
// Core client (for advanced use cases)
|
||||
export { LlmClient } from './llm-client';
|
||||
|
||||
// Interfaces
|
||||
export type {
|
||||
LlmModuleOptions,
|
||||
LlmModuleAsyncOptions,
|
||||
LlmOptionsFactory,
|
||||
ResolvedLlmOptions,
|
||||
} from './interfaces';
|
||||
export { resolveOptions } from './interfaces';
|
||||
|
||||
// Types
|
||||
// Tiers + types
|
||||
export { ALL_TIERS, TIER_RANK, tierLabel, type LlmTier } from './tiers';
|
||||
export type {
|
||||
CapabilityRequirements,
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
ContentClass,
|
||||
GenerateOptions,
|
||||
GenerateResult,
|
||||
LlmBackend,
|
||||
LlmSettings,
|
||||
LlmTaskRequest,
|
||||
LlmTaskResult,
|
||||
} from './types';
|
||||
export { DEFAULT_LLM_SETTINGS } from './types';
|
||||
|
||||
// Utilities
|
||||
export { extractJson } from './utils';
|
||||
// Errors
|
||||
export {
|
||||
BackendUnreachableError,
|
||||
EdgeLoadFailedError,
|
||||
LlmError,
|
||||
NoTierAvailableError,
|
||||
ProviderBlockedError,
|
||||
TierTooLowError,
|
||||
} from './errors';
|
||||
|
||||
// Metrics
|
||||
export { LlmMetricsCollector } from './utils';
|
||||
export type { LlmRequestMetrics, MetricsCallback } from './utils';
|
||||
// Task contract
|
||||
export { buildTaskRequest, type LlmTask } from './task';
|
||||
|
||||
// Orchestrator (rarely instantiated directly — most consumers use the
|
||||
// store's singleton instead)
|
||||
export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator';
|
||||
|
||||
// Backends (exported for tests + custom orchestrator setups)
|
||||
export { BrowserBackend } from './backends/browser';
|
||||
export { CloudBackend, type CloudBackendOptions } from './backends/cloud';
|
||||
export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server';
|
||||
|
||||
// Singleton store + Svelte 5 reactive hooks
|
||||
export {
|
||||
llmOrchestrator,
|
||||
llmSettingsState,
|
||||
updateLlmSettings,
|
||||
useTaskAvailability,
|
||||
} from './store.svelte';
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
// Barrel re-export so consumers can import from './interfaces' without
// knowing the concrete file layout.
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './llm-options.interface';

export { resolveOptions } from './llm-options.interface';
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
import type { ModuleMetadata, Type } from '@nestjs/common';
|
||||
import type { MetricsCallback } from '../utils/metrics';
|
||||
|
||||
/** User-facing configuration for the LLM module. All fields optional;
 * see resolveOptions() for the applied defaults. */
export interface LlmModuleOptions {
  /** mana-llm service URL (default: http://localhost:3025) */
  manaLlmUrl?: string;
  /** Default text model (default: ollama/gemma3:4b) */
  defaultModel?: string;
  /** Default vision model (default: ollama/llava:7b) */
  defaultVisionModel?: string;
  /** Request timeout in ms (default: 120000) */
  timeout?: number;
  /** Max retries on transient failures (default: 2) */
  maxRetries?: number;
  /** Enable debug logging (default: false) */
  debug?: boolean;
  /** Optional callback invoked after every LLM request with metrics */
  onMetrics?: MetricsCallback;
}

/**
 * Async configuration following the standard NestJS dynamic-module
 * pattern: exactly one of useExisting / useClass / useFactory supplies
 * the options.
 */
export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
  /** Reuse an already-registered provider implementing LlmOptionsFactory. */
  useExisting?: Type<LlmOptionsFactory>;
  /** Instantiate this class as the options factory. */
  useClass?: Type<LlmOptionsFactory>;
  /** Build the options from a factory function with injected dependencies. */
  useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
  /** DI tokens injected into useFactory, positionally. */
  inject?: any[];
}

/** Contract for useExisting/useClass option factories. */
export interface LlmOptionsFactory {
  createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}

/** LlmModuleOptions after defaults are applied — every knob concrete
 * except the optional metrics callback. */
export interface ResolvedLlmOptions {
  manaLlmUrl: string;
  defaultModel: string;
  defaultVisionModel: string;
  timeout: number;
  maxRetries: number;
  debug: boolean;
  onMetrics?: MetricsCallback;
}
|
||||
|
||||
export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
|
||||
return {
|
||||
manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
|
||||
defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
|
||||
defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
|
||||
timeout: options.timeout ?? 120_000,
|
||||
maxRetries: options.maxRetries ?? 2,
|
||||
debug: options.debug ?? false,
|
||||
onMetrics: options.onMetrics,
|
||||
};
|
||||
}
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
import { Inject, Injectable } from '@nestjs/common';
|
||||
import { LlmClient } from './llm-client';
|
||||
import { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
|
||||
import { resolveOptions } from './interfaces/llm-options.interface';
|
||||
|
||||
/**
 * NestJS injectable wrapper around LlmClient.
 * All logic lives in the framework-agnostic LlmClient base class; this
 * subclass only resolves the DI-provided options into concrete values.
 */
@Injectable()
export class LlmClientService extends LlmClient {
  constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
    // Apply defaults before handing the config to the base client.
    super(resolveOptions(options));
  }
}
|
||||
|
|
@ -1,392 +0,0 @@
|
|||
/**
|
||||
* Framework-agnostic LLM client that communicates with the mana-llm service.
|
||||
*
|
||||
* This is the core implementation shared between the NestJS LlmClientService
|
||||
* and the standalone LlmClient export (for non-NestJS consumers like bot-services).
|
||||
*/
|
||||
|
||||
import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
|
||||
import type {
|
||||
ChatMessage,
|
||||
ChatOptions,
|
||||
ChatResult,
|
||||
JsonOptions,
|
||||
JsonResult,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './types/chat.types';
|
||||
import type {
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
EmbeddingResponse,
|
||||
} from './types/openai-compat.types';
|
||||
import type { LlmRequestMetrics } from './utils/metrics';
|
||||
import { extractJson } from './utils/json-extractor';
|
||||
import { retryFetch } from './utils/retry';
|
||||
|
||||
function createTimeoutSignal(ms: number): any {
|
||||
const controller = new AbortController();
|
||||
setTimeout(() => controller.abort(), ms);
|
||||
return controller.signal;
|
||||
}
|
||||
|
||||
export class LlmClient {
|
||||
  // mana-llm service base URL, normalized to have no trailing slash.
  private readonly baseUrl: string;
  // Fully-defaulted configuration (see resolveOptions()).
  private readonly options: ResolvedLlmOptions;

  constructor(options: ResolvedLlmOptions) {
    this.options = options;
    // Strip trailing slashes once so endpoint paths can be appended safely.
    this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Text Chat
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Simple chat with a single prompt string. */
|
||||
async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
return this.chatMessages(messages, opts);
|
||||
}
|
||||
|
||||
  /**
   * Chat with full message history. Non-streaming: resolves once the
   * complete reply is available. Emits exactly one metrics record per
   * call — on success and on failure — then returns or rethrows.
   */
  async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
    const requestedModel = opts?.model ?? this.options.defaultModel;
    const body = this.buildRequest(messages, opts, false); // stream=false
    const start = Date.now();

    try {
      const response = await this.fetchCompletion(body, opts?.timeout);
      const latencyMs = Date.now() - start;
      // Some responses omit usage; zero it out so metrics stay well-formed.
      const usage = response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

      this.emitMetrics({
        model: requestedModel,
        actualModel: response.model,
        type: 'chat',
        latencyMs,
        promptTokens: usage.prompt_tokens,
        completionTokens: usage.completion_tokens,
        totalTokens: usage.total_tokens,
        // Fallback heuristic: the served model neither equals nor ends
        // with the requested name (endsWith covers provider-prefixed
        // echoes of the same model).
        wasFallback: response.model !== requestedModel && !response.model.endsWith(requestedModel),
        success: true,
      });

      return {
        content: response.choices[0]?.message?.content ?? '',
        model: response.model,
        usage,
        latencyMs,
      };
    } catch (error) {
      // Record the failure with zeroed token counts, then surface the
      // original error unchanged to the caller.
      this.emitMetrics({
        model: requestedModel,
        actualModel: requestedModel,
        type: 'chat',
        latencyMs: Date.now() - start,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: 0,
        wasFallback: false,
        success: false,
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Streaming
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Streaming chat - returns an async iterable of text tokens. */
|
||||
async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
yield* this.chatStreamMessages(messages, opts);
|
||||
}
|
||||
|
||||
  /**
   * Streaming chat with full message history. Yields text tokens as they
   * arrive from mana-llm's OpenAI-style SSE stream; returns when the
   * server sends the `[DONE]` sentinel or closes the connection.
   */
  async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
    const body = this.buildRequest(messages, opts, true); // stream=true
    const timeout = opts?.timeout ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm stream error ${response.status}: ${text}`);
    }

    if (!response.body) {
      throw new Error('mana-llm returned no response body for stream');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    // Carries a partial line across network chunk boundaries.
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last split element may be an incomplete line — re-buffer it.
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed || !trimmed.startsWith('data: ')) continue;

          const data = trimmed.slice(6); // strip the 'data: ' prefix
          if (data === '[DONE]') return;

          try {
            const chunk = JSON.parse(data);
            const content = chunk.choices?.[0]?.delta?.content;
            if (content) yield content;
          } catch {
            // Skip unparseable chunks
          }
        }
      }
    } finally {
      // Always release the lock so the stream can be cancelled / GC'd.
      reader.releaseLock();
    }
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Structured JSON Output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Chat that extracts and parses JSON from the response. */
|
||||
async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
return this.jsonMessages<T>(messages, opts);
|
||||
}
|
||||
|
||||
/** JSON extraction from full message history. */
|
||||
async jsonMessages<T = unknown>(
|
||||
messages: ChatMessage[],
|
||||
opts?: JsonOptions<T>
|
||||
): Promise<JsonResult<T>> {
|
||||
const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
|
||||
let lastError: Error | undefined;
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
const result = await this.chatMessages(messages, opts);
|
||||
|
||||
try {
|
||||
const data = extractJson<T>(result.content, opts?.validate);
|
||||
return { ...result, data };
|
||||
} catch (error) {
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
if (this.options.debug) {
|
||||
console.warn(
|
||||
`[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
|
||||
lastError.message
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError ?? new Error('JSON extraction failed');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vision
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Analyze an image with a text prompt. */
|
||||
async vision(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
opts?: VisionOptions
|
||||
): Promise<ChatResult> {
|
||||
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
|
||||
const model = opts?.visionModel ?? this.options.defaultVisionModel;
|
||||
return this.chatMessages(messages, { ...opts, model });
|
||||
}
|
||||
|
||||
/** Vision + JSON extraction. */
|
||||
async visionJson<T = unknown>(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
opts?: VisionOptions & JsonOptions<T>
|
||||
): Promise<JsonResult<T>> {
|
||||
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
|
||||
const model = opts?.visionModel ?? this.options.defaultVisionModel;
|
||||
return this.jsonMessages<T>(messages, { ...opts, model });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Embeddings
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Generate embeddings for text input. */
|
||||
async embed(
|
||||
input: string | string[],
|
||||
model?: string
|
||||
): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
|
||||
const response = await retryFetch(
|
||||
`${this.baseUrl}/v1/embeddings`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model: model ?? this.options.defaultModel,
|
||||
input,
|
||||
}),
|
||||
signal: createTimeoutSignal(this.options.timeout),
|
||||
},
|
||||
{ maxRetries: this.options.maxRetries }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '');
|
||||
throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as EmbeddingResponse;
|
||||
return {
|
||||
embeddings: data.data.map((d) => d.embedding),
|
||||
usage: data.usage,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Health & Models
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Check mana-llm health and provider status. */
|
||||
async health(): Promise<HealthStatus> {
|
||||
try {
|
||||
const response = await fetch(`${this.baseUrl}/health`, {
|
||||
signal: createTimeoutSignal(5_000),
|
||||
});
|
||||
if (!response.ok) {
|
||||
return { status: 'unhealthy', providers: {} };
|
||||
}
|
||||
return (await response.json()) as HealthStatus;
|
||||
} catch {
|
||||
return { status: 'unhealthy', providers: {} };
|
||||
}
|
||||
}
|
||||
|
||||
/** List available models from all providers. */
|
||||
async listModels(): Promise<ModelInfo[]> {
|
||||
const response = await fetch(`${this.baseUrl}/v1/models`, {
|
||||
signal: createTimeoutSignal(10_000),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`mana-llm models error ${response.status}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as { data: ModelInfo[] };
|
||||
return data.data ?? [];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Private helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
|
||||
const messages: ChatMessage[] = [];
|
||||
if (systemPrompt) {
|
||||
messages.push({ role: 'system', content: systemPrompt });
|
||||
}
|
||||
messages.push({ role: 'user', content: prompt });
|
||||
return messages;
|
||||
}
|
||||
|
||||
private buildVisionMessages(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
systemPrompt?: string
|
||||
): ChatMessage[] {
|
||||
const mime = mimeType ?? 'image/jpeg';
|
||||
const dataUrl = imageBase64.startsWith('data:')
|
||||
? imageBase64
|
||||
: `data:${mime};base64,${imageBase64}`;
|
||||
|
||||
const messages: ChatMessage[] = [];
|
||||
if (systemPrompt) {
|
||||
messages.push({ role: 'system', content: systemPrompt });
|
||||
}
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: prompt },
|
||||
{ type: 'image_url', image_url: { url: dataUrl } },
|
||||
],
|
||||
});
|
||||
return messages;
|
||||
}
|
||||
|
||||
private buildRequest(
|
||||
messages: ChatMessage[],
|
||||
opts: ChatOptions | undefined,
|
||||
stream: boolean
|
||||
): ChatCompletionRequest {
|
||||
const request: ChatCompletionRequest = {
|
||||
model: opts?.model ?? this.options.defaultModel,
|
||||
messages,
|
||||
stream,
|
||||
};
|
||||
|
||||
if (opts?.temperature !== undefined) request.temperature = opts.temperature;
|
||||
if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;
|
||||
|
||||
return request;
|
||||
}
|
||||
|
||||
private async fetchCompletion(
|
||||
body: ChatCompletionRequest,
|
||||
timeoutOverride?: number
|
||||
): Promise<ChatCompletionResponse> {
|
||||
const timeout = timeoutOverride ?? this.options.timeout;
|
||||
|
||||
const response = await retryFetch(
|
||||
`${this.baseUrl}/v1/chat/completions`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
signal: createTimeoutSignal(timeout),
|
||||
},
|
||||
{ maxRetries: this.options.maxRetries }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '');
|
||||
throw new Error(`mana-llm error ${response.status}: ${text}`);
|
||||
}
|
||||
|
||||
return (await response.json()) as ChatCompletionResponse;
|
||||
}
|
||||
|
||||
private emitMetrics(metrics: LlmRequestMetrics): void {
|
||||
if (this.options.onMetrics) {
|
||||
try {
|
||||
this.options.onMetrics(metrics);
|
||||
} catch {
|
||||
// Never let metrics callback break the request
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
/** NestJS injection token under which LlmModule registers its options object. */
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
|
||||
import type {
|
||||
LlmModuleOptions,
|
||||
LlmModuleAsyncOptions,
|
||||
LlmOptionsFactory,
|
||||
} from './interfaces/llm-options.interface';
|
||||
import { LlmClientService } from './llm-client.service';
|
||||
import { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
|
||||
@Global()
@Module({})
export class LlmModule {
  /** Register the module with a static, literal options object. */
  static forRoot(options: LlmModuleOptions): DynamicModule {
    return {
      module: LlmModule,
      providers: [
        {
          provide: LLM_MODULE_OPTIONS,
          useValue: options,
        },
        LlmClientService,
      ],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  /** Register the module with options resolved asynchronously — via
   * useFactory, useClass, or useExisting (see createAsyncProviders
   * for the precedence between them). */
  static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
    const asyncProviders = this.createAsyncProviders(options);

    return {
      module: LlmModule,
      imports: options.imports || [],
      providers: [...asyncProviders, LlmClientService],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  /** Builds the providers that resolve LLM_MODULE_OPTIONS.
   * Precedence: useFactory, then useClass (which is also registered
   * as its own provider so it can be injected), then useExisting.
   * Returns an empty list when none of the three is supplied. */
  private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
    if (options.useFactory) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: options.useFactory,
          inject: options.inject || [],
        },
      ];
    }

    const useClass = options.useClass;
    const useExisting = options.useExisting;

    if (useClass) {
      return [
        {
          // Options come from the factory class's createLlmOptions().
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useClass],
        },
        {
          provide: useClass,
          useClass,
        },
      ];
    }

    if (useExisting) {
      return [
        {
          // Reuse a provider registered elsewhere as the options factory.
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useExisting],
        },
      ];
    }

    return [];
  }
}
|
||||
258
packages/shared-llm/src/orchestrator.ts
Normal file
258
packages/shared-llm/src/orchestrator.ts
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
/**
|
||||
* LlmOrchestrator — routes LlmTasks across the four privacy tiers
|
||||
* (none / browser / mana-server / cloud) according to the user's
|
||||
* settings, the task's minimum tier, and the input's content class.
|
||||
*
|
||||
* Routing rules — applied in this exact order:
|
||||
*
|
||||
* 1. If the task's minTier is above the user's HIGHEST allowed tier,
|
||||
* we cannot run the LLM path at all. Try runRules() if defined,
|
||||
* else throw TierTooLowError.
|
||||
*
|
||||
* 2. If contentClass is 'sensitive', strip 'mana-server' and 'cloud'
|
||||
* from the candidate tier list — sensitive content NEVER leaves
|
||||
* the device, even if the user has these tiers enabled globally.
|
||||
* This is the privacy backstop the user can't accidentally
|
||||
* override task-by-task.
|
||||
*
|
||||
* 3. If a per-task override exists in settings.taskOverrides, use it
|
||||
* verbatim (still subject to rule 2 — task overrides cannot
|
||||
* bypass the sensitive-content backstop).
|
||||
*
|
||||
* 4. Otherwise, pick the FIRST tier from settings.allowedTiers that
|
||||
* (a) is in the candidate set after rules 1+2, (b) has an
|
||||
* available + ready backend, (c) the cloud-consent gate is
|
||||
* satisfied if it's the cloud tier.
|
||||
*
|
||||
* 5. Run the task on the chosen backend.
|
||||
*
|
||||
* 6. If the run throws and settings.fallbackToRulesOnError is true
|
||||
* and the task has a runRules() implementation, fall back to
|
||||
* rules. We do NOT auto-fall to a different LLM tier on error —
|
||||
* the user explicitly chose this tier and silently switching
|
||||
* providers would be a privacy/trust break.
|
||||
*
|
||||
* 7. If everything fails, throw NoTierAvailableError. UI catches it
|
||||
* and offers a "retry" / "switch tier" / "enter manually" prompt.
|
||||
*/
|
||||
|
||||
import {
|
||||
BackendUnreachableError,
|
||||
NoTierAvailableError,
|
||||
ProviderBlockedError,
|
||||
TierTooLowError,
|
||||
} from './errors';
|
||||
import type { LlmTask } from './task';
|
||||
import type { LlmTier } from './tiers';
|
||||
import { TIER_RANK } from './tiers';
|
||||
import type { LlmBackend, LlmSettings, LlmTaskRequest, LlmTaskResult } from './types';
|
||||
|
||||
export interface LlmOrchestratorOptions {
  /** User preferences that drive tier routing (allowed tiers, overrides, consent). */
  settings: LlmSettings;
  /** Backend implementations, one per tier; a later entry for the same tier wins. */
  backends: LlmBackend[];
}
|
||||
|
||||
export class LlmOrchestrator {
|
||||
private settings: LlmSettings;
|
||||
private backendsByTier: Map<LlmTier, LlmBackend>;
|
||||
|
||||
constructor(opts: LlmOrchestratorOptions) {
|
||||
this.settings = opts.settings;
|
||||
this.backendsByTier = new Map();
|
||||
for (const b of opts.backends) {
|
||||
this.backendsByTier.set(b.tier, b);
|
||||
}
|
||||
}
|
||||
|
||||
/** Replace the settings object — call this when the user updates
|
||||
* their preferences in the settings UI. */
|
||||
updateSettings(settings: LlmSettings): void {
|
||||
this.settings = settings;
|
||||
}
|
||||
|
||||
/** Public read-only view for UI components that want to react to
|
||||
* the current settings (e.g. the tier selector). */
|
||||
getSettings(): Readonly<LlmSettings> {
|
||||
return this.settings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can the user (with their current settings) run this task at all?
|
||||
* The UI uses this to decide whether to show a feature button as
|
||||
* enabled / disabled / hidden. Does NOT check backend readiness —
|
||||
* that's a per-call concern. Just checks "is there any conceivable
|
||||
* tier in the user's allowedTiers that satisfies task.minTier and
|
||||
* is permitted for task.contentClass?".
|
||||
*/
|
||||
canRun<TIn, TOut>(task: LlmTask<TIn, TOut>): boolean {
|
||||
// Rules-only tasks always run if they have a fallback
|
||||
if (task.minTier === 'none') return true;
|
||||
if (task.runRules) return true;
|
||||
|
||||
const candidates = this.candidateTiers(task);
|
||||
return candidates.some((t) => {
|
||||
const backend = this.backendsByTier.get(t);
|
||||
return backend?.isAvailable() ?? false;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the task. Honors the routing rules above. The returned
|
||||
* LlmTaskResult includes which tier actually ran, plus a trail
|
||||
* of tiers that were attempted and skipped before it.
|
||||
*/
|
||||
async run<TIn, TOut>(task: LlmTask<TIn, TOut>, input: TIn): Promise<LlmTaskResult<TOut>> {
|
||||
const start = performance.now();
|
||||
const attempted: LlmTier[] = [];
|
||||
|
||||
// Rule 1: tier-too-low check
|
||||
const userMaxTier = this.userMaxTier();
|
||||
if (TIER_RANK[task.minTier] > TIER_RANK[userMaxTier]) {
|
||||
if (task.runRules) {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: ['none'],
|
||||
};
|
||||
}
|
||||
throw new TierTooLowError(task.name, task.minTier, userMaxTier);
|
||||
}
|
||||
|
||||
// Rules-2-3: candidate tier list and per-task override
|
||||
const candidates = this.candidateTiers(task);
|
||||
const override = this.settings.taskOverrides[task.name];
|
||||
const orderedTiers = override ? [override].filter((t) => candidates.includes(t)) : candidates;
|
||||
|
||||
// Rule 4-5: try the first runnable tier
|
||||
for (const tier of orderedTiers) {
|
||||
if (tier === 'none') {
|
||||
if (task.runRules) {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, 'none'],
|
||||
};
|
||||
}
|
||||
attempted.push('none');
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cloud-consent gate
|
||||
if (tier === 'cloud' && !this.settings.cloudConsentGiven) {
|
||||
attempted.push('cloud');
|
||||
continue;
|
||||
}
|
||||
|
||||
const backend = this.backendsByTier.get(tier);
|
||||
if (!backend) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
if (!backend.isAvailable()) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
const ready = await backend.isReady();
|
||||
if (!ready) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const request = this.buildRequest(task, input);
|
||||
const generated = await task.runLlm(input, backend);
|
||||
return {
|
||||
value: generated,
|
||||
source: tier,
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, tier],
|
||||
};
|
||||
// `request` is intentionally unused — the task constructs
|
||||
// its own LlmTaskRequest internally via runLlm. We build
|
||||
// it here only as a future hook for telemetry.
|
||||
void request;
|
||||
} catch (err) {
|
||||
attempted.push(tier);
|
||||
// Rule 6: rules-fallback on error
|
||||
if (
|
||||
this.settings.fallbackToRulesOnError &&
|
||||
task.runRules &&
|
||||
!(err instanceof ProviderBlockedError)
|
||||
) {
|
||||
// Provider-blocked errors should NOT silently fall to
|
||||
// rules — they should bubble up so the UI can offer
|
||||
// "retry" / "switch tier" prompts. Other errors
|
||||
// (network failure, OOM, model not loaded) get the
|
||||
// silent rules fallback.
|
||||
try {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, 'none'],
|
||||
};
|
||||
} catch {
|
||||
// rules fallback also failed — re-throw original
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
// Re-throw provider blocks and unrecoverable errors
|
||||
if (err instanceof ProviderBlockedError || err instanceof BackendUnreachableError) {
|
||||
throw err;
|
||||
}
|
||||
// Unknown error — try the next tier in the list
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
throw new NoTierAvailableError(task.name, attempted);
|
||||
}
|
||||
|
||||
/** Highest tier in the user's allowedTiers list (by rank). */
|
||||
private userMaxTier(): LlmTier {
|
||||
if (this.settings.allowedTiers.length === 0) return 'none';
|
||||
return this.settings.allowedTiers.reduce(
|
||||
(max, t) => (TIER_RANK[t] > TIER_RANK[max] ? t : max),
|
||||
'none' as LlmTier
|
||||
);
|
||||
}
|
||||
|
||||
/** Candidate tier list after applying rules 1 + 2.
|
||||
* - Rule 1: only tiers >= task.minTier
|
||||
* - Rule 2: sensitive content excludes mana-server + cloud
|
||||
* Also always includes 'none' at the end if the task has runRules. */
|
||||
private candidateTiers<TIn, TOut>(task: LlmTask<TIn, TOut>): LlmTier[] {
|
||||
// Start from the user's allowed tiers, in their preference order
|
||||
let tiers = this.settings.allowedTiers.filter((t) => TIER_RANK[t] >= TIER_RANK[task.minTier]);
|
||||
|
||||
// Rule 2: sensitive content backstop
|
||||
if (task.contentClass === 'sensitive') {
|
||||
tiers = tiers.filter((t) => t === 'browser');
|
||||
}
|
||||
|
||||
// 'none' is always tail-appended if the task has a rules implementation,
|
||||
// so the for-loop in run() naturally falls through to it.
|
||||
if (task.runRules && !tiers.includes('none')) {
|
||||
tiers.push('none');
|
||||
}
|
||||
return tiers;
|
||||
}
|
||||
|
||||
private buildRequest<TIn, TOut>(task: LlmTask<TIn, TOut>, _input: TIn): LlmTaskRequest {
|
||||
// Right now this is a placeholder — tasks build their own
|
||||
// LlmTaskRequest inside runLlm. Once we add token-counting
|
||||
// telemetry we'll move that construction up here so the
|
||||
// orchestrator can prepend the task metadata uniformly.
|
||||
return {
|
||||
taskName: task.name,
|
||||
contentClass: task.contentClass,
|
||||
requires: task.requires,
|
||||
messages: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
/**
|
||||
* Standalone exports for non-NestJS consumers (e.g. bot-services).
|
||||
*
|
||||
* Usage:
|
||||
* import { LlmClient } from '@mana/shared-llm/standalone';
|
||||
* const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' });
|
||||
*/
|
||||
|
||||
export { LlmClient } from './llm-client';
|
||||
export { resolveOptions } from './interfaces/llm-options.interface';
|
||||
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';
|
||||
|
||||
// Types
|
||||
export type {
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './types';
|
||||
|
||||
// Utilities
|
||||
export { extractJson } from './utils';
|
||||
107
packages/shared-llm/src/store.svelte.ts
Normal file
107
packages/shared-llm/src/store.svelte.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
|
||||
* Svelte 5 reactive store for the LLM orchestrator.
|
||||
*
|
||||
* Lives at module-scope as a singleton because there is exactly one
|
||||
* orchestrator + settings per page session. Settings are persisted to
|
||||
* localStorage for now (Phase 1) — Phase 2 will move them into the
|
||||
* encrypted IndexedDB settings table once that exists.
|
||||
*
|
||||
* Usage in a Svelte 5 component:
|
||||
*
|
||||
* import { llmOrchestrator, llmSettingsState, useTaskAvailability } from '@mana/shared-llm';
|
||||
* import { extractDateTask } from '$lib/llm-tasks/extract-date';
|
||||
*
|
||||
* const available = useTaskAvailability(extractDateTask);
|
||||
* // ... reactively true/false based on settings + backend readiness
|
||||
*
|
||||
* {#if available.current}
|
||||
* <button onclick={() => orchestrator.run(extractDateTask, text)}>...</button>
|
||||
* {/if}
|
||||
*/
|
||||
|
||||
import { BrowserBackend } from './backends/browser';
|
||||
import { CloudBackend } from './backends/cloud';
|
||||
import { ManaServerBackend } from './backends/mana-server';
|
||||
import { LlmOrchestrator } from './orchestrator';
|
||||
import type { LlmTask } from './task';
|
||||
import { DEFAULT_LLM_SETTINGS, type LlmSettings } from './types';
|
||||
|
||||
const STORAGE_KEY = 'mana.llm.settings.v1';
|
||||
|
||||
/** Load persisted settings, falling back to defaults on first run or
|
||||
* any parse error. localStorage is fine for Phase 1 — small payload,
|
||||
* not encrypted-sensitive (the user's tier preference is hardly
|
||||
* secret), and trivial to migrate to IndexedDB later. */
|
||||
function loadSettings(): LlmSettings {
|
||||
if (typeof localStorage === 'undefined') return { ...DEFAULT_LLM_SETTINGS };
|
||||
try {
|
||||
const raw = localStorage.getItem(STORAGE_KEY);
|
||||
if (!raw) return { ...DEFAULT_LLM_SETTINGS };
|
||||
const parsed = JSON.parse(raw) as Partial<LlmSettings>;
|
||||
return { ...DEFAULT_LLM_SETTINGS, ...parsed };
|
||||
} catch {
|
||||
return { ...DEFAULT_LLM_SETTINGS };
|
||||
}
|
||||
}
|
||||
|
||||
function persistSettings(settings: LlmSettings): void {
|
||||
if (typeof localStorage === 'undefined') return;
|
||||
try {
|
||||
localStorage.setItem(STORAGE_KEY, JSON.stringify(settings));
|
||||
} catch {
|
||||
// Quota exceeded or storage disabled — non-fatal, settings just
|
||||
// won't persist across sessions.
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Reactive state ──────────────────────────────────────────────

// Svelte 5 `$state` rune: module-level reactive settings object,
// hydrated from localStorage on first import of this module.
let _settings = $state<LlmSettings>(loadSettings());

// Backends are constructed once per page session. They're stateless
// (or hold their own internal state in the case of BrowserBackend
// pointing at @mana/local-llm's singleton), so a fresh instance per
// orchestrator is fine.
const backends = [new BrowserBackend(), new ManaServerBackend(), new CloudBackend()];

// Singleton orchestrator for the page session; receives settings
// updates via updateLlmSettings() below.
export const llmOrchestrator = new LlmOrchestrator({
  settings: _settings,
  backends,
});

/** Reactive accessor for the current settings. UI components read
 * via `llmSettingsState.current` to get a $state-tracked snapshot. */
export const llmSettingsState = {
  get current(): LlmSettings {
    return _settings;
  },
};
|
||||
|
||||
/** Update settings (or part of them). Persists to localStorage and
|
||||
* pushes the new value into the orchestrator. */
|
||||
export function updateLlmSettings(patch: Partial<LlmSettings>): void {
|
||||
_settings = { ..._settings, ...patch };
|
||||
persistSettings(_settings);
|
||||
llmOrchestrator.updateSettings(_settings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Svelte 5 reactive hook: returns `{ current: boolean }` indicating
|
||||
* whether the given task can run with the user's current settings.
|
||||
* Reactive against `llmSettingsState` so the UI re-renders when the
|
||||
* user toggles a tier in the settings page.
|
||||
*
|
||||
* Use this to gate feature buttons — show them as enabled when the
|
||||
* task is runnable, disabled (with a tooltip) when not.
|
||||
*/
|
||||
export function useTaskAvailability<TIn, TOut>(
|
||||
task: LlmTask<TIn, TOut>
|
||||
): { readonly current: boolean } {
|
||||
return {
|
||||
get current() {
|
||||
// Reading _settings here registers the reactive dependency
|
||||
void _settings;
|
||||
return llmOrchestrator.canRun(task);
|
||||
},
|
||||
};
|
||||
}
|
||||
82
packages/shared-llm/src/task.ts
Normal file
82
packages/shared-llm/src/task.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* The LlmTask contract — the unit of work modules describe to the
|
||||
* orchestrator. Tasks bundle:
|
||||
*
|
||||
* 1. The LLM-side implementation (used for browser/server/cloud tiers)
|
||||
* 2. An optional rules-tier fallback (used when the LLM tier is
|
||||
* unavailable, fails, or the user has opted out of all LLM tiers)
|
||||
* 3. Routing metadata (minimum tier, content class, capability needs)
|
||||
*
|
||||
* Tasks live next to the modules that use them — there is intentionally
|
||||
* no central task registry. The convention is:
|
||||
*
|
||||
* apps/mana/apps/web/src/lib/llm-tasks/ ← cross-module helpers
|
||||
* apps/mana/apps/web/src/lib/modules/notes/llm-tasks/ ← notes-specific
|
||||
*
|
||||
* The orchestrator never imports tasks directly — modules import tasks
|
||||
* AND the orchestrator and call `orchestrator.run(task, input)`.
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
import type { ContentClass, CapabilityRequirements, LlmBackend, LlmTaskRequest } from './types';
|
||||
|
||||
/** A routable unit of LLM work: LLM implementation, optional rules
 * fallback, and the metadata the orchestrator needs to pick a tier. */
export interface LlmTask<TInput, TOutput> {
  /**
   * Stable identifier for this task. Used for telemetry, per-task
   * tier overrides in user settings, and debug logs. Convention is
   * `{module}.{action}` — e.g. `notes.extractTags`, `todo.parseQuickAdd`.
   */
  readonly name: string;

  /** Lowest tier this task can produce a useful result on. */
  readonly minTier: LlmTier;

  /** Privacy class of inputs this task handles. */
  readonly contentClass: ContentClass;

  /** Capability requirements that exclude tiers/backends that can't satisfy them. */
  readonly requires?: CapabilityRequirements;

  /**
   * User-facing label, shown when telling the user "this task needs
   * AI" or "this result was computed via tier X".
   */
  readonly displayLabel: string;

  /**
   * The LLM-based implementation. Builds an LlmTaskRequest from the
   * task input and asks the backend to run it, then maps the
   * generated text back into the typed TOutput shape (e.g. parses
   * JSON, validates a date, looks up a tag).
   */
  runLlm(input: TInput, backend: LlmBackend): Promise<TOutput>;

  /**
   * Optional deterministic fallback — runs when no LLM tier is
   * available, or when the LLM tier failed and
   * `fallbackToRulesOnError` is enabled in user settings.
   *
   * Returning the typed TOutput indicates success. Throwing means
   * the rules implementation also can't handle this input — the
   * orchestrator will then surface a NoTierAvailableError so the
   * UI can ask the user for direct input.
   */
  runRules?(input: TInput): Promise<TOutput>;
}
|
||||
|
||||
/**
|
||||
* Helper for tasks that need to construct an LlmTaskRequest from their
|
||||
* own input. Centralizes the boilerplate so individual tasks don't have
|
||||
* to redeclare taskName / contentClass / requires every time.
|
||||
*/
|
||||
export function buildTaskRequest<TInput, TOutput>(
|
||||
task: LlmTask<TInput, TOutput>,
|
||||
overrides: Omit<LlmTaskRequest, 'taskName' | 'contentClass' | 'requires'>
|
||||
): LlmTaskRequest {
|
||||
return {
|
||||
...overrides,
|
||||
taskName: task.name,
|
||||
contentClass: task.contentClass,
|
||||
requires: task.requires,
|
||||
};
|
||||
}
|
||||
50
packages/shared-llm/src/tiers.ts
Normal file
50
packages/shared-llm/src/tiers.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Tier definitions for the Mana LLM orchestrator.
|
||||
*
|
||||
* Four tiers, ordered from most-private to least-private:
|
||||
*
|
||||
* none — Deterministic parsers / heuristics. No LLM at all.
|
||||
* Always available. Zero cost. Quality varies by task.
|
||||
*
|
||||
* browser — Gemma 4 E2B running in the user's browser via WebGPU
|
||||
* (@mana/local-llm). 100% on-device. Requires the
|
||||
* ~500 MB model to be downloaded once and ~2 GB VRAM.
|
||||
*
|
||||
* mana-server — services/mana-llm + Ollama on our own infrastructure
|
||||
* (currently the Mac Mini, gemma3:4b by default).
|
||||
* Data leaves the device but stays in our control.
|
||||
*
|
||||
* cloud — services/mana-llm proxied to a third-party provider
|
||||
* (Google Gemini, configured via google_api_key in the
|
||||
* mana-llm service env). Data goes to the third party.
|
||||
*
|
||||
* The numeric rank is used by the orchestrator to compare a user's
|
||||
* preferred tier against a task's minimum tier ("can the user even
|
||||
* run this task?") and is the canonical sort order for the privacy
|
||||
* gradient.
|
||||
*/
|
||||
|
||||
export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud';

// Canonical privacy ordering: higher rank = less private. The
// orchestrator compares a user's allowed tiers against a task's
// minTier using these ranks.
export const TIER_RANK: Record<LlmTier, number> = {
  none: 0,
  browser: 1,
  'mana-server': 2,
  cloud: 3,
};

// All tiers in ascending rank order (most-private first).
export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud'];
|
||||
|
||||
/** Human-readable label, kept here so backends/UI agree on naming. */
|
||||
export function tierLabel(tier: LlmTier): string {
|
||||
switch (tier) {
|
||||
case 'none':
|
||||
return 'Lokal (ohne KI)';
|
||||
case 'browser':
|
||||
return 'Auf deinem Gerät';
|
||||
case 'mana-server':
|
||||
return 'Mana-Server';
|
||||
case 'cloud':
|
||||
return 'Google Gemini';
|
||||
}
|
||||
}
|
||||
150
packages/shared-llm/src/types.ts
Normal file
150
packages/shared-llm/src/types.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* Shared types for the Mana LLM orchestrator.
|
||||
*
|
||||
* These deliberately mirror the surface of @mana/local-llm so that the
|
||||
* browser tier can pass them straight through, but they are intentionally
|
||||
* a SUPERSET (with task name, content class, capability requirements,
|
||||
* rule fallback) so the orchestrator can route intelligently.
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
|
||||
/** A single turn in a chat transcript. */
export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
||||
|
||||
/** Input to a backend generate call. */
export interface GenerateOptions {
  messages: ChatMessage[];
  /** Sampling temperature; backend default applies when omitted. */
  temperature?: number;
  /** Upper bound on generated tokens; backend default applies when omitted. */
  maxTokens?: number;
  /** Optional streaming callback — called once per emitted token chunk */
  onToken?: (token: string) => void;
}
|
||||
|
||||
export interface GenerateResult {
|
||||
content: string;
|
||||
usage?: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
};
|
||||
latencyMs: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* The privacy class of the input being processed. The orchestrator uses
|
||||
* this to ENFORCE that sensitive content never leaves the device, even
|
||||
* if the user has globally allowed cloud tiers.
|
||||
*
|
||||
* public — already public-domain content (e.g. an open URL the user
|
||||
* wants summarized). Anything is fair game.
|
||||
* personal — the user's own content but routine (a calendar event, a
|
||||
* todo title). Default for most module tasks. Allowed on
|
||||
* any tier the user has enabled.
|
||||
* sensitive — explicitly private content (notes flagged sensitive,
|
||||
* diary entries, dreams, financial data). The orchestrator
|
||||
* restricts these to {none, browser} regardless of user's
|
||||
* global settings — the user has to explicitly opt out of
|
||||
* this protection per-task to send sensitive content to
|
||||
* server/cloud tiers.
|
||||
*/
|
||||
export type ContentClass = 'public' | 'personal' | 'sensitive';
|
||||
|
||||
export interface CapabilityRequirements {
|
||||
/** Task needs to receive structured JSON in response */
|
||||
json?: boolean;
|
||||
/** Task needs at least this many context tokens (input + output) */
|
||||
minContextTokens?: number;
|
||||
/** Task needs streaming support (per-token onToken callbacks) */
|
||||
streaming?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* The high-level "I want to do X" descriptor that flows from a module
|
||||
* to the orchestrator. Concrete LlmTask implementations build these
|
||||
* internally before delegating to the orchestrator.
|
||||
*/
|
||||
export interface LlmTaskRequest extends GenerateOptions {
|
||||
/** Stable name for analytics + per-task overrides — e.g. "notes.extractTags" */
|
||||
taskName: string;
|
||||
contentClass: ContentClass;
|
||||
requires?: CapabilityRequirements;
|
||||
}
|
||||
|
||||
/**
|
||||
* The result of running a task through the orchestrator. Carries the
|
||||
* tier that actually executed (which may differ from the user's
|
||||
* preferred tier if a fallback kicked in) and the trail of tiers
|
||||
* that were tried first — useful for telemetry and for debugging
|
||||
* "why did this task end up running on tier X?".
|
||||
*/
|
||||
export interface LlmTaskResult<T = string> {
|
||||
value: T;
|
||||
source: LlmTier;
|
||||
latencyMs: number;
|
||||
/** Tiers that were attempted before `source` succeeded */
|
||||
attempted: LlmTier[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Backend interface that the orchestrator talks to. The "none" tier
|
||||
* does NOT implement this — rule-based fallbacks live on each
|
||||
* concrete LlmTask, not on a backend object.
|
||||
*/
|
||||
export interface LlmBackend {
|
||||
readonly tier: Exclude<LlmTier, 'none'>;
|
||||
|
||||
/** Could this backend run AT ALL given the current environment?
|
||||
* e.g. browser tier checks for WebGPU + user-enabled, server tier
|
||||
* checks for a configured base URL. */
|
||||
isAvailable(): boolean;
|
||||
|
||||
/** Could this backend run RIGHT NOW? e.g. browser tier checks if
|
||||
* the model is loaded into VRAM. May return false even when
|
||||
* isAvailable() is true (model still downloading, server in
|
||||
* startup, …). */
|
||||
isReady(): boolean | Promise<boolean>;
|
||||
|
||||
/** Run a task. The backend is responsible for actually performing
|
||||
* the inference and returning the result; it does NOT decide
|
||||
* whether it SHOULD run (the orchestrator did that). */
|
||||
generate(req: LlmTaskRequest): Promise<GenerateResult>;
|
||||
}
|
||||
|
||||
/**
|
||||
* The mutable user preferences that drive routing.
|
||||
*/
|
||||
export interface LlmSettings {
|
||||
/** Tiers the orchestrator is allowed to use, in preference order.
|
||||
* An empty array means "no AI at all" — only Tier 0 (rules) runs. */
|
||||
allowedTiers: LlmTier[];
|
||||
|
||||
/** Per-task overrides — keyed by task name, value is the tier to
|
||||
* use for that task specifically (overrides allowedTiers order). */
|
||||
taskOverrides: Record<string, LlmTier>;
|
||||
|
||||
/** When the user-chosen tier fails to run a task, fall back to
|
||||
* the rules tier (if the task has a runT0 implementation).
|
||||
* When false, failures surface as errors instead. */
|
||||
fallbackToRulesOnError: boolean;
|
||||
|
||||
/** Show a small "via Edge / via Server / via Gemini" badge under
|
||||
* every LLM result. Default true — helps the user understand
|
||||
* where their data went. */
|
||||
showSourceInUi: boolean;
|
||||
|
||||
/** First-time consent for the cloud tier. Until this is true, the
|
||||
* cloud tier is treated as unavailable even if it's in
|
||||
* allowedTiers. The user must explicitly tick a "yes I understand
|
||||
* Google sees my data" checkbox once. */
|
||||
cloudConsentGiven: boolean;
|
||||
}
|
||||
|
||||
export const DEFAULT_LLM_SETTINGS: LlmSettings = {
|
||||
allowedTiers: [], // ZERO opt-in by default — every user starts in Tier 0 only
|
||||
taskOverrides: {},
|
||||
fallbackToRulesOnError: true,
|
||||
showSourceInUi: true,
|
||||
cloudConsentGiven: false,
|
||||
};
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
/**
|
||||
* Core chat types for the LLM client.
|
||||
* These are the high-level types that consumers interact with.
|
||||
*/
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Messages
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface TextContentPart {
|
||||
type: 'text';
|
||||
text: string;
|
||||
}
|
||||
|
||||
export interface ImageContentPart {
|
||||
type: 'image_url';
|
||||
image_url: { url: string };
|
||||
}
|
||||
|
||||
export type ContentPart = TextContentPart | ImageContentPart;
|
||||
|
||||
export interface ChatMessage {
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string | ContentPart[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Options
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatOptions {
|
||||
/** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
|
||||
model?: string;
|
||||
/** Sampling temperature 0.0-2.0 */
|
||||
temperature?: number;
|
||||
/** Max tokens to generate */
|
||||
maxTokens?: number;
|
||||
/** System prompt prepended to messages */
|
||||
systemPrompt?: string;
|
||||
/** Request timeout in ms (overrides module default) */
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export interface JsonOptions<T = unknown> extends ChatOptions {
|
||||
/** Validation function applied to parsed JSON. Should throw on invalid data. */
|
||||
validate?: (data: unknown) => T;
|
||||
/** Number of extraction retries on parse failure (default: 1) */
|
||||
jsonRetries?: number;
|
||||
}
|
||||
|
||||
export interface VisionOptions extends ChatOptions {
|
||||
/** Vision model override (default from module config, e.g. "ollama/llava:7b") */
|
||||
visionModel?: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface TokenUsage {
|
||||
prompt_tokens: number;
|
||||
completion_tokens: number;
|
||||
total_tokens: number;
|
||||
}
|
||||
|
||||
export interface ChatResult {
|
||||
/** Generated text content */
|
||||
content: string;
|
||||
/** Model that was actually used */
|
||||
model: string;
|
||||
/** Token usage statistics */
|
||||
usage: TokenUsage;
|
||||
/** Request latency in milliseconds */
|
||||
latencyMs: number;
|
||||
}
|
||||
|
||||
export interface JsonResult<T = unknown> extends ChatResult {
|
||||
/** Parsed and optionally validated data */
|
||||
data: T;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Models
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ModelInfo {
|
||||
id: string;
|
||||
object: 'model';
|
||||
created: number;
|
||||
owned_by: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Health
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface HealthStatus {
|
||||
status: 'healthy' | 'degraded' | 'unhealthy';
|
||||
providers: Record<string, unknown>;
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
export type {
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './chat.types';
|
||||
|
||||
export type {
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionChoice,
|
||||
ChatCompletionStreamChunk,
|
||||
StreamChoice,
|
||||
EmbeddingRequest,
|
||||
EmbeddingResponse,
|
||||
EmbeddingData,
|
||||
ModelsListResponse,
|
||||
} from './openai-compat.types';
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
/**
|
||||
* OpenAI-compatible wire format types matching the mana-llm API contract.
|
||||
* These are internal types used for HTTP communication - consumers should
|
||||
* use the high-level types from chat.types.ts instead.
|
||||
*/
|
||||
|
||||
import type { ChatMessage, TokenUsage } from './chat.types';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Request (POST /v1/chat/completions)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionRequest {
|
||||
model: string;
|
||||
messages: ChatMessage[];
|
||||
stream?: boolean;
|
||||
temperature?: number;
|
||||
max_tokens?: number;
|
||||
top_p?: number;
|
||||
frequency_penalty?: number;
|
||||
presence_penalty?: number;
|
||||
stop?: string | string[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Response (non-streaming)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionResponse {
|
||||
id: string;
|
||||
object: 'chat.completion';
|
||||
created: number;
|
||||
model: string;
|
||||
choices: ChatCompletionChoice[];
|
||||
usage: TokenUsage;
|
||||
}
|
||||
|
||||
export interface ChatCompletionChoice {
|
||||
index: number;
|
||||
message: { role: 'assistant'; content: string };
|
||||
finish_reason: 'stop' | 'length' | 'content_filter' | null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Response (streaming)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionStreamChunk {
|
||||
id: string;
|
||||
object: 'chat.completion.chunk';
|
||||
created: number;
|
||||
model: string;
|
||||
choices: StreamChoice[];
|
||||
}
|
||||
|
||||
export interface StreamChoice {
|
||||
index: number;
|
||||
delta: { role?: 'assistant'; content?: string };
|
||||
finish_reason: string | null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Embeddings
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface EmbeddingRequest {
|
||||
model: string;
|
||||
input: string | string[];
|
||||
encoding_format?: 'float' | 'base64';
|
||||
}
|
||||
|
||||
export interface EmbeddingResponse {
|
||||
object: 'list';
|
||||
data: EmbeddingData[];
|
||||
model: string;
|
||||
usage: TokenUsage;
|
||||
}
|
||||
|
||||
export interface EmbeddingData {
|
||||
object: 'embedding';
|
||||
index: number;
|
||||
embedding: number[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Models (GET /v1/models)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ModelsListResponse {
|
||||
object: 'list';
|
||||
data: Array<{
|
||||
id: string;
|
||||
object: 'model';
|
||||
created: number;
|
||||
owned_by: string;
|
||||
}>;
|
||||
}
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
export { extractJson } from './json-extractor';
|
||||
export { retryFetch } from './retry';
|
||||
export type { RetryOptions } from './retry';
|
||||
export { LlmMetricsCollector } from './metrics';
|
||||
export type { LlmRequestMetrics, MetricsCallback } from './metrics';
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
/**
|
||||
* Extract and parse JSON from LLM responses.
|
||||
*
|
||||
* LLMs often wrap JSON in markdown code fences or include extra text.
|
||||
* This utility handles all common patterns:
|
||||
* 1. Direct JSON parse
|
||||
* 2. Markdown ```json ... ``` fences
|
||||
* 3. First { ... } or [ ... ] block in text
|
||||
*/
|
||||
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
|
||||
const trimmed = text.trim();
|
||||
|
||||
// Step 1: Try direct parse
|
||||
const direct = tryParse<T>(trimmed, validate);
|
||||
if (direct !== undefined) return direct;
|
||||
|
||||
// Step 2: Strip markdown code fences
|
||||
const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
if (fenceMatch) {
|
||||
const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
|
||||
if (fenced !== undefined) return fenced;
|
||||
}
|
||||
|
||||
// Step 3: Find first JSON object
|
||||
const objectStart = trimmed.indexOf('{');
|
||||
if (objectStart !== -1) {
|
||||
const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
|
||||
if (objectStr) {
|
||||
const obj = tryParse<T>(objectStr, validate);
|
||||
if (obj !== undefined) return obj;
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Find first JSON array
|
||||
const arrayStart = trimmed.indexOf('[');
|
||||
if (arrayStart !== -1) {
|
||||
const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
|
||||
if (arrayStr) {
|
||||
const arr = tryParse<T>(arrayStr, validate);
|
||||
if (arr !== undefined) return arr;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
|
||||
}
|
||||
|
||||
function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
|
||||
try {
|
||||
const parsed = JSON.parse(text);
|
||||
return validate ? validate(parsed) : parsed;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a balanced block starting from the given position.
|
||||
* Handles nested braces/brackets but not strings with escaped delimiters.
|
||||
*/
|
||||
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
|
||||
let depth = 0;
|
||||
let inString = false;
|
||||
let escape = false;
|
||||
|
||||
for (let i = start; i < text.length; i++) {
|
||||
const ch = text[i];
|
||||
|
||||
if (escape) {
|
||||
escape = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '\\') {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"') {
|
||||
inString = !inString;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inString) continue;
|
||||
|
||||
if (ch === open) depth++;
|
||||
if (ch === close) depth--;
|
||||
|
||||
if (depth === 0) {
|
||||
return text.slice(start, i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
/**
|
||||
* Request-level metrics for LLM calls.
|
||||
*
|
||||
* Provides an optional callback system that backends can hook into
|
||||
* for monitoring, logging, or forwarding to Prometheus/Grafana.
|
||||
*/
|
||||
|
||||
export interface LlmRequestMetrics {
|
||||
/** Model requested (e.g. "ollama/gemma3:4b") */
|
||||
model: string;
|
||||
/** Model actually used (may differ if fallback occurred) */
|
||||
actualModel: string;
|
||||
/** Request type */
|
||||
type: 'chat' | 'json' | 'vision' | 'visionJson' | 'embed' | 'stream';
|
||||
/** Total request duration in ms */
|
||||
latencyMs: number;
|
||||
/** Token usage */
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
/** Whether this request was a fallback (model differs from requested) */
|
||||
wasFallback: boolean;
|
||||
/** Whether the request succeeded */
|
||||
success: boolean;
|
||||
/** Error message if failed */
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export type MetricsCallback = (metrics: LlmRequestMetrics) => void;
|
||||
|
||||
/**
|
||||
* Simple in-memory metrics aggregator.
|
||||
* Useful for health endpoints and debugging.
|
||||
*/
|
||||
export class LlmMetricsCollector {
|
||||
private _totalRequests = 0;
|
||||
private _totalErrors = 0;
|
||||
private _totalFallbacks = 0;
|
||||
private _totalTokens = 0;
|
||||
private _totalLatencyMs = 0;
|
||||
private _byModel: Map<string, { requests: number; tokens: number; errors: number }> = new Map();
|
||||
|
||||
/** Use as MetricsCallback */
|
||||
readonly collect = (metrics: LlmRequestMetrics): void => {
|
||||
this._totalRequests++;
|
||||
this._totalLatencyMs += metrics.latencyMs;
|
||||
this._totalTokens += metrics.totalTokens;
|
||||
|
||||
if (!metrics.success) this._totalErrors++;
|
||||
if (metrics.wasFallback) this._totalFallbacks++;
|
||||
|
||||
const modelKey = metrics.actualModel;
|
||||
const existing = this._byModel.get(modelKey) ?? { requests: 0, tokens: 0, errors: 0 };
|
||||
existing.requests++;
|
||||
existing.tokens += metrics.totalTokens;
|
||||
if (!metrics.success) existing.errors++;
|
||||
this._byModel.set(modelKey, existing);
|
||||
};
|
||||
|
||||
/** Get summary stats for health endpoints / dashboards */
|
||||
getSummary() {
|
||||
return {
|
||||
totalRequests: this._totalRequests,
|
||||
totalErrors: this._totalErrors,
|
||||
totalFallbacks: this._totalFallbacks,
|
||||
totalTokens: this._totalTokens,
|
||||
averageLatencyMs:
|
||||
this._totalRequests > 0 ? Math.round(this._totalLatencyMs / this._totalRequests) : 0,
|
||||
fallbackRate:
|
||||
this._totalRequests > 0
|
||||
? Math.round((this._totalFallbacks / this._totalRequests) * 100)
|
||||
: 0,
|
||||
errorRate:
|
||||
this._totalRequests > 0 ? Math.round((this._totalErrors / this._totalRequests) * 100) : 0,
|
||||
byModel: Object.fromEntries(this._byModel),
|
||||
};
|
||||
}
|
||||
|
||||
/** Reset all counters */
|
||||
reset(): void {
|
||||
this._totalRequests = 0;
|
||||
this._totalErrors = 0;
|
||||
this._totalFallbacks = 0;
|
||||
this._totalTokens = 0;
|
||||
this._totalLatencyMs = 0;
|
||||
this._byModel.clear();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
/**
|
||||
* Fetch wrapper with exponential backoff retry for transient failures.
|
||||
*
|
||||
* Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
|
||||
* Does NOT retry on: 400, 401, 403, 404 (client errors).
|
||||
*/
|
||||
|
||||
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
|
||||
|
||||
export interface RetryOptions {
|
||||
maxRetries: number;
|
||||
/** Base delay in ms (doubles each retry). Default: 200 */
|
||||
baseDelay?: number;
|
||||
}
|
||||
|
||||
export async function retryFetch(
|
||||
url: string,
|
||||
init: RequestInit,
|
||||
options: RetryOptions
|
||||
): Promise<Response> {
|
||||
const { maxRetries, baseDelay = 200 } = options;
|
||||
let lastError: Error | undefined;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const response = await fetch(url, init);
|
||||
|
||||
if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
|
||||
return response;
|
||||
}
|
||||
|
||||
// Retryable status code
|
||||
lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
|
||||
} catch (error) {
|
||||
// Network error (connection refused, timeout, etc.)
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
}
|
||||
|
||||
// Don't sleep after the last attempt
|
||||
if (attempt < maxRetries) {
|
||||
const delay = baseDelay * Math.pow(2, attempt);
|
||||
await sleep(delay);
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError ?? new Error('retryFetch exhausted all retries');
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
|
@ -1,21 +1,14 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2021",
|
||||
"module": "commonjs",
|
||||
"lib": ["ES2021"],
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"noEmit": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"moduleResolution": "node",
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true
|
||||
"forceConsistentCasingInFileNames": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
|
|
|
|||
33
pnpm-lock.yaml
generated
33
pnpm-lock.yaml
generated
|
|
@ -978,6 +978,9 @@ importers:
|
|||
'@mana/shared-links':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-links
|
||||
'@mana/shared-llm':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-llm
|
||||
'@mana/shared-stores':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-stores
|
||||
|
|
@ -2981,31 +2984,19 @@ importers:
|
|||
|
||||
packages/shared-llm:
|
||||
dependencies:
|
||||
'@nestjs/common':
|
||||
specifier: ^10.0.0 || ^11.0.0
|
||||
version: 10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2)
|
||||
'@nestjs/config':
|
||||
specifier: ^3.0.0 || ^4.0.0
|
||||
version: 3.3.0(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(rxjs@7.8.2)
|
||||
'@nestjs/core':
|
||||
specifier: ^10.0.0 || ^11.0.0
|
||||
version: 10.4.22(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/platform-express@10.4.22)(reflect-metadata@0.2.2)(rxjs@7.8.2)
|
||||
reflect-metadata:
|
||||
specifier: ^0.1.13 || ^0.2.0
|
||||
version: 0.2.2
|
||||
rxjs:
|
||||
specifier: ^7.0.0
|
||||
version: 7.8.2
|
||||
'@mana/local-llm':
|
||||
specifier: workspace:*
|
||||
version: link:../local-llm
|
||||
devDependencies:
|
||||
'@types/node':
|
||||
specifier: ^20.0.0
|
||||
version: 20.19.39
|
||||
typescript:
|
||||
specifier: ^24.10.1
|
||||
version: 24.12.2
|
||||
svelte:
|
||||
specifier: ^5.0.0
|
||||
version: 5.55.1
|
||||
typescript:
|
||||
specifier: ^5.9.3
|
||||
version: 5.9.3
|
||||
vitest:
|
||||
specifier: ^4.1.2
|
||||
version: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@20.19.39)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@20.19.39)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
|
||||
|
||||
packages/shared-logger:
|
||||
devDependencies:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue