From 56065c85378cc41c2018c5e2e6604820794a45c1 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 9 Apr 2026 00:44:00 +0200 Subject: [PATCH] fix(mana/web): unwrap $state proxy in workbench-scenes Dexie writes Adding an app to a workbench scene threw DataCloneError. scenesState is a $state array, so current.openApps was a Svelte 5 proxy and spreading it into a new array left proxy entries inside; IndexedDB's structured clone refuses to serialise those. Snapshot before handing the array to patchScene / createScene so Dexie sees plain objects. Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/mana/apps/web/Dockerfile | 1 + apps/mana/apps/web/package.json | 1 + .../web/src/lib/llm-tasks/extract-date.ts | 119 ++++++ .../apps/web/src/lib/llm-tasks/summarize.ts | 57 +++ .../src/lib/stores/workbench-scenes.svelte.ts | 13 +- .../src/routes/(app)/llm-test/+page.svelte | 149 ++++++- docker/Dockerfile.sveltekit-base | 1 + packages/shared-llm/package.json | 47 +-- .../src/__tests__/json-extractor.spec.ts | 119 ------ .../src/__tests__/llm-client.spec.ts | 277 ------------- .../shared-llm/src/__tests__/retry.spec.ts | 118 ------ packages/shared-llm/src/backends/browser.ts | 62 +++ packages/shared-llm/src/backends/cloud.ts | 44 ++ .../shared-llm/src/backends/mana-server.ts | 43 ++ packages/shared-llm/src/backends/remote.ts | 135 ++++++ packages/shared-llm/src/errors.ts | 80 ++++ packages/shared-llm/src/index.ts | 73 ++-- packages/shared-llm/src/interfaces/index.ts | 8 - .../src/interfaces/llm-options.interface.ts | 52 --- packages/shared-llm/src/llm-client.service.ts | 16 - packages/shared-llm/src/llm-client.ts | 392 ------------------ packages/shared-llm/src/llm.constants.ts | 1 - packages/shared-llm/src/llm.module.ts | 80 ---- packages/shared-llm/src/orchestrator.ts | 258 ++++++++++++ packages/shared-llm/src/standalone.ts | 30 -- packages/shared-llm/src/store.svelte.ts | 107 +++++ packages/shared-llm/src/task.ts | 82 ++++ packages/shared-llm/src/tiers.ts | 50 +++ packages/shared-llm/src/types.ts | 150 +++++++ packages/shared-llm/src/types/chat.types.ts | 100 ----- packages/shared-llm/src/types/index.ts | 26 -- .../src/types/openai-compat.types.ts | 97 ----- packages/shared-llm/src/utils/index.ts | 5 - .../shared-llm/src/utils/json-extractor.ts | 94 ----- packages/shared-llm/src/utils/metrics.ts | 88 ---- packages/shared-llm/src/utils/retry.ts | 51 --- packages/shared-llm/tsconfig.json | 21 +- pnpm-lock.yaml | 33 +- 38 files changed, 1415 insertions(+), 1665 deletions(-) create mode 100644 apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts create mode 100644 apps/mana/apps/web/src/lib/llm-tasks/summarize.ts delete mode 100644 packages/shared-llm/src/__tests__/json-extractor.spec.ts delete mode 100644 packages/shared-llm/src/__tests__/llm-client.spec.ts delete mode 100644 packages/shared-llm/src/__tests__/retry.spec.ts create mode 100644 packages/shared-llm/src/backends/browser.ts create mode 100644 packages/shared-llm/src/backends/cloud.ts create mode 100644 packages/shared-llm/src/backends/mana-server.ts create mode 100644 packages/shared-llm/src/backends/remote.ts create mode 100644 packages/shared-llm/src/errors.ts delete mode 100644 packages/shared-llm/src/interfaces/index.ts delete mode 100644 packages/shared-llm/src/interfaces/llm-options.interface.ts delete mode 100644 packages/shared-llm/src/llm-client.service.ts delete mode 100644 packages/shared-llm/src/llm-client.ts delete mode 100644 packages/shared-llm/src/llm.constants.ts delete mode 100644 packages/shared-llm/src/llm.module.ts create 
mode 100644 packages/shared-llm/src/orchestrator.ts delete mode 100644 packages/shared-llm/src/standalone.ts create mode 100644 packages/shared-llm/src/store.svelte.ts create mode 100644 packages/shared-llm/src/task.ts create mode 100644 packages/shared-llm/src/tiers.ts create mode 100644 packages/shared-llm/src/types.ts delete mode 100644 packages/shared-llm/src/types/chat.types.ts delete mode 100644 packages/shared-llm/src/types/index.ts delete mode 100644 packages/shared-llm/src/types/openai-compat.types.ts delete mode 100644 packages/shared-llm/src/utils/index.ts delete mode 100644 packages/shared-llm/src/utils/json-extractor.ts delete mode 100644 packages/shared-llm/src/utils/metrics.ts delete mode 100644 packages/shared-llm/src/utils/retry.ts diff --git a/apps/mana/apps/web/Dockerfile b/apps/mana/apps/web/Dockerfile index 718512b59..9b18ada54 100644 --- a/apps/mana/apps/web/Dockerfile +++ b/apps/mana/apps/web/Dockerfile @@ -15,6 +15,7 @@ COPY apps/calc/packages/shared ./apps/calc/packages/shared COPY apps/zitare/packages/content ./apps/zitare/packages/content COPY packages/shared-uload ./packages/shared-uload COPY packages/local-llm ./packages/local-llm +COPY packages/shared-llm ./packages/shared-llm RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \ pnpm install --no-frozen-lockfile --ignore-scripts diff --git a/apps/mana/apps/web/package.json b/apps/mana/apps/web/package.json index 3f95e97ff..ad1e71f09 100644 --- a/apps/mana/apps/web/package.json +++ b/apps/mana/apps/web/package.json @@ -60,6 +60,7 @@ "@mana/shared-i18n": "workspace:*", "@mana/shared-icons": "workspace:*", "@mana/shared-links": "workspace:*", + "@mana/shared-llm": "workspace:*", "@mana/shared-stores": "workspace:*", "@mana/shared-tags": "workspace:*", "@mana/shared-tailwind": "workspace:*", diff --git a/apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts b/apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts new file mode 100644 index 000000000..701597039 --- /dev/null +++ b/apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts @@ -0,0 +1,119 @@ +/** + * extractDateTask — pulls an ISO date out of a free-form German/English + * string. Used by Quick-Add features that want to recognize phrases like + * "morgen 14 Uhr" or "next Tuesday". + * + * Has a runRules() fallback so it works even on Tier 0 (no AI) — the + * fallback uses a hand-rolled regex set covering the most common + * shortcuts. It's intentionally narrow: it only catches the patterns it + * KNOWS, and returns null otherwise. This is the right semantic for + * Tier 0 — "I'm not certain enough to guess" is a valid answer when + * the user has explicitly opted out of LLM use. + * + * For production-grade NL date parsing without an LLM, replacing the + * regex stub with chrono-node would be a one-line change in runRules(). + */ + +import type { LlmBackend, LlmTask } from '@mana/shared-llm'; + +export interface ExtractDateInput { + text: string; + /** Reference date for relative parsing ("morgen", "next week"). Defaults to now. */ + now?: Date; +} + +export type ExtractDateOutput = Date | null; + +export const extractDateTask: LlmTask = { + name: 'common.extractDate', + minTier: 'none', // works on Tier 0 thanks to the regex fallback + contentClass: 'personal', + displayLabel: 'Datum aus Text erkennen', + + async runLlm(input, backend: LlmBackend): Promise { + const refIso = (input.now ?? 
new Date()).toISOString(); + const result = await backend.generate({ + taskName: extractDateTask.name, + contentClass: extractDateTask.contentClass, + messages: [ + { + role: 'system', + content: + 'You extract date+time references from short user input. Always respond with strict JSON of the form {"iso":"YYYY-MM-DDTHH:MM:SSZ"} or {"iso":null}. No prose, no markdown.', + }, + { + role: 'user', + content: `Reference time: ${refIso}\nUser input: ${input.text}`, + }, + ], + temperature: 0, + maxTokens: 80, + }); + + try { + // Strip markdown fences if a less-disciplined model added them + const cleaned = result.content.replace(/```(?:json)?|```/g, '').trim(); + const parsed = JSON.parse(cleaned) as { iso: string | null }; + return parsed.iso ? new Date(parsed.iso) : null; + } catch { + return null; + } + }, + + async runRules(input): Promise { + const text = input.text.toLowerCase().trim(); + const now = input.now ?? new Date(); + + // "heute" / "today" + if (/\b(heute|today)\b/.test(text)) { + return withTime(new Date(now), text); + } + + // "morgen" / "tomorrow" + if (/\b(morgen|tomorrow)\b/.test(text)) { + const d = new Date(now); + d.setDate(d.getDate() + 1); + return withTime(d, text); + } + + // "übermorgen" / "day after tomorrow" + if (/\b(übermorgen|day after tomorrow)\b/.test(text)) { + const d = new Date(now); + d.setDate(d.getDate() + 2); + return withTime(d, text); + } + + // "in N tagen" / "in N days" + const inDays = text.match(/\bin (\d+) (tagen|days?)\b/); + if (inDays) { + const d = new Date(now); + d.setDate(d.getDate() + parseInt(inDays[1], 10)); + return withTime(d, text); + } + + // Explicit ISO date "2026-04-09" or "2026-04-09T14:00" + const iso = text.match(/(\d{4}-\d{2}-\d{2}(?:t\d{2}:\d{2}(?::\d{2})?)?)/); + if (iso) { + const d = new Date(iso[1]); + if (!Number.isNaN(d.getTime())) return d; + } + + return null; + }, +}; + +/** Apply a "HH:MM" or "HH Uhr" time hint to a date if found in the text. */ +function withTime(date: Date, text: string): Date { + const hhmm = text.match(/\b(\d{1,2}):(\d{2})\b/); + if (hhmm) { + date.setHours(parseInt(hhmm[1], 10), parseInt(hhmm[2], 10), 0, 0); + return date; + } + const hhUhr = text.match(/\b(\d{1,2})\s*uhr\b/); + if (hhUhr) { + date.setHours(parseInt(hhUhr[1], 10), 0, 0, 0); + return date; + } + // No time hint — keep the original time-of-day + return date; +} diff --git a/apps/mana/apps/web/src/lib/llm-tasks/summarize.ts b/apps/mana/apps/web/src/lib/llm-tasks/summarize.ts new file mode 100644 index 000000000..1c6927775 --- /dev/null +++ b/apps/mana/apps/web/src/lib/llm-tasks/summarize.ts @@ -0,0 +1,57 @@ +/** + * summarizeTextTask — produces a short summary of a longer piece of + * text. Used for things like dream entries, voice memo transcripts, + * meeting notes. + * + * Has NO runRules() implementation: a meaningful summary genuinely + * requires an LLM, and a fake "first sentence + ellipsis" fallback + * would mislead the user. Tasks without a runRules forces the user + * to actually pick a higher tier in settings — and the orchestrator's + * canRun() will return false for them when they're on Tier 0. + * + * minTier is set to 'browser' rather than 'mana-server' because Gemma + * 4 E2B handles short summarization tasks well in the browser. For + * very long inputs (>4k tokens) the task could escalate to + * mana-server via a per-task override. + */ + +import type { LlmBackend, LlmTask } from '@mana/shared-llm'; + +export interface SummarizeInput { + text: string; + /** Approximate target length in sentences. Default 3. 
*/ + sentences?: number; +} + +export type SummarizeOutput = string; + +export const summarizeTextTask: LlmTask = { + name: 'common.summarize', + minTier: 'browser', // genuinely needs an LLM — no rules-based equivalent + contentClass: 'personal', + displayLabel: 'Text zusammenfassen', + + async runLlm(input, backend: LlmBackend): Promise { + const sentences = input.sentences ?? 3; + const result = await backend.generate({ + taskName: summarizeTextTask.name, + contentClass: summarizeTextTask.contentClass, + messages: [ + { + role: 'system', + content: `Du fasst Text in ${sentences} prägnanten Sätzen zusammen. Behalte die wichtigsten Fakten und Beschlüsse, lasse Füller weg. Kein Markdown, keine Aufzählungen, keine Vorrede — nur die Zusammenfassung.`, + }, + { role: 'user', content: input.text }, + ], + temperature: 0.3, + maxTokens: 500, + }); + + return result.content.trim(); + }, + + // No runRules — this task is impossible without an LLM. The + // orchestrator's canRun() will return false for users on Tier 0, + // and modules using this task should hide their summarize button + // when canRun() is false. +}; diff --git a/apps/mana/apps/web/src/lib/stores/workbench-scenes.svelte.ts b/apps/mana/apps/web/src/lib/stores/workbench-scenes.svelte.ts index 73e7a0cb7..65b8bb4ee 100644 --- a/apps/mana/apps/web/src/lib/stores/workbench-scenes.svelte.ts +++ b/apps/mana/apps/web/src/lib/stores/workbench-scenes.svelte.ts @@ -99,10 +99,9 @@ async function patchScene( id: string, patch: Partial> ) { - await db.table(TABLE).update(id, { - ...patch, - updatedAt: nowIso(), - }); + // Strip Svelte 5 $state proxies — IndexedDB's structured clone can't serialize them. + const clean = $state.snapshot({ ...patch, updatedAt: nowIso() }); + await db.table(TABLE).update(id, clean); } async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchSceneApp[]) { @@ -110,7 +109,9 @@ async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchScen if (!id) return; const current = scenesState.find((s) => s.id === id); if (!current) return; - await patchScene(id, { openApps: fn(current.openApps) }); + // Snapshot before handing to the mutator so callers operate on plain objects. + const plainApps = $state.snapshot(current.openApps) as WorkbenchSceneApp[]; + await patchScene(id, { openApps: fn(plainApps) }); } // ─── Public store ───────────────────────────────────────────── @@ -191,7 +192,7 @@ export const workbenchScenesStore = { id, name: opts.name.trim() || 'Neue Szene', icon: opts.icon, - openApps: opts.seedApps ? structuredClone(opts.seedApps) : [], + openApps: opts.seedApps ? 
($state.snapshot(opts.seedApps) as WorkbenchSceneApp[]) : [], order: maxOrder + 1, createdAt: now, updatedAt: now, diff --git a/apps/mana/apps/web/src/routes/(app)/llm-test/+page.svelte b/apps/mana/apps/web/src/routes/(app)/llm-test/+page.svelte index 3b4a0f458..85d09e415 100644 --- a/apps/mana/apps/web/src/routes/(app)/llm-test/+page.svelte +++ b/apps/mana/apps/web/src/routes/(app)/llm-test/+page.svelte @@ -11,6 +11,16 @@ type ModelKey, } from '@mana/local-llm'; import { hasModelInCache } from '@mana/local-llm'; + import { + llmOrchestrator, + llmSettingsState, + updateLlmSettings, + ALL_TIERS, + tierLabel, + type LlmTier, + } from '@mana/shared-llm'; + import { extractDateTask } from '$lib/llm-tasks/extract-date'; + import { summarizeTextTask } from '$lib/llm-tasks/summarize'; import { marked } from 'marked'; import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@mana/shared-icons'; @@ -43,7 +53,47 @@ // --- State --- let selectedModel: ModelKey = $state('gemma-4-e2b'); - let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat'); + let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' | 'router' = + $state('chat'); + + // --- Router tab state --- + const settings = $derived(llmSettingsState.current); + let routerInput = $state('Treffen mit Sara morgen 14:30'); + let routerRunning = $state(false); + let routerResult = $state<{ + value: unknown; + source: string; + latencyMs: number; + attempted: string[]; + } | null>(null); + let routerError = $state(null); + + function toggleAllowedTier(tier: LlmTier) { + const current = settings.allowedTiers; + const next = current.includes(tier) ? current.filter((t) => t !== tier) : [...current, tier]; + updateLlmSettings({ allowedTiers: next }); + } + + async function runRouterTask(task: typeof extractDateTask | typeof summarizeTextTask) { + routerRunning = true; + routerResult = null; + routerError = null; + try { + const input = task === extractDateTask ? { text: routerInput } : { text: routerInput }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const result = await llmOrchestrator.run(task as any, input); + routerResult = { + value: result.value, + source: result.source, + latencyMs: result.latencyMs, + attempted: result.attempted, + }; + } catch (err) { + routerError = err instanceof Error ? `${err.name}: ${err.message}` : String(err); + } finally { + routerRunning = false; + } + } const supported = isLocalLlmSupported(); const status = getLocalLlmStatus(); @@ -581,7 +631,7 @@
- {#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab} + {#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }, { id: 'router', label: 'Router' }] as tab}
{/if} + + + {#if activeTab === 'router'} +
+
+

+ Smoke-Test für den tiered LLM-Router. Wähle welche Tiers der Orchestrator benutzen darf + — der Router wählt dann pro Task die erste passende Schicht aus deiner Liste. +

+ +
+
Erlaubte Tiers
+
+ {#each ALL_TIERS as tier} + {@const enabled = settings.allowedTiers.includes(tier)} + + {/each} +
+
+ Aktuell: {settings.allowedTiers.length === 0 + ? 'keine LLM-Tiers — nur Tier 0 (Regeln)' + : settings.allowedTiers.map(tierLabel).join(' → ')} +
+
+ + + +
+ + +
+ +
+ extractDate.canRun: {llmOrchestrator.canRun(extractDateTask)} · summarize.canRun: {llmOrchestrator.canRun( + summarizeTextTask + )} +
+
+ + {#if routerError} +
+
Task fehlgeschlagen
+
{routerError}
+
+ {/if} + + {#if routerResult} +
+
+ + {tierLabel(routerResult.source as LlmTier)} + + {routerResult.latencyMs} ms + {#if routerResult.attempted.length > 1} + (versucht: {routerResult.attempted.join(' → ')}) + {/if} +
+
{JSON.stringify(
+								routerResult.value,
+								null,
+								2
+							)}
+
+ {/if} +
+ {/if} {/if} diff --git a/docker/Dockerfile.sveltekit-base b/docker/Dockerfile.sveltekit-base index 066a6d763..4c0f1d140 100644 --- a/docker/Dockerfile.sveltekit-base +++ b/docker/Dockerfile.sveltekit-base @@ -67,6 +67,7 @@ COPY packages/credits ./packages/credits COPY packages/spiral-db ./packages/spiral-db COPY packages/wallpaper-generator ./packages/wallpaper-generator COPY packages/local-llm ./packages/local-llm +COPY packages/shared-llm ./packages/shared-llm # Install dependencies (shared packages only - app deps added later) RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \ diff --git a/packages/shared-llm/package.json b/packages/shared-llm/package.json index a4fc15cc8..f18c13d18 100644 --- a/packages/shared-llm/package.json +++ b/packages/shared-llm/package.json @@ -1,47 +1,26 @@ { "name": "@mana/shared-llm", - "version": "1.0.0", + "version": "2.0.0", "private": true, - "description": "Unified LLM client for all Mana backends via mana-llm service", - "main": "dist/index.js", - "types": "dist/index.d.ts", + "description": "Tiered LLM orchestrator for Mana — routes tasks across rules / browser-edge / mana-server / cloud backends with explicit user-controlled privacy tiers", + "main": "./src/index.ts", + "types": "./src/index.ts", "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "require": "./dist/index.js" - }, - "./standalone": { - "types": "./dist/standalone.d.ts", - "import": "./dist/standalone.js", - "require": "./dist/standalone.js" - } + ".": "./src/index.ts" }, "scripts": { - "build": "tsc", - "dev": "tsc --watch", - "clean": "rm -rf dist", "type-check": "tsc --noEmit", - "test": "vitest run" + "clean": "rm -rf dist" }, "dependencies": { - "@nestjs/common": "^10.0.0 || ^11.0.0", - "@nestjs/config": "^3.0.0 || ^4.0.0", - "@nestjs/core": "^10.0.0 || ^11.0.0", - "reflect-metadata": "^0.1.13 || ^0.2.0", - "rxjs": "^7.0.0" - }, - "peerDependencies": { - "@nestjs/common": "^10.0.0 || ^11.0.0", - "@nestjs/config": "^3.0.0 || ^4.0.0", - "@nestjs/core": "^10.0.0 || ^11.0.0" + "@mana/local-llm": "workspace:*" }, "devDependencies": { - "@types/node": "^20.0.0", - "typescript": "^5.0.0", - "vitest": "^4.1.2" + "@types/node": "^24.10.1", + "svelte": "^5.0.0", + "typescript": "^5.9.3" }, - "files": [ - "dist" - ] + "peerDependencies": { + "svelte": "^5.0.0" + } } diff --git a/packages/shared-llm/src/__tests__/json-extractor.spec.ts b/packages/shared-llm/src/__tests__/json-extractor.spec.ts deleted file mode 100644 index a67118c87..000000000 --- a/packages/shared-llm/src/__tests__/json-extractor.spec.ts +++ /dev/null @@ -1,119 +0,0 @@ -import { describe, it, expect } from 'vitest'; -import { extractJson } from '../utils/json-extractor'; - -describe('extractJson', () => { - it('parses direct JSON object', () => { - const result = extractJson('{"name": "test", "value": 42}'); - expect(result).toEqual({ name: 'test', value: 42 }); - }); - - it('parses direct JSON array', () => { - const result = extractJson('[1, 2, 3]'); - expect(result).toEqual([1, 2, 3]); - }); - - it('strips markdown json code fence', () => { - const input = '```json\n{"category": "bug", "title": "Fix login"}\n```'; - const result = extractJson(input); - expect(result).toEqual({ category: 'bug', title: 'Fix login' }); - }); - - it('strips markdown code fence without json label', () => { - const input = '```\n{"key": "value"}\n```'; - const result = extractJson(input); - expect(result).toEqual({ key: 'value' }); - }); - - it('extracts JSON from surrounding text', () 
=> { - const input = - 'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!'; - const result = extractJson(input); - expect(result).toEqual({ confidence: 0.95, species: 'Rose' }); - }); - - it('extracts JSON array from surrounding text', () => { - const input = 'The items are: [1, 2, 3] as requested.'; - const result = extractJson(input); - expect(result).toEqual([1, 2, 3]); - }); - - it('handles nested JSON objects', () => { - const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}'; - const result = extractJson(input); - expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] }); - }); - - it('handles JSON with escaped quotes in strings', () => { - const input = '{"text": "He said \\"hello\\""}'; - const result = extractJson(input); - expect(result).toEqual({ text: 'He said "hello"' }); - }); - - it('handles JSON with braces inside strings', () => { - const input = 'Result: {"code": "if (x) { return }"}'; - const result = extractJson(input); - expect(result).toEqual({ code: 'if (x) { return }' }); - }); - - it('trims whitespace before parsing', () => { - const input = ' \n {"key": "value"} \n '; - const result = extractJson(input); - expect(result).toEqual({ key: 'value' }); - }); - - it('applies validation function on success', () => { - const validate = (data: unknown) => { - const obj = data as { name: string }; - if (!obj.name) throw new Error('missing name'); - return obj; - }; - const result = extractJson('{"name": "test"}', validate); - expect(result).toEqual({ name: 'test' }); - }); - - it('throws when validation fails', () => { - const validate = (data: unknown) => { - const obj = data as { name?: string }; - if (!obj.name) throw new Error('missing name'); - return obj; - }; - expect(() => extractJson('{"value": 123}', validate)).toThrow(); - }); - - it('throws on completely invalid input', () => { - expect(() => extractJson('This is just plain text with no JSON')).toThrow( - 'Failed to extract JSON' - ); - }); - - it('throws on empty input', () => { - expect(() => extractJson('')).toThrow('Failed to extract JSON'); - }); - - it('handles real-world LLM response with preamble', () => { - const input = `Based on my analysis, here is the result: - -\`\`\`json -{ - "foods": [ - {"name": "Apple", "calories": 95, "protein": 0.5} - ], - "totalCalories": 95, - "confidence": 0.9 -} -\`\`\` - -This analysis is based on the image provided.`; - - const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input); - expect(result.totalCalories).toBe(95); - expect(result.foods).toHaveLength(1); - }); - - it('prefers object over array when both exist', () => { - // Direct parse fails, fence fails, tries object first - const input = 'Some text {"key": "val"} and [1, 2, 3]'; - const result = extractJson(input); - expect(result).toEqual({ key: 'val' }); - }); -}); diff --git a/packages/shared-llm/src/__tests__/llm-client.spec.ts b/packages/shared-llm/src/__tests__/llm-client.spec.ts deleted file mode 100644 index 2ee65e8e0..000000000 --- a/packages/shared-llm/src/__tests__/llm-client.spec.ts +++ /dev/null @@ -1,277 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { LlmClient } from '../llm-client'; -import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface'; -import type { ChatCompletionResponse } from '../types/openai-compat.types'; - -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -const DEFAULT_OPTIONS: ResolvedLlmOptions = { - manaLlmUrl: 
'http://localhost:3025', - defaultModel: 'ollama/gemma3:4b', - defaultVisionModel: 'ollama/llava:7b', - timeout: 30_000, - maxRetries: 0, // No retries in tests for simplicity - debug: false, -}; - -function mockCompletionResponse( - content: string, - model = 'ollama/gemma3:4b' -): ChatCompletionResponse { - return { - id: 'chatcmpl-test123', - object: 'chat.completion', - created: Date.now(), - model, - choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }], - usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 }, - }; -} - -function mockFetchOk(body: unknown): void { - mockFetch.mockResolvedValueOnce({ - ok: true, - status: 200, - json: () => Promise.resolve(body), - text: () => Promise.resolve(JSON.stringify(body)), - } as unknown as Response); -} - -function mockFetchError(status: number, body = ''): void { - mockFetch.mockResolvedValueOnce({ - ok: false, - status, - statusText: `Error ${status}`, - json: () => Promise.resolve({}), - text: () => Promise.resolve(body), - } as unknown as Response); -} - -describe('LlmClient', () => { - let client: LlmClient; - - beforeEach(() => { - vi.clearAllMocks(); - client = new LlmClient(DEFAULT_OPTIONS); - }); - - describe('chat', () => { - it('sends correct request body', async () => { - mockFetchOk(mockCompletionResponse('Hello!')); - - await client.chat('Hi there'); - - expect(mockFetch).toHaveBeenCalledTimes(1); - const [url, init] = mockFetch.mock.calls[0]; - expect(url).toBe('http://localhost:3025/v1/chat/completions'); - - const body = JSON.parse(init.body); - expect(body.model).toBe('ollama/gemma3:4b'); - expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]); - expect(body.stream).toBe(false); - }); - - it('includes system prompt when provided', async () => { - mockFetchOk(mockCompletionResponse('Response')); - - await client.chat('Question', { systemPrompt: 'You are helpful.' }); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.messages).toEqual([ - { role: 'system', content: 'You are helpful.' 
}, - { role: 'user', content: 'Question' }, - ]); - }); - - it('uses custom model and temperature', async () => { - mockFetchOk(mockCompletionResponse('Response')); - - await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 }); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.model).toBe('openrouter/gpt-4o'); - expect(body.temperature).toBe(0.3); - }); - - it('returns ChatResult with content and usage', async () => { - mockFetchOk(mockCompletionResponse('Generated text')); - - const result = await client.chat('Prompt'); - - expect(result.content).toBe('Generated text'); - expect(result.model).toBe('ollama/gemma3:4b'); - expect(result.usage.total_tokens).toBe(30); - expect(result.latencyMs).toBeGreaterThanOrEqual(0); - }); - - it('throws on error response', async () => { - mockFetchError(500, 'Internal Server Error'); - - await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500'); - }); - }); - - describe('json', () => { - it('extracts JSON from response', async () => { - mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}')); - - const result = await client.json<{ category: string; title: string }>('Analyze this'); - - expect(result.data).toEqual({ category: 'bug', title: 'Fix it' }); - expect(result.content).toBe('{"category": "bug", "title": "Fix it"}'); - }); - - it('extracts JSON from markdown-wrapped response', async () => { - mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```')); - - const result = await client.json('Parse this'); - expect(result.data).toEqual({ key: 'value' }); - }); - - it('applies validation function', async () => { - mockFetchOk(mockCompletionResponse('{"name": "test"}')); - - const validate = (data: unknown) => { - const obj = data as { name: string }; - if (typeof obj.name !== 'string') throw new Error('invalid'); - return obj; - }; - - const result = await client.json('Prompt', { validate }); - expect(result.data.name).toBe('test'); - }); - - it('retries JSON extraction on parse failure', async () => { - // First attempt returns bad JSON, second returns good - mockFetchOk(mockCompletionResponse('not json at all')); - mockFetchOk(mockCompletionResponse('{"valid": true}')); - - const result = await client.json('Prompt', { jsonRetries: 1 }); - expect(result.data).toEqual({ valid: true }); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - }); - - describe('vision', () => { - it('builds multimodal message with base64 image', async () => { - mockFetchOk(mockCompletionResponse('A rose')); - - await client.vision('What is this?', 'abc123base64', 'image/jpeg'); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.model).toBe('ollama/llava:7b'); - expect(body.messages[0].content).toEqual([ - { type: 'text', text: 'What is this?' 
}, - { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } }, - ]); - }); - - it('uses data URL as-is if already formatted', async () => { - mockFetchOk(mockCompletionResponse('A cat')); - - await client.vision('What?', 'data:image/png;base64,xyz'); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - const imageUrl = body.messages[0].content[1].image_url.url; - expect(imageUrl).toBe('data:image/png;base64,xyz'); - }); - - it('uses custom vision model when specified', async () => { - mockFetchOk(mockCompletionResponse('Result')); - - await client.vision('Prompt', 'img', 'image/jpeg', { - visionModel: 'ollama/qwen3-vl:4b', - }); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.model).toBe('ollama/qwen3-vl:4b'); - }); - }); - - describe('visionJson', () => { - it('extracts JSON from vision response', async () => { - mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```')); - - const result = await client.visionJson<{ species: string }>( - 'Identify plant', - 'imgdata', - 'image/jpeg' - ); - - expect(result.data.species).toBe('Rose'); - }); - }); - - describe('health', () => { - it('returns health status', async () => { - mockFetch.mockResolvedValueOnce({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - status: 'healthy', - providers: { ollama: { status: 'healthy' } }, - }), - } as unknown as Response); - - const health = await client.health(); - expect(health.status).toBe('healthy'); - }); - - it('returns unhealthy on network error', async () => { - mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED')); - - const health = await client.health(); - expect(health.status).toBe('unhealthy'); - }); - }); - - describe('listModels', () => { - it('returns model list', async () => { - mockFetch.mockResolvedValueOnce({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }], - }), - } as unknown as Response); - - const models = await client.listModels(); - expect(models).toHaveLength(1); - expect(models[0].id).toBe('ollama/gemma3:4b'); - }); - }); - - describe('chatMessages', () => { - it('sends full message history', async () => { - mockFetchOk(mockCompletionResponse('Answer')); - - await client.chatMessages([ - { role: 'system', content: 'Be brief.' }, - { role: 'user', content: 'Hello' }, - { role: 'assistant', content: 'Hi!' }, - { role: 'user', content: 'How are you?' 
}, - ]); - - const body = JSON.parse(mockFetch.mock.calls[0][1].body); - expect(body.messages).toHaveLength(4); - }); - }); - - describe('embed', () => { - it('sends embedding request', async () => { - mockFetchOk({ - object: 'list', - data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }], - model: 'ollama/gemma3:4b', - usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 }, - }); - - const result = await client.embed('Hello world'); - expect(result.embeddings).toHaveLength(1); - expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]); - }); - }); -}); diff --git a/packages/shared-llm/src/__tests__/retry.spec.ts b/packages/shared-llm/src/__tests__/retry.spec.ts deleted file mode 100644 index 80122e165..000000000 --- a/packages/shared-llm/src/__tests__/retry.spec.ts +++ /dev/null @@ -1,118 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { retryFetch } from '../utils/retry'; - -// Mock global fetch -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -function mockResponse(status: number, body = ''): Response { - return { - ok: status >= 200 && status < 300, - status, - statusText: `Status ${status}`, - text: () => Promise.resolve(body), - json: () => Promise.resolve(JSON.parse(body || '{}')), - headers: new Headers(), - } as unknown as Response; -} - -describe('retryFetch', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it('returns on first successful attempt', async () => { - mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}')); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.ok).toBe(true); - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('retries on 503 and succeeds', async () => { - mockFetch - .mockResolvedValueOnce(mockResponse(503)) - .mockResolvedValueOnce(mockResponse(200, '{}')); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.ok).toBe(true); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - - it('retries on 429 rate limit', async () => { - mockFetch - .mockResolvedValueOnce(mockResponse(429)) - .mockResolvedValueOnce(mockResponse(200, '{}')); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.ok).toBe(true); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - - it('retries on network error and succeeds', async () => { - mockFetch - .mockRejectedValueOnce(new Error('ECONNREFUSED')) - .mockResolvedValueOnce(mockResponse(200, '{}')); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.ok).toBe(true); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - - it('does NOT retry on 400 client error', async () => { - mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request')); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.status).toBe(400); - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('does NOT retry on 401 unauthorized', async () => { - mockFetch.mockResolvedValueOnce(mockResponse(401)); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 }); - expect(response.status).toBe(401); - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('does NOT retry on 404 not found', async () => { - mockFetch.mockResolvedValueOnce(mockResponse(404)); - - const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 
10 }); - expect(response.status).toBe(404); - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - - it('throws after exhausting all retries', async () => { - mockFetch - .mockResolvedValueOnce(mockResponse(503)) - .mockResolvedValueOnce(mockResponse(503)) - .mockResolvedValueOnce(mockResponse(503)); - - await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow( - 'HTTP 503' - ); - - expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries - }); - - it('throws after exhausting retries on network errors', async () => { - mockFetch - .mockRejectedValueOnce(new Error('ECONNREFUSED')) - .mockRejectedValueOnce(new Error('ECONNREFUSED')); - - await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow( - 'ECONNREFUSED' - ); - - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - - it('works with maxRetries: 0 (no retries)', async () => { - mockFetch.mockResolvedValueOnce(mockResponse(503)); - - await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow(); - - expect(mockFetch).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/shared-llm/src/backends/browser.ts b/packages/shared-llm/src/backends/browser.ts new file mode 100644 index 000000000..9c6f44781 --- /dev/null +++ b/packages/shared-llm/src/backends/browser.ts @@ -0,0 +1,62 @@ +/** + * Browser-edge backend — wraps @mana/local-llm. + * + * Inference happens 100% on the user's device via WebGPU. The model + * (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the + * browser. We do NOT auto-load on backend creation; the user has to + * explicitly trigger a load via the settings page or by using a feature + * that calls `ensureLoaded()`. This avoids surprising 500 MB downloads. + */ + +import { + localLLM, + LocalLLMEngine, + loadLocalLlm, + type ChatMessage as LocalChatMessage, +} from '@mana/local-llm'; +import { EdgeLoadFailedError } from '../errors'; +import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types'; + +export class BrowserBackend implements LlmBackend { + readonly tier = 'browser' as const; + + isAvailable(): boolean { + return LocalLLMEngine.isSupported(); + } + + isReady(): boolean { + return localLLM.isReady; + } + + /** Trigger the one-time model download + WebGPU initialization. + * Idempotent — safe to call repeatedly. Throws EdgeLoadFailedError + * on failure (model corrupt, WebGPU OOM, etc.). */ + async ensureLoaded(): Promise { + try { + await loadLocalLlm(); + } catch (err) { + throw new EdgeLoadFailedError(err instanceof Error ? err.message : String(err)); + } + } + + async generate(req: LlmTaskRequest): Promise { + await this.ensureLoaded(); + + const result = await localLLM.generate({ + messages: req.messages as LocalChatMessage[], + temperature: req.temperature, + maxTokens: req.maxTokens, + onToken: req.onToken, + }); + + return { + content: result.content, + usage: { + promptTokens: result.usage.prompt_tokens, + completionTokens: result.usage.completion_tokens, + totalTokens: result.usage.total_tokens, + }, + latencyMs: result.latencyMs, + }; + } +} diff --git a/packages/shared-llm/src/backends/cloud.ts b/packages/shared-llm/src/backends/cloud.ts new file mode 100644 index 000000000..9c5e35fd9 --- /dev/null +++ b/packages/shared-llm/src/backends/cloud.ts @@ -0,0 +1,44 @@ +/** + * Cloud backend — calls services/mana-llm with a `google/...` model + * string. 
mana-llm's ProviderRouter recognizes the `google/` prefix + * and routes to its Google Gemini provider, which holds the API key + * server-side (we never expose the key to the browser). + * + * Default model is google/gemini-2.0-flash. The mana-llm google.py + * provider also supports gemini-2.5-pro for higher-quality calls but + * 2.0-flash is the right default — fast, cheap, multimodal, plenty + * good for the kind of structured-output tasks Mana modules need. + * + * Cloud is gated by `cloudConsentGiven` in LlmSettings — even if a + * user has 'cloud' in their allowedTiers, the orchestrator will skip + * this backend until they've ticked the consent checkbox once. + */ + +import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types'; +import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote'; + +export interface CloudBackendOptions { + /** Gemini model to send. Default 'google/gemini-2.0-flash'. */ + defaultModel?: string; +} + +export class CloudBackend implements LlmBackend { + readonly tier = 'cloud' as const; + private readonly defaultModel: string; + + constructor(opts: CloudBackendOptions = {}) { + this.defaultModel = opts.defaultModel ?? 'google/gemini-2.0-flash'; + } + + isAvailable(): boolean { + return resolveLlmBaseUrl().length > 0; + } + + isReady(): boolean { + return this.isAvailable(); + } + + async generate(req: LlmTaskRequest): Promise { + return callManaLlmStreaming(this.tier, this.defaultModel, req); + } +} diff --git a/packages/shared-llm/src/backends/mana-server.ts b/packages/shared-llm/src/backends/mana-server.ts new file mode 100644 index 000000000..652c7969d --- /dev/null +++ b/packages/shared-llm/src/backends/mana-server.ts @@ -0,0 +1,43 @@ +/** + * Mana-server backend — calls services/mana-llm with an Ollama model + * string. mana-llm's ProviderRouter recognizes plain Ollama model names + * (no provider prefix) and routes them to the local Ollama instance on + * the Mac Mini, with automatic Gemini fallback if Ollama is overloaded. + * + * The default model is gemma3:4b — same model family as the browser + * tier (Gemma 4 E2B is the smaller sibling), so prompts behave + * consistently when a task auto-falls between tiers. + */ + +import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types'; +import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote'; + +export interface ManaServerBackendOptions { + /** Ollama model name to send to mana-llm. Default 'gemma3:4b'. */ + defaultModel?: string; +} + +export class ManaServerBackend implements LlmBackend { + readonly tier = 'mana-server' as const; + private readonly defaultModel: string; + + constructor(opts: ManaServerBackendOptions = {}) { + this.defaultModel = opts.defaultModel ?? 'gemma3:4b'; + } + + isAvailable(): boolean { + // Available if we have a base URL configured at all. We don't + // ping /health here — that adds latency to every isAvailable() + // check. The first real call will fail loudly if mana-llm is down. + return resolveLlmBaseUrl().length > 0; + } + + isReady(): boolean { + // Stateless from our side — assume ready if available. 
+ return this.isAvailable(); + } + + async generate(req: LlmTaskRequest): Promise { + return callManaLlmStreaming(this.tier, this.defaultModel, req); + } +} diff --git a/packages/shared-llm/src/backends/remote.ts b/packages/shared-llm/src/backends/remote.ts new file mode 100644 index 000000000..98161c445 --- /dev/null +++ b/packages/shared-llm/src/backends/remote.ts @@ -0,0 +1,135 @@ +/** + * Shared HTTP transport for the mana-server and cloud backends. + * + * Both tiers POST to the same OpenAI-compatible endpoint on + * services/mana-llm — they only differ in the `model:` string they + * send (which selects which provider mana-llm internally routes to). + * + * The endpoint is `/v1/chat/completions` and the wire format is + * straight OpenAI SSE: `data: {…}\n\n` lines, terminated by + * `data: [DONE]`. The hand-rolled parser is the same shape as the + * existing playground client (apps/mana/apps/web/src/lib/modules/ + * playground/llm.ts) so the two consumers stay aligned and can be + * unified later if we want. + */ + +import { BackendUnreachableError, ProviderBlockedError } from '../errors'; +import type { LlmTier } from '../tiers'; +import type { GenerateResult, LlmTaskRequest } from '../types'; + +const DEFAULT_LLM_URL = 'http://localhost:3025'; + +/** Resolve the mana-llm base URL from the window-injected env, falling + * back to localhost. Mirrors the playground client pattern. */ +export function resolveLlmBaseUrl(): string { + if (typeof window !== 'undefined') { + const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string }) + .__PUBLIC_MANA_LLM_URL__; + if (fromWindow) return fromWindow.replace(/\/$/, ''); + } + return DEFAULT_LLM_URL; +} + +/** + * Send a chat completion to mana-llm and yield streaming token deltas. + * The caller is responsible for assembling the final string and tracking + * latency. + * + * `tier` is only used for error tagging — both 'mana-server' and 'cloud' + * call the same endpoint with different model strings. + */ +export async function callManaLlmStreaming( + tier: Exclude, + model: string, + req: LlmTaskRequest +): Promise { + const url = `${resolveLlmBaseUrl()}/v1/chat/completions`; + const start = performance.now(); + + let res: Response; + try { + res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + credentials: 'include', // forwards the Mana auth cookie if present + body: JSON.stringify({ + model, + messages: req.messages, + temperature: req.temperature ?? 0.7, + max_tokens: req.maxTokens ?? 1024, + stream: true, + }), + }); + } catch (err) { + // Network failure — DNS, refused connection, CORS preflight, etc. + throw new BackendUnreachableError( + tier, + undefined, + err instanceof Error ? err.message : String(err) + ); + } + + if (!res.ok || !res.body) { + const text = await res.text().catch(() => ''); + // 451 = upstream blocked content (we use this convention; Gemini + // safety blocks are mapped to 451 in mana-llm's google provider). + // Other 4xx/5xx are generic server errors. 
+ if (res.status === 451 || /safety|blocked|filter/i.test(text)) { + throw new ProviderBlockedError(tier, text || `HTTP ${res.status}`); + } + throw new BackendUnreachableError(tier, res.status, text); + } + + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ''; + let collected = ''; + let promptTokens = 0; + let completionTokens = 0; + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + + // SSE frames are separated by blank lines. + let sep: number; + while ((sep = buffer.indexOf('\n\n')) !== -1) { + const frame = buffer.slice(0, sep); + buffer = buffer.slice(sep + 2); + + for (const line of frame.split('\n')) { + if (!line.startsWith('data:')) continue; + const data = line.slice(5).trim(); + if (!data || data === '[DONE]') continue; + try { + const json = JSON.parse(data) as { + choices?: Array<{ delta?: { content?: string } }>; + usage?: { prompt_tokens?: number; completion_tokens?: number }; + }; + const delta = json.choices?.[0]?.delta?.content; + if (delta) { + collected += delta; + req.onToken?.(delta); + } + if (json.usage) { + promptTokens = json.usage.prompt_tokens ?? promptTokens; + completionTokens = json.usage.completion_tokens ?? completionTokens; + } + } catch { + // Malformed frame — keepalive comment, skip silently. + } + } + } + } + + return { + content: collected, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, + latencyMs: Math.round(performance.now() - start), + }; +} diff --git a/packages/shared-llm/src/errors.ts b/packages/shared-llm/src/errors.ts new file mode 100644 index 000000000..dd0520cd4 --- /dev/null +++ b/packages/shared-llm/src/errors.ts @@ -0,0 +1,80 @@ +/** + * Typed error classes for the LLM orchestrator. UI code can `instanceof` + * these to render task-appropriate failure states (retry button, switch + * tier prompt, "blocked by safety filter" notice, etc.). + */ + +import type { LlmTier } from './tiers'; + +export class LlmError extends Error { + constructor(message: string) { + super(message); + this.name = 'LlmError'; + } +} + +/** No tier from the user's preference list was able to run the task. */ +export class NoTierAvailableError extends LlmError { + constructor( + public readonly taskName: string, + public readonly attempted: LlmTier[] + ) { + super(`No tier could run task '${taskName}' (attempted: ${attempted.join(', ') || 'none'})`); + this.name = 'NoTierAvailableError'; + } +} + +/** The user's chosen tier is below the task's declared minimum tier. */ +export class TierTooLowError extends LlmError { + constructor( + public readonly taskName: string, + public readonly requiredTier: LlmTier, + public readonly userTier: LlmTier + ) { + super( + `Task '${taskName}' requires tier '${requiredTier}' but user is on '${userTier}'. Activate the higher tier in settings.` + ); + this.name = 'TierTooLowError'; + } +} + +/** + * The upstream provider blocked the content (e.g. Gemini safety filter, + * OpenAI moderation). The UI should offer "retry" + "switch to another + * provider" options to the user — this is NOT auto-recoverable because + * a different provider might allow the same content (or might not). 
+ */ +export class ProviderBlockedError extends LlmError { + constructor( + public readonly tier: LlmTier, + public readonly providerMessage: string + ) { + super(`Provider '${tier}' blocked the request: ${providerMessage}`); + this.name = 'ProviderBlockedError'; + } +} + +/** Network/server error from a remote tier (mana-server, cloud). */ +export class BackendUnreachableError extends LlmError { + constructor( + public readonly tier: LlmTier, + public readonly httpStatus?: number, + details?: string + ) { + super( + `Backend '${tier}' is unreachable${httpStatus ? ` (HTTP ${httpStatus})` : ''}${details ? `: ${details}` : ''}` + ); + this.name = 'BackendUnreachableError'; + } +} + +/** + * The browser tier specifically failed to load — model download + * interrupted, WebGPU adapter request failed, OOM, etc. + */ +export class EdgeLoadFailedError extends LlmError { + constructor(public readonly cause: string) { + super(`Edge LLM failed to load: ${cause}`); + this.name = 'EdgeLoadFailedError'; + } +} diff --git a/packages/shared-llm/src/index.ts b/packages/shared-llm/src/index.ts index f0a95fe93..dc36aedb9 100644 --- a/packages/shared-llm/src/index.ts +++ b/packages/shared-llm/src/index.ts @@ -1,39 +1,44 @@ -// Module -export { LlmModule } from './llm.module'; -export { LlmClientService } from './llm-client.service'; -export { LLM_MODULE_OPTIONS } from './llm.constants'; - -// Core client (for advanced use cases) -export { LlmClient } from './llm-client'; - -// Interfaces -export type { - LlmModuleOptions, - LlmModuleAsyncOptions, - LlmOptionsFactory, - ResolvedLlmOptions, -} from './interfaces'; -export { resolveOptions } from './interfaces'; - -// Types +// Tiers + types +export { ALL_TIERS, TIER_RANK, tierLabel, type LlmTier } from './tiers'; export type { + CapabilityRequirements, ChatMessage, - ContentPart, - TextContentPart, - ImageContentPart, - ChatOptions, - JsonOptions, - VisionOptions, - TokenUsage, - ChatResult, - JsonResult, - ModelInfo, - HealthStatus, + ContentClass, + GenerateOptions, + GenerateResult, + LlmBackend, + LlmSettings, + LlmTaskRequest, + LlmTaskResult, } from './types'; +export { DEFAULT_LLM_SETTINGS } from './types'; -// Utilities -export { extractJson } from './utils'; +// Errors +export { + BackendUnreachableError, + EdgeLoadFailedError, + LlmError, + NoTierAvailableError, + ProviderBlockedError, + TierTooLowError, +} from './errors'; -// Metrics -export { LlmMetricsCollector } from './utils'; -export type { LlmRequestMetrics, MetricsCallback } from './utils'; +// Task contract +export { buildTaskRequest, type LlmTask } from './task'; + +// Orchestrator (rarely instantiated directly — most consumers use the +// store's singleton instead) +export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator'; + +// Backends (exported for tests + custom orchestrator setups) +export { BrowserBackend } from './backends/browser'; +export { CloudBackend, type CloudBackendOptions } from './backends/cloud'; +export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server'; + +// Singleton store + Svelte 5 reactive hooks +export { + llmOrchestrator, + llmSettingsState, + updateLlmSettings, + useTaskAvailability, +} from './store.svelte'; diff --git a/packages/shared-llm/src/interfaces/index.ts b/packages/shared-llm/src/interfaces/index.ts deleted file mode 100644 index 5159eabfe..000000000 --- a/packages/shared-llm/src/interfaces/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -export type { - LlmModuleOptions, - LlmModuleAsyncOptions, - 
LlmOptionsFactory, - ResolvedLlmOptions, -} from './llm-options.interface'; - -export { resolveOptions } from './llm-options.interface'; diff --git a/packages/shared-llm/src/interfaces/llm-options.interface.ts b/packages/shared-llm/src/interfaces/llm-options.interface.ts deleted file mode 100644 index 75eb78737..000000000 --- a/packages/shared-llm/src/interfaces/llm-options.interface.ts +++ /dev/null @@ -1,52 +0,0 @@ -import type { ModuleMetadata, Type } from '@nestjs/common'; -import type { MetricsCallback } from '../utils/metrics'; - -export interface LlmModuleOptions { - /** mana-llm service URL (default: http://localhost:3025) */ - manaLlmUrl?: string; - /** Default text model (default: ollama/gemma3:4b) */ - defaultModel?: string; - /** Default vision model (default: ollama/llava:7b) */ - defaultVisionModel?: string; - /** Request timeout in ms (default: 120000) */ - timeout?: number; - /** Max retries on transient failures (default: 2) */ - maxRetries?: number; - /** Enable debug logging (default: false) */ - debug?: boolean; - /** Optional callback invoked after every LLM request with metrics */ - onMetrics?: MetricsCallback; -} - -export interface LlmModuleAsyncOptions extends Pick { - useExisting?: Type; - useClass?: Type; - useFactory?: (...args: any[]) => Promise | LlmModuleOptions; - inject?: any[]; -} - -export interface LlmOptionsFactory { - createLlmOptions(): Promise | LlmModuleOptions; -} - -export interface ResolvedLlmOptions { - manaLlmUrl: string; - defaultModel: string; - defaultVisionModel: string; - timeout: number; - maxRetries: number; - debug: boolean; - onMetrics?: MetricsCallback; -} - -export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions { - return { - manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025', - defaultModel: options.defaultModel ?? 'ollama/gemma3:4b', - defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b', - timeout: options.timeout ?? 120_000, - maxRetries: options.maxRetries ?? 2, - debug: options.debug ?? false, - onMetrics: options.onMetrics, - }; -} diff --git a/packages/shared-llm/src/llm-client.service.ts b/packages/shared-llm/src/llm-client.service.ts deleted file mode 100644 index 17d210ea9..000000000 --- a/packages/shared-llm/src/llm-client.service.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { Inject, Injectable } from '@nestjs/common'; -import { LlmClient } from './llm-client'; -import { LLM_MODULE_OPTIONS } from './llm.constants'; -import type { LlmModuleOptions } from './interfaces/llm-options.interface'; -import { resolveOptions } from './interfaces/llm-options.interface'; - -/** - * NestJS injectable wrapper around LlmClient. - * All logic lives in the framework-agnostic LlmClient base class. - */ -@Injectable() -export class LlmClientService extends LlmClient { - constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) { - super(resolveOptions(options)); - } -} diff --git a/packages/shared-llm/src/llm-client.ts b/packages/shared-llm/src/llm-client.ts deleted file mode 100644 index d4f6d794e..000000000 --- a/packages/shared-llm/src/llm-client.ts +++ /dev/null @@ -1,392 +0,0 @@ -/** - * Framework-agnostic LLM client that communicates with the mana-llm service. - * - * This is the core implementation shared between the NestJS LlmClientService - * and the standalone LlmClient export (for non-NestJS consumers like bot-services). 
- */ - -import type { ResolvedLlmOptions } from './interfaces/llm-options.interface'; -import type { - ChatMessage, - ChatOptions, - ChatResult, - JsonOptions, - JsonResult, - VisionOptions, - TokenUsage, - ModelInfo, - HealthStatus, -} from './types/chat.types'; -import type { - ChatCompletionRequest, - ChatCompletionResponse, - EmbeddingResponse, -} from './types/openai-compat.types'; -import type { LlmRequestMetrics } from './utils/metrics'; -import { extractJson } from './utils/json-extractor'; -import { retryFetch } from './utils/retry'; - -function createTimeoutSignal(ms: number): any { - const controller = new AbortController(); - setTimeout(() => controller.abort(), ms); - return controller.signal; -} - -export class LlmClient { - private readonly baseUrl: string; - private readonly options: ResolvedLlmOptions; - - constructor(options: ResolvedLlmOptions) { - this.options = options; - this.baseUrl = options.manaLlmUrl.replace(/\/+$/, ''); - } - - // --------------------------------------------------------------------------- - // Text Chat - // --------------------------------------------------------------------------- - - /** Simple chat with a single prompt string. */ - async chat(prompt: string, opts?: ChatOptions): Promise { - const messages = this.buildMessages(prompt, opts?.systemPrompt); - return this.chatMessages(messages, opts); - } - - /** Chat with full message history. */ - async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise { - const requestedModel = opts?.model ?? this.options.defaultModel; - const body = this.buildRequest(messages, opts, false); - const start = Date.now(); - - try { - const response = await this.fetchCompletion(body, opts?.timeout); - const latencyMs = Date.now() - start; - const usage = response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }; - - this.emitMetrics({ - model: requestedModel, - actualModel: response.model, - type: 'chat', - latencyMs, - promptTokens: usage.prompt_tokens, - completionTokens: usage.completion_tokens, - totalTokens: usage.total_tokens, - wasFallback: response.model !== requestedModel && !response.model.endsWith(requestedModel), - success: true, - }); - - return { - content: response.choices[0]?.message?.content ?? '', - model: response.model, - usage, - latencyMs, - }; - } catch (error) { - this.emitMetrics({ - model: requestedModel, - actualModel: requestedModel, - type: 'chat', - latencyMs: Date.now() - start, - promptTokens: 0, - completionTokens: 0, - totalTokens: 0, - wasFallback: false, - success: false, - error: error instanceof Error ? error.message : String(error), - }); - throw error; - } - } - - // --------------------------------------------------------------------------- - // Streaming - // --------------------------------------------------------------------------- - - /** Streaming chat - returns an async iterable of text tokens. */ - async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable { - const messages = this.buildMessages(prompt, opts?.systemPrompt); - yield* this.chatStreamMessages(messages, opts); - } - - /** Streaming chat with full message history. */ - async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable { - const body = this.buildRequest(messages, opts, true); - const timeout = opts?.timeout ?? 
this.options.timeout; - - const response = await retryFetch( - `${this.baseUrl}/v1/chat/completions`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), - signal: createTimeoutSignal(timeout), - }, - { maxRetries: this.options.maxRetries } - ); - - if (!response.ok) { - const text = await response.text().catch(() => ''); - throw new Error(`mana-llm stream error ${response.status}: ${text}`); - } - - if (!response.body) { - throw new Error('mana-llm returned no response body for stream'); - } - - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - - try { - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split('\n'); - buffer = lines.pop() ?? ''; - - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || !trimmed.startsWith('data: ')) continue; - - const data = trimmed.slice(6); - if (data === '[DONE]') return; - - try { - const chunk = JSON.parse(data); - const content = chunk.choices?.[0]?.delta?.content; - if (content) yield content; - } catch { - // Skip unparseable chunks - } - } - } - } finally { - reader.releaseLock(); - } - } - - // --------------------------------------------------------------------------- - // Structured JSON Output - // --------------------------------------------------------------------------- - - /** Chat that extracts and parses JSON from the response. */ - async json(prompt: string, opts?: JsonOptions): Promise> { - const messages = this.buildMessages(prompt, opts?.systemPrompt); - return this.jsonMessages(messages, opts); - } - - /** JSON extraction from full message history. */ - async jsonMessages( - messages: ChatMessage[], - opts?: JsonOptions - ): Promise> { - const maxAttempts = (opts?.jsonRetries ?? 1) + 1; - let lastError: Error | undefined; - - for (let attempt = 0; attempt < maxAttempts; attempt++) { - const result = await this.chatMessages(messages, opts); - - try { - const data = extractJson(result.content, opts?.validate); - return { ...result, data }; - } catch (error) { - lastError = error instanceof Error ? error : new Error(String(error)); - if (this.options.debug) { - console.warn( - `[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`, - lastError.message - ); - } - } - } - - throw lastError ?? new Error('JSON extraction failed'); - } - - // --------------------------------------------------------------------------- - // Vision - // --------------------------------------------------------------------------- - - /** Analyze an image with a text prompt. */ - async vision( - prompt: string, - imageBase64: string, - mimeType?: string, - opts?: VisionOptions - ): Promise { - const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt); - const model = opts?.visionModel ?? this.options.defaultVisionModel; - return this.chatMessages(messages, { ...opts, model }); - } - - /** Vision + JSON extraction. */ - async visionJson( - prompt: string, - imageBase64: string, - mimeType?: string, - opts?: VisionOptions & JsonOptions - ): Promise> { - const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt); - const model = opts?.visionModel ?? 
this.options.defaultVisionModel; - return this.jsonMessages(messages, { ...opts, model }); - } - - // --------------------------------------------------------------------------- - // Embeddings - // --------------------------------------------------------------------------- - - /** Generate embeddings for text input. */ - async embed( - input: string | string[], - model?: string - ): Promise<{ embeddings: number[][]; usage: TokenUsage }> { - const response = await retryFetch( - `${this.baseUrl}/v1/embeddings`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - model: model ?? this.options.defaultModel, - input, - }), - signal: createTimeoutSignal(this.options.timeout), - }, - { maxRetries: this.options.maxRetries } - ); - - if (!response.ok) { - const text = await response.text().catch(() => ''); - throw new Error(`mana-llm embeddings error ${response.status}: ${text}`); - } - - const data = (await response.json()) as EmbeddingResponse; - return { - embeddings: data.data.map((d) => d.embedding), - usage: data.usage, - }; - } - - // --------------------------------------------------------------------------- - // Health & Models - // --------------------------------------------------------------------------- - - /** Check mana-llm health and provider status. */ - async health(): Promise { - try { - const response = await fetch(`${this.baseUrl}/health`, { - signal: createTimeoutSignal(5_000), - }); - if (!response.ok) { - return { status: 'unhealthy', providers: {} }; - } - return (await response.json()) as HealthStatus; - } catch { - return { status: 'unhealthy', providers: {} }; - } - } - - /** List available models from all providers. */ - async listModels(): Promise { - const response = await fetch(`${this.baseUrl}/v1/models`, { - signal: createTimeoutSignal(10_000), - }); - - if (!response.ok) { - throw new Error(`mana-llm models error ${response.status}`); - } - - const data = (await response.json()) as { data: ModelInfo[] }; - return data.data ?? []; - } - - // --------------------------------------------------------------------------- - // Private helpers - // --------------------------------------------------------------------------- - - private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] { - const messages: ChatMessage[] = []; - if (systemPrompt) { - messages.push({ role: 'system', content: systemPrompt }); - } - messages.push({ role: 'user', content: prompt }); - return messages; - } - - private buildVisionMessages( - prompt: string, - imageBase64: string, - mimeType?: string, - systemPrompt?: string - ): ChatMessage[] { - const mime = mimeType ?? 'image/jpeg'; - const dataUrl = imageBase64.startsWith('data:') - ? imageBase64 - : `data:${mime};base64,${imageBase64}`; - - const messages: ChatMessage[] = []; - if (systemPrompt) { - messages.push({ role: 'system', content: systemPrompt }); - } - messages.push({ - role: 'user', - content: [ - { type: 'text', text: prompt }, - { type: 'image_url', image_url: { url: dataUrl } }, - ], - }); - return messages; - } - - private buildRequest( - messages: ChatMessage[], - opts: ChatOptions | undefined, - stream: boolean - ): ChatCompletionRequest { - const request: ChatCompletionRequest = { - model: opts?.model ?? 
this.options.defaultModel, - messages, - stream, - }; - - if (opts?.temperature !== undefined) request.temperature = opts.temperature; - if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens; - - return request; - } - - private async fetchCompletion( - body: ChatCompletionRequest, - timeoutOverride?: number - ): Promise { - const timeout = timeoutOverride ?? this.options.timeout; - - const response = await retryFetch( - `${this.baseUrl}/v1/chat/completions`, - { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), - signal: createTimeoutSignal(timeout), - }, - { maxRetries: this.options.maxRetries } - ); - - if (!response.ok) { - const text = await response.text().catch(() => ''); - throw new Error(`mana-llm error ${response.status}: ${text}`); - } - - return (await response.json()) as ChatCompletionResponse; - } - - private emitMetrics(metrics: LlmRequestMetrics): void { - if (this.options.onMetrics) { - try { - this.options.onMetrics(metrics); - } catch { - // Never let metrics callback break the request - } - } - } -} diff --git a/packages/shared-llm/src/llm.constants.ts b/packages/shared-llm/src/llm.constants.ts deleted file mode 100644 index 67f0064f5..000000000 --- a/packages/shared-llm/src/llm.constants.ts +++ /dev/null @@ -1 +0,0 @@ -export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS'; diff --git a/packages/shared-llm/src/llm.module.ts b/packages/shared-llm/src/llm.module.ts deleted file mode 100644 index d70edcf37..000000000 --- a/packages/shared-llm/src/llm.module.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { DynamicModule, Module, Global, Provider } from '@nestjs/common'; -import type { - LlmModuleOptions, - LlmModuleAsyncOptions, - LlmOptionsFactory, -} from './interfaces/llm-options.interface'; -import { LlmClientService } from './llm-client.service'; -import { LLM_MODULE_OPTIONS } from './llm.constants'; - -@Global() -@Module({}) -export class LlmModule { - static forRoot(options: LlmModuleOptions): DynamicModule { - return { - module: LlmModule, - providers: [ - { - provide: LLM_MODULE_OPTIONS, - useValue: options, - }, - LlmClientService, - ], - exports: [LLM_MODULE_OPTIONS, LlmClientService], - }; - } - - static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule { - const asyncProviders = this.createAsyncProviders(options); - - return { - module: LlmModule, - imports: options.imports || [], - providers: [...asyncProviders, LlmClientService], - exports: [LLM_MODULE_OPTIONS, LlmClientService], - }; - } - - private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] { - if (options.useFactory) { - return [ - { - provide: LLM_MODULE_OPTIONS, - useFactory: options.useFactory, - inject: options.inject || [], - }, - ]; - } - - const useClass = options.useClass; - const useExisting = options.useExisting; - - if (useClass) { - return [ - { - provide: LLM_MODULE_OPTIONS, - useFactory: async (optionsFactory: LlmOptionsFactory) => - await optionsFactory.createLlmOptions(), - inject: [useClass], - }, - { - provide: useClass, - useClass, - }, - ]; - } - - if (useExisting) { - return [ - { - provide: LLM_MODULE_OPTIONS, - useFactory: async (optionsFactory: LlmOptionsFactory) => - await optionsFactory.createLlmOptions(), - inject: [useExisting], - }, - ]; - } - - return []; - } -} diff --git a/packages/shared-llm/src/orchestrator.ts b/packages/shared-llm/src/orchestrator.ts new file mode 100644 index 000000000..357b6bc3f --- /dev/null +++ b/packages/shared-llm/src/orchestrator.ts @@ -0,0 +1,258 @@ 
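For orientation before the source: a minimal consumer-side sketch of how the orchestrator introduced below is meant to be driven. Class, method, and settings names are taken from this patch (LlmOrchestrator, BrowserBackend, ManaServerBackend, LlmTask, LlmSettings, orchestrator.run); the demo task, its generic parameters, and the root-level re-exports from @mana/shared-llm are illustrative assumptions, not part of the patch.

// Sketch only. Assumes LlmOrchestrator, the backend classes and the LlmTask
// type are re-exported from the package root (src/index.ts is reworked in
// this patch but not shown in this hunk), and that LlmTask is generic over
// <TInput, TOutput>.
import { LlmOrchestrator, BrowserBackend, ManaServerBackend } from '@mana/shared-llm';
import type { LlmTask } from '@mana/shared-llm';

// Hypothetical task for illustration; the real extract-date task lives in
// apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts and will differ.
const demoExtractDate: LlmTask<string, Date | null> = {
  name: 'demo.extractDate',
  minTier: 'browser',
  contentClass: 'personal',
  displayLabel: 'Datum erkennen',
  async runLlm(input, backend) {
    const result = await backend.generate({
      taskName: 'demo.extractDate',
      contentClass: 'personal',
      messages: [
        { role: 'system', content: 'Reply with a single ISO date (YYYY-MM-DD) or "null".' },
        { role: 'user', content: input },
      ],
    });
    const iso = result.content.trim();
    return iso === 'null' ? null : new Date(iso);
  },
  async runRules(input) {
    // Deterministic fallback: only recognises bare ISO dates in the text.
    const match = input.match(/\d{4}-\d{2}-\d{2}/);
    return match ? new Date(match[0]) : null;
  },
};

const orchestrator = new LlmOrchestrator({
  settings: {
    allowedTiers: ['browser', 'mana-server'],
    taskOverrides: {},
    fallbackToRulesOnError: true,
    showSourceInUi: true,
    cloudConsentGiven: false,
  },
  backends: [new BrowserBackend(), new ManaServerBackend()],
});

const { value, source, attempted } = await orchestrator.run(demoExtractDate, 'Dentist on 2026-05-12 at 9am');
console.log(value, source, attempted); // e.g. Date(2026-05-12), 'browser', ['browser']

In the web app this wiring is not done by hand: store.svelte.ts further down constructs the backends once per page session, exposes the llmOrchestrator singleton, and routes settings changes through updateLlmSettings so the orchestrator and localStorage stay in sync.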
+/** + * LlmOrchestrator — routes LlmTasks across the four privacy tiers + * (none / browser / mana-server / cloud) according to the user's + * settings, the task's minimum tier, and the input's content class. + * + * Routing rules — applied in this exact order: + * + * 1. If the task's minTier is above the user's HIGHEST allowed tier, + * we cannot run the LLM path at all. Try runRules() if defined, + * else throw TierTooLowError. + * + * 2. If contentClass is 'sensitive', strip 'mana-server' and 'cloud' + * from the candidate tier list — sensitive content NEVER leaves + * the device, even if the user has these tiers enabled globally. + * This is the privacy backstop the user can't accidentally + * override task-by-task. + * + * 3. If a per-task override exists in settings.taskOverrides, use it + * verbatim (still subject to rule 2 — task overrides cannot + * bypass the sensitive-content backstop). + * + * 4. Otherwise, pick the FIRST tier from settings.allowedTiers that + * (a) is in the candidate set after rules 1+2, (b) has an + * available + ready backend, (c) the cloud-consent gate is + * satisfied if it's the cloud tier. + * + * 5. Run the task on the chosen backend. + * + * 6. If the run throws and settings.fallbackToRulesOnError is true + * and the task has a runRules() implementation, fall back to + * rules. We do NOT auto-fall to a different LLM tier on error — + * the user explicitly chose this tier and silently switching + * providers would be a privacy/trust break. + * + * 7. If everything fails, throw NoTierAvailableError. UI catches it + * and offers a "retry" / "switch tier" / "enter manually" prompt. + */ + +import { + BackendUnreachableError, + NoTierAvailableError, + ProviderBlockedError, + TierTooLowError, +} from './errors'; +import type { LlmTask } from './task'; +import type { LlmTier } from './tiers'; +import { TIER_RANK } from './tiers'; +import type { LlmBackend, LlmSettings, LlmTaskRequest, LlmTaskResult } from './types'; + +export interface LlmOrchestratorOptions { + settings: LlmSettings; + backends: LlmBackend[]; +} + +export class LlmOrchestrator { + private settings: LlmSettings; + private backendsByTier: Map; + + constructor(opts: LlmOrchestratorOptions) { + this.settings = opts.settings; + this.backendsByTier = new Map(); + for (const b of opts.backends) { + this.backendsByTier.set(b.tier, b); + } + } + + /** Replace the settings object — call this when the user updates + * their preferences in the settings UI. */ + updateSettings(settings: LlmSettings): void { + this.settings = settings; + } + + /** Public read-only view for UI components that want to react to + * the current settings (e.g. the tier selector). */ + getSettings(): Readonly { + return this.settings; + } + + /** + * Can the user (with their current settings) run this task at all? + * The UI uses this to decide whether to show a feature button as + * enabled / disabled / hidden. Does NOT check backend readiness — + * that's a per-call concern. Just checks "is there any conceivable + * tier in the user's allowedTiers that satisfies task.minTier and + * is permitted for task.contentClass?". + */ + canRun(task: LlmTask): boolean { + // Rules-only tasks always run if they have a fallback + if (task.minTier === 'none') return true; + if (task.runRules) return true; + + const candidates = this.candidateTiers(task); + return candidates.some((t) => { + const backend = this.backendsByTier.get(t); + return backend?.isAvailable() ?? false; + }); + } + + /** + * Run the task. 
Honors the routing rules above. The returned + * LlmTaskResult includes which tier actually ran, plus a trail + * of tiers that were attempted and skipped before it. + */ + async run(task: LlmTask, input: TIn): Promise> { + const start = performance.now(); + const attempted: LlmTier[] = []; + + // Rule 1: tier-too-low check + const userMaxTier = this.userMaxTier(); + if (TIER_RANK[task.minTier] > TIER_RANK[userMaxTier]) { + if (task.runRules) { + const value = await task.runRules(input); + return { + value, + source: 'none', + latencyMs: Math.round(performance.now() - start), + attempted: ['none'], + }; + } + throw new TierTooLowError(task.name, task.minTier, userMaxTier); + } + + // Rules-2-3: candidate tier list and per-task override + const candidates = this.candidateTiers(task); + const override = this.settings.taskOverrides[task.name]; + const orderedTiers = override ? [override].filter((t) => candidates.includes(t)) : candidates; + + // Rule 4-5: try the first runnable tier + for (const tier of orderedTiers) { + if (tier === 'none') { + if (task.runRules) { + const value = await task.runRules(input); + return { + value, + source: 'none', + latencyMs: Math.round(performance.now() - start), + attempted: [...attempted, 'none'], + }; + } + attempted.push('none'); + continue; + } + + // Cloud-consent gate + if (tier === 'cloud' && !this.settings.cloudConsentGiven) { + attempted.push('cloud'); + continue; + } + + const backend = this.backendsByTier.get(tier); + if (!backend) { + attempted.push(tier); + continue; + } + if (!backend.isAvailable()) { + attempted.push(tier); + continue; + } + const ready = await backend.isReady(); + if (!ready) { + attempted.push(tier); + continue; + } + + try { + const request = this.buildRequest(task, input); + const generated = await task.runLlm(input, backend); + return { + value: generated, + source: tier, + latencyMs: Math.round(performance.now() - start), + attempted: [...attempted, tier], + }; + // `request` is intentionally unused — the task constructs + // its own LlmTaskRequest internally via runLlm. We build + // it here only as a future hook for telemetry. + void request; + } catch (err) { + attempted.push(tier); + // Rule 6: rules-fallback on error + if ( + this.settings.fallbackToRulesOnError && + task.runRules && + !(err instanceof ProviderBlockedError) + ) { + // Provider-blocked errors should NOT silently fall to + // rules — they should bubble up so the UI can offer + // "retry" / "switch tier" prompts. Other errors + // (network failure, OOM, model not loaded) get the + // silent rules fallback. + try { + const value = await task.runRules(input); + return { + value, + source: 'none', + latencyMs: Math.round(performance.now() - start), + attempted: [...attempted, 'none'], + }; + } catch { + // rules fallback also failed — re-throw original + throw err; + } + } + // Re-throw provider blocks and unrecoverable errors + if (err instanceof ProviderBlockedError || err instanceof BackendUnreachableError) { + throw err; + } + // Unknown error — try the next tier in the list + continue; + } + } + + throw new NoTierAvailableError(task.name, attempted); + } + + /** Highest tier in the user's allowedTiers list (by rank). */ + private userMaxTier(): LlmTier { + if (this.settings.allowedTiers.length === 0) return 'none'; + return this.settings.allowedTiers.reduce( + (max, t) => (TIER_RANK[t] > TIER_RANK[max] ? t : max), + 'none' as LlmTier + ); + } + + /** Candidate tier list after applying rules 1 + 2. 
+ * - Rule 1: only tiers >= task.minTier + * - Rule 2: sensitive content excludes mana-server + cloud + * Also always includes 'none' at the end if the task has runRules. */ + private candidateTiers(task: LlmTask): LlmTier[] { + // Start from the user's allowed tiers, in their preference order + let tiers = this.settings.allowedTiers.filter((t) => TIER_RANK[t] >= TIER_RANK[task.minTier]); + + // Rule 2: sensitive content backstop + if (task.contentClass === 'sensitive') { + tiers = tiers.filter((t) => t === 'browser'); + } + + // 'none' is always tail-appended if the task has a rules implementation, + // so the for-loop in run() naturally falls through to it. + if (task.runRules && !tiers.includes('none')) { + tiers.push('none'); + } + return tiers; + } + + private buildRequest(task: LlmTask, _input: TIn): LlmTaskRequest { + // Right now this is a placeholder — tasks build their own + // LlmTaskRequest inside runLlm. Once we add token-counting + // telemetry we'll move that construction up here so the + // orchestrator can prepend the task metadata uniformly. + return { + taskName: task.name, + contentClass: task.contentClass, + requires: task.requires, + messages: [], + }; + } +} diff --git a/packages/shared-llm/src/standalone.ts b/packages/shared-llm/src/standalone.ts deleted file mode 100644 index 763f83d3c..000000000 --- a/packages/shared-llm/src/standalone.ts +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Standalone exports for non-NestJS consumers (e.g. bot-services). - * - * Usage: - * import { LlmClient } from '@mana/shared-llm/standalone'; - * const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' }); - */ - -export { LlmClient } from './llm-client'; -export { resolveOptions } from './interfaces/llm-options.interface'; -export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface'; - -// Types -export type { - ChatMessage, - ContentPart, - TextContentPart, - ImageContentPart, - ChatOptions, - JsonOptions, - VisionOptions, - TokenUsage, - ChatResult, - JsonResult, - ModelInfo, - HealthStatus, -} from './types'; - -// Utilities -export { extractJson } from './utils'; diff --git a/packages/shared-llm/src/store.svelte.ts b/packages/shared-llm/src/store.svelte.ts new file mode 100644 index 000000000..6c2359490 --- /dev/null +++ b/packages/shared-llm/src/store.svelte.ts @@ -0,0 +1,107 @@ +/** + * Svelte 5 reactive store for the LLM orchestrator. + * + * Lives at module-scope as a singleton because there is exactly one + * orchestrator + settings per page session. Settings are persisted to + * localStorage for now (Phase 1) — Phase 2 will move them into the + * encrypted IndexedDB settings table once that exists. + * + * Usage in a Svelte 5 component: + * + * import { llmOrchestrator, llmSettingsState, useTaskAvailability } from '@mana/shared-llm'; + * import { extractDateTask } from '$lib/llm-tasks/extract-date'; + * + * const available = useTaskAvailability(extractDateTask); + * // ... reactively true/false based on settings + backend readiness + * + * {#if available.current} + * + * {/if} + */ + +import { BrowserBackend } from './backends/browser'; +import { CloudBackend } from './backends/cloud'; +import { ManaServerBackend } from './backends/mana-server'; +import { LlmOrchestrator } from './orchestrator'; +import type { LlmTask } from './task'; +import { DEFAULT_LLM_SETTINGS, type LlmSettings } from './types'; + +const STORAGE_KEY = 'mana.llm.settings.v1'; + +/** Load persisted settings, falling back to defaults on first run or + * any parse error. 
localStorage is fine for Phase 1 — small payload, + * not encrypted-sensitive (the user's tier preference is hardly + * secret), and trivial to migrate to IndexedDB later. */ +function loadSettings(): LlmSettings { + if (typeof localStorage === 'undefined') return { ...DEFAULT_LLM_SETTINGS }; + try { + const raw = localStorage.getItem(STORAGE_KEY); + if (!raw) return { ...DEFAULT_LLM_SETTINGS }; + const parsed = JSON.parse(raw) as Partial; + return { ...DEFAULT_LLM_SETTINGS, ...parsed }; + } catch { + return { ...DEFAULT_LLM_SETTINGS }; + } +} + +function persistSettings(settings: LlmSettings): void { + if (typeof localStorage === 'undefined') return; + try { + localStorage.setItem(STORAGE_KEY, JSON.stringify(settings)); + } catch { + // Quota exceeded or storage disabled — non-fatal, settings just + // won't persist across sessions. + } +} + +// ─── Reactive state ────────────────────────────────────────────── + +let _settings = $state(loadSettings()); + +// Backends are constructed once per page session. They're stateless +// (or hold their own internal state in the case of BrowserBackend +// pointing at @mana/local-llm's singleton), so a fresh instance per +// orchestrator is fine. +const backends = [new BrowserBackend(), new ManaServerBackend(), new CloudBackend()]; + +export const llmOrchestrator = new LlmOrchestrator({ + settings: _settings, + backends, +}); + +/** Reactive accessor for the current settings. UI components read + * via `llmSettingsState.current` to get a $state-tracked snapshot. */ +export const llmSettingsState = { + get current(): LlmSettings { + return _settings; + }, +}; + +/** Update settings (or part of them). Persists to localStorage and + * pushes the new value into the orchestrator. */ +export function updateLlmSettings(patch: Partial): void { + _settings = { ..._settings, ...patch }; + persistSettings(_settings); + llmOrchestrator.updateSettings(_settings); +} + +/** + * Svelte 5 reactive hook: returns `{ current: boolean }` indicating + * whether the given task can run with the user's current settings. + * Reactive against `llmSettingsState` so the UI re-renders when the + * user toggles a tier in the settings page. + * + * Use this to gate feature buttons — show them as enabled when the + * task is runnable, disabled (with a tooltip) when not. + */ +export function useTaskAvailability( + task: LlmTask +): { readonly current: boolean } { + return { + get current() { + // Reading _settings here registers the reactive dependency + void _settings; + return llmOrchestrator.canRun(task); + }, + }; +} diff --git a/packages/shared-llm/src/task.ts b/packages/shared-llm/src/task.ts new file mode 100644 index 000000000..a84d388bd --- /dev/null +++ b/packages/shared-llm/src/task.ts @@ -0,0 +1,82 @@ +/** + * The LlmTask contract — the unit of work modules describe to the + * orchestrator. Tasks bundle: + * + * 1. The LLM-side implementation (used for browser/server/cloud tiers) + * 2. An optional rules-tier fallback (used when the LLM tier is + * unavailable, fails, or the user has opted out of all LLM tiers) + * 3. Routing metadata (minimum tier, content class, capability needs) + * + * Tasks live next to the modules that use them — there is intentionally + * no central task registry. 
The convention is: + * + * apps/mana/apps/web/src/lib/llm-tasks/ ← cross-module helpers + * apps/mana/apps/web/src/lib/modules/notes/llm-tasks/ ← notes-specific + * + * The orchestrator never imports tasks directly — modules import tasks + * AND the orchestrator and call `orchestrator.run(task, input)`. + */ + +import type { LlmTier } from './tiers'; +import type { ContentClass, CapabilityRequirements, LlmBackend, LlmTaskRequest } from './types'; + +export interface LlmTask { + /** + * Stable identifier for this task. Used for telemetry, per-task + * tier overrides in user settings, and debug logs. Convention is + * `{module}.{action}` — e.g. `notes.extractTags`, `todo.parseQuickAdd`. + */ + readonly name: string; + + /** Lowest tier this task can produce a useful result on. */ + readonly minTier: LlmTier; + + /** Privacy class of inputs this task handles. */ + readonly contentClass: ContentClass; + + /** Capability requirements that exclude tiers/backends that can't satisfy them. */ + readonly requires?: CapabilityRequirements; + + /** + * User-facing label, shown when telling the user "this task needs + * AI" or "this result was computed via tier X". + */ + readonly displayLabel: string; + + /** + * The LLM-based implementation. Builds an LlmTaskRequest from the + * task input and asks the backend to run it, then maps the + * generated text back into the typed TOutput shape (e.g. parses + * JSON, validates a date, looks up a tag). + */ + runLlm(input: TInput, backend: LlmBackend): Promise; + + /** + * Optional deterministic fallback — runs when no LLM tier is + * available, or when the LLM tier failed and + * `fallbackToRulesOnError` is enabled in user settings. + * + * Returning the typed TOutput indicates success. Throwing means + * the rules implementation also can't handle this input — the + * orchestrator will then surface a NoTierAvailableError so the + * UI can ask the user for direct input. + */ + runRules?(input: TInput): Promise; +} + +/** + * Helper for tasks that need to construct an LlmTaskRequest from their + * own input. Centralizes the boilerplate so individual tasks don't have + * to redeclare taskName / contentClass / requires every time. + */ +export function buildTaskRequest( + task: LlmTask, + overrides: Omit +): LlmTaskRequest { + return { + ...overrides, + taskName: task.name, + contentClass: task.contentClass, + requires: task.requires, + }; +} diff --git a/packages/shared-llm/src/tiers.ts b/packages/shared-llm/src/tiers.ts new file mode 100644 index 000000000..85294da06 --- /dev/null +++ b/packages/shared-llm/src/tiers.ts @@ -0,0 +1,50 @@ +/** + * Tier definitions for the Mana LLM orchestrator. + * + * Four tiers, ordered from most-private to least-private: + * + * none — Deterministic parsers / heuristics. No LLM at all. + * Always available. Zero cost. Quality varies by task. + * + * browser — Gemma 4 E2B running in the user's browser via WebGPU + * (@mana/local-llm). 100% on-device. Requires the + * ~500 MB model to be downloaded once and ~2 GB VRAM. + * + * mana-server — services/mana-llm + Ollama on our own infrastructure + * (currently the Mac Mini, gemma3:4b by default). + * Data leaves the device but stays in our control. + * + * cloud — services/mana-llm proxied to a third-party provider + * (Google Gemini, configured via google_api_key in the + * mana-llm service env). Data goes to the third party. 
+ * + * The numeric rank is used by the orchestrator to compare a user's + * preferred tier against a task's minimum tier ("can the user even + * run this task?") and is the canonical sort order for the privacy + * gradient. + */ + +export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud'; + +export const TIER_RANK: Record = { + none: 0, + browser: 1, + 'mana-server': 2, + cloud: 3, +}; + +export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud']; + +/** Human-readable label, kept here so backends/UI agree on naming. */ +export function tierLabel(tier: LlmTier): string { + switch (tier) { + case 'none': + return 'Lokal (ohne KI)'; + case 'browser': + return 'Auf deinem Gerät'; + case 'mana-server': + return 'Mana-Server'; + case 'cloud': + return 'Google Gemini'; + } +} diff --git a/packages/shared-llm/src/types.ts b/packages/shared-llm/src/types.ts new file mode 100644 index 000000000..e754888cd --- /dev/null +++ b/packages/shared-llm/src/types.ts @@ -0,0 +1,150 @@ +/** + * Shared types for the Mana LLM orchestrator. + * + * These deliberately mirror the surface of @mana/local-llm so that the + * browser tier can pass them straight through, but they are intentionally + * a SUPERSET (with task name, content class, capability requirements, + * rule fallback) so the orchestrator can route intelligently. + */ + +import type { LlmTier } from './tiers'; + +export interface ChatMessage { + role: 'system' | 'user' | 'assistant'; + content: string; +} + +export interface GenerateOptions { + messages: ChatMessage[]; + temperature?: number; + maxTokens?: number; + /** Optional streaming callback — called once per emitted token chunk */ + onToken?: (token: string) => void; +} + +export interface GenerateResult { + content: string; + usage?: { + promptTokens: number; + completionTokens: number; + totalTokens: number; + }; + latencyMs: number; +} + +/** + * The privacy class of the input being processed. The orchestrator uses + * this to ENFORCE that sensitive content never leaves the device, even + * if the user has globally allowed cloud tiers. + * + * public — already public-domain content (e.g. an open URL the user + * wants summarized). Anything is fair game. + * personal — the user's own content but routine (a calendar event, a + * todo title). Default for most module tasks. Allowed on + * any tier the user has enabled. + * sensitive — explicitly private content (notes flagged sensitive, + * diary entries, dreams, financial data). The orchestrator + * restricts these to {none, browser} regardless of user's + * global settings — the user has to explicitly opt out of + * this protection per-task to send sensitive content to + * server/cloud tiers. + */ +export type ContentClass = 'public' | 'personal' | 'sensitive'; + +export interface CapabilityRequirements { + /** Task needs to receive structured JSON in response */ + json?: boolean; + /** Task needs at least this many context tokens (input + output) */ + minContextTokens?: number; + /** Task needs streaming support (per-token onToken callbacks) */ + streaming?: boolean; +} + +/** + * The high-level "I want to do X" descriptor that flows from a module + * to the orchestrator. Concrete LlmTask implementations build these + * internally before delegating to the orchestrator. + */ +export interface LlmTaskRequest extends GenerateOptions { + /** Stable name for analytics + per-task overrides — e.g. 
"notes.extractTags" */ + taskName: string; + contentClass: ContentClass; + requires?: CapabilityRequirements; +} + +/** + * The result of running a task through the orchestrator. Carries the + * tier that actually executed (which may differ from the user's + * preferred tier if a fallback kicked in) and the trail of tiers + * that were tried first — useful for telemetry and for debugging + * "why did this task end up running on tier X?". + */ +export interface LlmTaskResult { + value: T; + source: LlmTier; + latencyMs: number; + /** Tiers that were attempted before `source` succeeded */ + attempted: LlmTier[]; +} + +/** + * Backend interface that the orchestrator talks to. The "none" tier + * does NOT implement this — rule-based fallbacks live on each + * concrete LlmTask, not on a backend object. + */ +export interface LlmBackend { + readonly tier: Exclude; + + /** Could this backend run AT ALL given the current environment? + * e.g. browser tier checks for WebGPU + user-enabled, server tier + * checks for a configured base URL. */ + isAvailable(): boolean; + + /** Could this backend run RIGHT NOW? e.g. browser tier checks if + * the model is loaded into VRAM. May return false even when + * isAvailable() is true (model still downloading, server in + * startup, …). */ + isReady(): boolean | Promise; + + /** Run a task. The backend is responsible for actually performing + * the inference and returning the result; it does NOT decide + * whether it SHOULD run (the orchestrator did that). */ + generate(req: LlmTaskRequest): Promise; +} + +/** + * The mutable user preferences that drive routing. + */ +export interface LlmSettings { + /** Tiers the orchestrator is allowed to use, in preference order. + * An empty array means "no AI at all" — only Tier 0 (rules) runs. */ + allowedTiers: LlmTier[]; + + /** Per-task overrides — keyed by task name, value is the tier to + * use for that task specifically (overrides allowedTiers order). */ + taskOverrides: Record; + + /** When the user-chosen tier fails to run a task, fall back to + * the rules tier (if the task has a runT0 implementation). + * When false, failures surface as errors instead. */ + fallbackToRulesOnError: boolean; + + /** Show a small "via Edge / via Server / via Gemini" badge under + * every LLM result. Default true — helps the user understand + * where their data went. */ + showSourceInUi: boolean; + + /** First-time consent for the cloud tier. Until this is true, the + * cloud tier is treated as unavailable even if it's in + * allowedTiers. The user must explicitly tick a "yes I understand + * Google sees my data" checkbox once. */ + cloudConsentGiven: boolean; +} + +export const DEFAULT_LLM_SETTINGS: LlmSettings = { + allowedTiers: [], // ZERO opt-in by default — every user starts in Tier 0 only + taskOverrides: {}, + fallbackToRulesOnError: true, + showSourceInUi: true, + cloudConsentGiven: false, +}; diff --git a/packages/shared-llm/src/types/chat.types.ts b/packages/shared-llm/src/types/chat.types.ts deleted file mode 100644 index 53b2b1f7b..000000000 --- a/packages/shared-llm/src/types/chat.types.ts +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Core chat types for the LLM client. - * These are the high-level types that consumers interact with. 
- */ - -// --------------------------------------------------------------------------- -// Messages -// --------------------------------------------------------------------------- - -export interface TextContentPart { - type: 'text'; - text: string; -} - -export interface ImageContentPart { - type: 'image_url'; - image_url: { url: string }; -} - -export type ContentPart = TextContentPart | ImageContentPart; - -export interface ChatMessage { - role: 'system' | 'user' | 'assistant'; - content: string | ContentPart[]; -} - -// --------------------------------------------------------------------------- -// Options -// --------------------------------------------------------------------------- - -export interface ChatOptions { - /** Model to use (default from module config, e.g. "ollama/gemma3:4b") */ - model?: string; - /** Sampling temperature 0.0-2.0 */ - temperature?: number; - /** Max tokens to generate */ - maxTokens?: number; - /** System prompt prepended to messages */ - systemPrompt?: string; - /** Request timeout in ms (overrides module default) */ - timeout?: number; -} - -export interface JsonOptions extends ChatOptions { - /** Validation function applied to parsed JSON. Should throw on invalid data. */ - validate?: (data: unknown) => T; - /** Number of extraction retries on parse failure (default: 1) */ - jsonRetries?: number; -} - -export interface VisionOptions extends ChatOptions { - /** Vision model override (default from module config, e.g. "ollama/llava:7b") */ - visionModel?: string; -} - -// --------------------------------------------------------------------------- -// Results -// --------------------------------------------------------------------------- - -export interface TokenUsage { - prompt_tokens: number; - completion_tokens: number; - total_tokens: number; -} - -export interface ChatResult { - /** Generated text content */ - content: string; - /** Model that was actually used */ - model: string; - /** Token usage statistics */ - usage: TokenUsage; - /** Request latency in milliseconds */ - latencyMs: number; -} - -export interface JsonResult extends ChatResult { - /** Parsed and optionally validated data */ - data: T; -} - -// --------------------------------------------------------------------------- -// Models -// --------------------------------------------------------------------------- - -export interface ModelInfo { - id: string; - object: 'model'; - created: number; - owned_by: string; -} - -// --------------------------------------------------------------------------- -// Health -// --------------------------------------------------------------------------- - -export interface HealthStatus { - status: 'healthy' | 'degraded' | 'unhealthy'; - providers: Record; -} diff --git a/packages/shared-llm/src/types/index.ts b/packages/shared-llm/src/types/index.ts deleted file mode 100644 index 368d18ff8..000000000 --- a/packages/shared-llm/src/types/index.ts +++ /dev/null @@ -1,26 +0,0 @@ -export type { - ChatMessage, - ContentPart, - TextContentPart, - ImageContentPart, - ChatOptions, - JsonOptions, - VisionOptions, - TokenUsage, - ChatResult, - JsonResult, - ModelInfo, - HealthStatus, -} from './chat.types'; - -export type { - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionChoice, - ChatCompletionStreamChunk, - StreamChoice, - EmbeddingRequest, - EmbeddingResponse, - EmbeddingData, - ModelsListResponse, -} from './openai-compat.types'; diff --git a/packages/shared-llm/src/types/openai-compat.types.ts 
b/packages/shared-llm/src/types/openai-compat.types.ts deleted file mode 100644 index 457c671fd..000000000 --- a/packages/shared-llm/src/types/openai-compat.types.ts +++ /dev/null @@ -1,97 +0,0 @@ -/** - * OpenAI-compatible wire format types matching the mana-llm API contract. - * These are internal types used for HTTP communication - consumers should - * use the high-level types from chat.types.ts instead. - */ - -import type { ChatMessage, TokenUsage } from './chat.types'; - -// --------------------------------------------------------------------------- -// Request (POST /v1/chat/completions) -// --------------------------------------------------------------------------- - -export interface ChatCompletionRequest { - model: string; - messages: ChatMessage[]; - stream?: boolean; - temperature?: number; - max_tokens?: number; - top_p?: number; - frequency_penalty?: number; - presence_penalty?: number; - stop?: string | string[]; -} - -// --------------------------------------------------------------------------- -// Response (non-streaming) -// --------------------------------------------------------------------------- - -export interface ChatCompletionResponse { - id: string; - object: 'chat.completion'; - created: number; - model: string; - choices: ChatCompletionChoice[]; - usage: TokenUsage; -} - -export interface ChatCompletionChoice { - index: number; - message: { role: 'assistant'; content: string }; - finish_reason: 'stop' | 'length' | 'content_filter' | null; -} - -// --------------------------------------------------------------------------- -// Response (streaming) -// --------------------------------------------------------------------------- - -export interface ChatCompletionStreamChunk { - id: string; - object: 'chat.completion.chunk'; - created: number; - model: string; - choices: StreamChoice[]; -} - -export interface StreamChoice { - index: number; - delta: { role?: 'assistant'; content?: string }; - finish_reason: string | null; -} - -// --------------------------------------------------------------------------- -// Embeddings -// --------------------------------------------------------------------------- - -export interface EmbeddingRequest { - model: string; - input: string | string[]; - encoding_format?: 'float' | 'base64'; -} - -export interface EmbeddingResponse { - object: 'list'; - data: EmbeddingData[]; - model: string; - usage: TokenUsage; -} - -export interface EmbeddingData { - object: 'embedding'; - index: number; - embedding: number[]; -} - -// --------------------------------------------------------------------------- -// Models (GET /v1/models) -// --------------------------------------------------------------------------- - -export interface ModelsListResponse { - object: 'list'; - data: Array<{ - id: string; - object: 'model'; - created: number; - owned_by: string; - }>; -} diff --git a/packages/shared-llm/src/utils/index.ts b/packages/shared-llm/src/utils/index.ts deleted file mode 100644 index 1466b2de8..000000000 --- a/packages/shared-llm/src/utils/index.ts +++ /dev/null @@ -1,5 +0,0 @@ -export { extractJson } from './json-extractor'; -export { retryFetch } from './retry'; -export type { RetryOptions } from './retry'; -export { LlmMetricsCollector } from './metrics'; -export type { LlmRequestMetrics, MetricsCallback } from './metrics'; diff --git a/packages/shared-llm/src/utils/json-extractor.ts b/packages/shared-llm/src/utils/json-extractor.ts deleted file mode 100644 index 8e0d27c90..000000000 --- a/packages/shared-llm/src/utils/json-extractor.ts +++ 
/dev/null @@ -1,94 +0,0 @@ -/** - * Extract and parse JSON from LLM responses. - * - * LLMs often wrap JSON in markdown code fences or include extra text. - * This utility handles all common patterns: - * 1. Direct JSON parse - * 2. Markdown ```json ... ``` fences - * 3. First { ... } or [ ... ] block in text - */ -export function extractJson(text: string, validate?: (data: unknown) => T): T { - const trimmed = text.trim(); - - // Step 1: Try direct parse - const direct = tryParse(trimmed, validate); - if (direct !== undefined) return direct; - - // Step 2: Strip markdown code fences - const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/); - if (fenceMatch) { - const fenced = tryParse(fenceMatch[1].trim(), validate); - if (fenced !== undefined) return fenced; - } - - // Step 3: Find first JSON object - const objectStart = trimmed.indexOf('{'); - if (objectStart !== -1) { - const objectStr = extractBalanced(trimmed, objectStart, '{', '}'); - if (objectStr) { - const obj = tryParse(objectStr, validate); - if (obj !== undefined) return obj; - } - } - - // Step 4: Find first JSON array - const arrayStart = trimmed.indexOf('['); - if (arrayStart !== -1) { - const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']'); - if (arrayStr) { - const arr = tryParse(arrayStr, validate); - if (arr !== undefined) return arr; - } - } - - throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`); -} - -function tryParse(text: string, validate?: (data: unknown) => T): T | undefined { - try { - const parsed = JSON.parse(text); - return validate ? validate(parsed) : parsed; - } catch { - return undefined; - } -} - -/** - * Extract a balanced block starting from the given position. - * Handles nested braces/brackets but not strings with escaped delimiters. - */ -function extractBalanced(text: string, start: number, open: string, close: string): string | null { - let depth = 0; - let inString = false; - let escape = false; - - for (let i = start; i < text.length; i++) { - const ch = text[i]; - - if (escape) { - escape = false; - continue; - } - - if (ch === '\\') { - escape = true; - continue; - } - - if (ch === '"') { - inString = !inString; - continue; - } - - if (inString) continue; - - if (ch === open) depth++; - if (ch === close) depth--; - - if (depth === 0) { - return text.slice(start, i + 1); - } - } - - return null; -} diff --git a/packages/shared-llm/src/utils/metrics.ts b/packages/shared-llm/src/utils/metrics.ts deleted file mode 100644 index 3751994d3..000000000 --- a/packages/shared-llm/src/utils/metrics.ts +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Request-level metrics for LLM calls. - * - * Provides an optional callback system that backends can hook into - * for monitoring, logging, or forwarding to Prometheus/Grafana. - */ - -export interface LlmRequestMetrics { - /** Model requested (e.g. 
"ollama/gemma3:4b") */ - model: string; - /** Model actually used (may differ if fallback occurred) */ - actualModel: string; - /** Request type */ - type: 'chat' | 'json' | 'vision' | 'visionJson' | 'embed' | 'stream'; - /** Total request duration in ms */ - latencyMs: number; - /** Token usage */ - promptTokens: number; - completionTokens: number; - totalTokens: number; - /** Whether this request was a fallback (model differs from requested) */ - wasFallback: boolean; - /** Whether the request succeeded */ - success: boolean; - /** Error message if failed */ - error?: string; -} - -export type MetricsCallback = (metrics: LlmRequestMetrics) => void; - -/** - * Simple in-memory metrics aggregator. - * Useful for health endpoints and debugging. - */ -export class LlmMetricsCollector { - private _totalRequests = 0; - private _totalErrors = 0; - private _totalFallbacks = 0; - private _totalTokens = 0; - private _totalLatencyMs = 0; - private _byModel: Map = new Map(); - - /** Use as MetricsCallback */ - readonly collect = (metrics: LlmRequestMetrics): void => { - this._totalRequests++; - this._totalLatencyMs += metrics.latencyMs; - this._totalTokens += metrics.totalTokens; - - if (!metrics.success) this._totalErrors++; - if (metrics.wasFallback) this._totalFallbacks++; - - const modelKey = metrics.actualModel; - const existing = this._byModel.get(modelKey) ?? { requests: 0, tokens: 0, errors: 0 }; - existing.requests++; - existing.tokens += metrics.totalTokens; - if (!metrics.success) existing.errors++; - this._byModel.set(modelKey, existing); - }; - - /** Get summary stats for health endpoints / dashboards */ - getSummary() { - return { - totalRequests: this._totalRequests, - totalErrors: this._totalErrors, - totalFallbacks: this._totalFallbacks, - totalTokens: this._totalTokens, - averageLatencyMs: - this._totalRequests > 0 ? Math.round(this._totalLatencyMs / this._totalRequests) : 0, - fallbackRate: - this._totalRequests > 0 - ? Math.round((this._totalFallbacks / this._totalRequests) * 100) - : 0, - errorRate: - this._totalRequests > 0 ? Math.round((this._totalErrors / this._totalRequests) * 100) : 0, - byModel: Object.fromEntries(this._byModel), - }; - } - - /** Reset all counters */ - reset(): void { - this._totalRequests = 0; - this._totalErrors = 0; - this._totalFallbacks = 0; - this._totalTokens = 0; - this._totalLatencyMs = 0; - this._byModel.clear(); - } -} diff --git a/packages/shared-llm/src/utils/retry.ts b/packages/shared-llm/src/utils/retry.ts deleted file mode 100644 index c05f92c85..000000000 --- a/packages/shared-llm/src/utils/retry.ts +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Fetch wrapper with exponential backoff retry for transient failures. - * - * Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors. - * Does NOT retry on: 400, 401, 403, 404 (client errors). - */ - -const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]); - -export interface RetryOptions { - maxRetries: number; - /** Base delay in ms (doubles each retry). 
Default: 200 */ - baseDelay?: number; -} - -export async function retryFetch( - url: string, - init: RequestInit, - options: RetryOptions -): Promise { - const { maxRetries, baseDelay = 200 } = options; - let lastError: Error | undefined; - - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - const response = await fetch(url, init); - - if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) { - return response; - } - - // Retryable status code - lastError = new Error(`HTTP ${response.status}: ${response.statusText}`); - } catch (error) { - // Network error (connection refused, timeout, etc.) - lastError = error instanceof Error ? error : new Error(String(error)); - } - - // Don't sleep after the last attempt - if (attempt < maxRetries) { - const delay = baseDelay * Math.pow(2, attempt); - await sleep(delay); - } - } - - throw lastError ?? new Error('retryFetch exhausted all retries'); -} - -function sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)); -} diff --git a/packages/shared-llm/tsconfig.json b/packages/shared-llm/tsconfig.json index 310fa8950..897ca8cba 100644 --- a/packages/shared-llm/tsconfig.json +++ b/packages/shared-llm/tsconfig.json @@ -1,21 +1,14 @@ { "compilerOptions": { - "target": "ES2021", - "module": "commonjs", - "lib": ["ES2021"], - "declaration": true, - "declarationMap": true, - "sourceMap": true, - "outDir": "./dist", - "rootDir": "./src", + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2022", "DOM"], "strict": true, - "esModuleInterop": true, + "noEmit": true, "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "moduleResolution": "node", - "experimentalDecorators": true, - "emitDecoratorMetadata": true + "forceConsistentCasingInFileNames": true }, "include": ["src/**/*"], - "exclude": ["node_modules", "dist"] + "exclude": ["node_modules"] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b0be3bea5..28bd3bca9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -978,6 +978,9 @@ importers: '@mana/shared-links': specifier: workspace:* version: link:../../../../packages/shared-links + '@mana/shared-llm': + specifier: workspace:* + version: link:../../../../packages/shared-llm '@mana/shared-stores': specifier: workspace:* version: link:../../../../packages/shared-stores @@ -2981,31 +2984,19 @@ importers: packages/shared-llm: dependencies: - '@nestjs/common': - specifier: ^10.0.0 || ^11.0.0 - version: 10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2) - '@nestjs/config': - specifier: ^3.0.0 || ^4.0.0 - version: 3.3.0(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(rxjs@7.8.2) - '@nestjs/core': - specifier: ^10.0.0 || ^11.0.0 - version: 10.4.22(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/platform-express@10.4.22)(reflect-metadata@0.2.2)(rxjs@7.8.2) - reflect-metadata: - specifier: ^0.1.13 || ^0.2.0 - version: 0.2.2 - rxjs: - specifier: ^7.0.0 - version: 7.8.2 + '@mana/local-llm': + specifier: workspace:* + version: link:../local-llm devDependencies: '@types/node': - specifier: ^20.0.0 - version: 20.19.39 - typescript: + specifier: ^24.10.1 + version: 24.12.2 + svelte: specifier: ^5.0.0 + version: 5.55.1 + typescript: + specifier: ^5.9.3 version: 5.9.3 - vitest: - specifier: ^4.1.2 - version: 
4.1.3(@opentelemetry/api@1.9.1)(@types/node@20.19.39)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@20.19.39)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) packages/shared-logger: devDependencies: