fix(mana/web): unwrap $state proxy in workbench-scenes Dexie writes

Adding an app to a workbench scene threw DataCloneError. scenesState
is a $state array, so current.openApps was a Svelte 5 proxy and
spreading it into a new array left proxy entries inside; IndexedDB's
structured clone refuses to serialise those. Snapshot before handing
the array to patchScene / createScene so Dexie sees plain objects.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Till JS 2026-04-09 00:44:00 +02:00
parent 761851847f
commit 56065c8537
38 changed files with 1415 additions and 1665 deletions

View file

@@ -15,6 +15,7 @@ COPY apps/calc/packages/shared ./apps/calc/packages/shared
COPY apps/zitare/packages/content ./apps/zitare/packages/content
COPY packages/shared-uload ./packages/shared-uload
COPY packages/local-llm ./packages/local-llm
COPY packages/shared-llm ./packages/shared-llm
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \
pnpm install --no-frozen-lockfile --ignore-scripts

View file

@@ -60,6 +60,7 @@
"@mana/shared-i18n": "workspace:*",
"@mana/shared-icons": "workspace:*",
"@mana/shared-links": "workspace:*",
"@mana/shared-llm": "workspace:*",
"@mana/shared-stores": "workspace:*",
"@mana/shared-tags": "workspace:*",
"@mana/shared-tailwind": "workspace:*",

View file

@@ -0,0 +1,119 @@
/**
* extractDateTask pulls an ISO date out of a free-form German/English
* string. Used by Quick-Add features that want to recognize phrases like
* "morgen 14 Uhr" or "next Tuesday".
*
* Has a runRules() fallback so it works even on Tier 0 (no AI); the
* fallback uses a hand-rolled regex set covering the most common
* shortcuts. It's intentionally narrow: it only catches the patterns it
* KNOWS, and returns null otherwise. This is the right semantic for
* Tier 0: "I'm not certain enough to guess" is a valid answer when
* the user has explicitly opted out of LLM use.
*
* For production-grade NL date parsing without an LLM, replacing the
* regex stub with chrono-node would be a one-line change in runRules().
*/
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
export interface ExtractDateInput {
text: string;
/** Reference date for relative parsing ("morgen", "next week"). Defaults to now. */
now?: Date;
}
export type ExtractDateOutput = Date | null;
export const extractDateTask: LlmTask<ExtractDateInput, ExtractDateOutput> = {
name: 'common.extractDate',
minTier: 'none', // works on Tier 0 thanks to the regex fallback
contentClass: 'personal',
displayLabel: 'Datum aus Text erkennen',
async runLlm(input, backend: LlmBackend): Promise<ExtractDateOutput> {
const refIso = (input.now ?? new Date()).toISOString();
const result = await backend.generate({
taskName: extractDateTask.name,
contentClass: extractDateTask.contentClass,
messages: [
{
role: 'system',
content:
'You extract date+time references from short user input. Always respond with strict JSON of the form {"iso":"YYYY-MM-DDTHH:MM:SSZ"} or {"iso":null}. No prose, no markdown.',
},
{
role: 'user',
content: `Reference time: ${refIso}\nUser input: ${input.text}`,
},
],
temperature: 0,
maxTokens: 80,
});
try {
// Strip markdown fences if a less-disciplined model added them
const cleaned = result.content.replace(/```(?:json)?|```/g, '').trim();
const parsed = JSON.parse(cleaned) as { iso: string | null };
return parsed.iso ? new Date(parsed.iso) : null;
} catch {
return null;
}
},
async runRules(input): Promise<ExtractDateOutput> {
const text = input.text.toLowerCase().trim();
const now = input.now ?? new Date();
// "heute" / "today"
if (/\b(heute|today)\b/.test(text)) {
return withTime(new Date(now), text);
}
// "morgen" / "tomorrow"
if (/\b(morgen|tomorrow)\b/.test(text)) {
const d = new Date(now);
d.setDate(d.getDate() + 1);
return withTime(d, text);
}
// "übermorgen" / "day after tomorrow"
if (/\b(übermorgen|day after tomorrow)\b/.test(text)) {
const d = new Date(now);
d.setDate(d.getDate() + 2);
return withTime(d, text);
}
// "in N tagen" / "in N days"
const inDays = text.match(/\bin (\d+) (tagen|days?)\b/);
if (inDays) {
const d = new Date(now);
d.setDate(d.getDate() + parseInt(inDays[1], 10));
return withTime(d, text);
}
// Explicit ISO date "2026-04-09" or "2026-04-09T14:00"
const iso = text.match(/(\d{4}-\d{2}-\d{2}(?:t\d{2}:\d{2}(?::\d{2})?)?)/);
if (iso) {
const d = new Date(iso[1]);
if (!Number.isNaN(d.getTime())) return d;
}
return null;
},
};
/** Apply a "HH:MM" or "HH Uhr" time hint to a date if found in the text. */
function withTime(date: Date, text: string): Date {
const hhmm = text.match(/\b(\d{1,2}):(\d{2})\b/);
if (hhmm) {
date.setHours(parseInt(hhmm[1], 10), parseInt(hhmm[2], 10), 0, 0);
return date;
}
const hhUhr = text.match(/\b(\d{1,2})\s*uhr\b/);
if (hhUhr) {
date.setHours(parseInt(hhUhr[1], 10), 0, 0, 0);
return date;
}
// No time hint — keep the original time-of-day
return date;
}
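
The header comment's claim that chrono-node would be a near drop-in upgrade can be made concrete. A hedged sketch of what runRules() might look like with that swap, assuming chrono-node were added as a dependency (its de locale and parseDate(text, ref) signature come from chrono-node's public API; nothing below is part of this commit):

import * as chrono from 'chrono-node';

async function runRulesWithChrono(input: ExtractDateInput): Promise<ExtractDateOutput> {
  const ref = input.now ?? new Date();
  // Try the German parser first, then fall back to the default (English) parser.
  return chrono.de.parseDate(input.text, ref) ?? chrono.parseDate(input.text, ref) ?? null;
}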

View file

@@ -0,0 +1,57 @@
/**
* summarizeTextTask produces a short summary of a longer piece of
* text. Used for things like dream entries, voice memo transcripts,
* meeting notes.
*
* Has NO runRules() implementation: a meaningful summary genuinely
* requires an LLM, and a fake "first sentence + ellipsis" fallback
* would mislead the user. A task without runRules forces the user to
* actually pick a higher tier in settings; the orchestrator's canRun()
* will return false for it while the user is on Tier 0.
*
* minTier is set to 'browser' rather than 'mana-server' because Gemma
* 4 E2B handles short summarization tasks well in the browser. For
* very long inputs (>4k tokens) the task could escalate to
* mana-server via a per-task override.
*/
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
export interface SummarizeInput {
text: string;
/** Approximate target length in sentences. Default 3. */
sentences?: number;
}
export type SummarizeOutput = string;
export const summarizeTextTask: LlmTask<SummarizeInput, SummarizeOutput> = {
name: 'common.summarize',
minTier: 'browser', // genuinely needs an LLM — no rules-based equivalent
contentClass: 'personal',
displayLabel: 'Text zusammenfassen',
async runLlm(input, backend: LlmBackend): Promise<SummarizeOutput> {
const sentences = input.sentences ?? 3;
const result = await backend.generate({
taskName: summarizeTextTask.name,
contentClass: summarizeTextTask.contentClass,
messages: [
{
role: 'system',
content: `Du fasst Text in ${sentences} prägnanten Sätzen zusammen. Behalte die wichtigsten Fakten und Beschlüsse, lasse Füller weg. Kein Markdown, keine Aufzählungen, keine Vorrede — nur die Zusammenfassung.`,
},
{ role: 'user', content: input.text },
],
temperature: 0.3,
maxTokens: 500,
});
return result.content.trim();
},
// No runRules — this task is impossible without an LLM. The
// orchestrator's canRun() will return false for users on Tier 0,
// and modules using this task should hide their summarize button
// when canRun() is false.
};
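
The note above about hiding the summarize button can be sketched from a module's perspective, using only the orchestrator exports added later in this commit (llmOrchestrator.canRun / run); the helper names are illustrative, not part of the commit:

import { llmOrchestrator } from '@mana/shared-llm';
import { summarizeTextTask } from '$lib/llm-tasks/summarize';

// Hide or disable the summarize action while no allowed tier can run the task.
export function canShowSummarize(): boolean {
  return llmOrchestrator.canRun(summarizeTextTask);
}

// Run the task; result.value is the summary string (SummarizeOutput).
export async function summarize(text: string): Promise<string> {
  const result = await llmOrchestrator.run(summarizeTextTask, { text, sentences: 3 });
  return result.value as string;
}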

View file

@@ -99,10 +99,9 @@ async function patchScene(
id: string,
patch: Partial<Pick<LocalWorkbenchScene, 'name' | 'icon' | 'openApps' | 'order'>>
) {
await db.table<LocalWorkbenchScene>(TABLE).update(id, {
...patch,
updatedAt: nowIso(),
});
// Strip Svelte 5 $state proxies — IndexedDB's structured clone can't serialize them.
const clean = $state.snapshot({ ...patch, updatedAt: nowIso() });
await db.table<LocalWorkbenchScene>(TABLE).update(id, clean);
}
async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchSceneApp[]) {
@@ -110,7 +109,9 @@ async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchScen
if (!id) return;
const current = scenesState.find((s) => s.id === id);
if (!current) return;
await patchScene(id, { openApps: fn(current.openApps) });
// Snapshot before handing to the mutator so callers operate on plain objects.
const plainApps = $state.snapshot(current.openApps) as WorkbenchSceneApp[];
await patchScene(id, { openApps: fn(plainApps) });
}
// ─── Public store ─────────────────────────────────────────────
@@ -191,7 +192,7 @@ export const workbenchScenesStore = {
id,
name: opts.name.trim() || 'Neue Szene',
icon: opts.icon,
openApps: opts.seedApps ? structuredClone(opts.seedApps) : [],
openApps: opts.seedApps ? ($state.snapshot(opts.seedApps) as WorkbenchSceneApp[]) : [],
order: maxOrder + 1,
createdAt: now,
updatedAt: now,
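
For context on why the snapshot calls are needed: Svelte 5 backs $state values with Proxies, and the structured clone algorithm (used by structuredClone and by IndexedDB writes) rejects proxy exotic objects with DataCloneError. A minimal illustration, not part of the commit, assuming it lives in a .svelte.ts module so the runes compile:

let scenes = $state([{ id: 's1', openApps: [{ appId: 'notes' }] }]);

// Throws DataCloneError: the nested array is still a reactive proxy.
// structuredClone(scenes[0].openApps);

// Works: $state.snapshot returns plain, structured-cloneable objects.
const plainApps = $state.snapshot(scenes[0].openApps);
structuredClone(plainApps);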

View file

@@ -11,6 +11,16 @@
type ModelKey,
} from '@mana/local-llm';
import { hasModelInCache } from '@mana/local-llm';
import {
llmOrchestrator,
llmSettingsState,
updateLlmSettings,
ALL_TIERS,
tierLabel,
type LlmTier,
} from '@mana/shared-llm';
import { extractDateTask } from '$lib/llm-tasks/extract-date';
import { summarizeTextTask } from '$lib/llm-tasks/summarize';
import { marked } from 'marked';
import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@mana/shared-icons';
@@ -43,7 +53,47 @@
// --- State ---
let selectedModel: ModelKey = $state('gemma-4-e2b');
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat');
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' | 'router' =
$state('chat');
// --- Router tab state ---
const settings = $derived(llmSettingsState.current);
let routerInput = $state('Treffen mit Sara morgen 14:30');
let routerRunning = $state(false);
let routerResult = $state<{
value: unknown;
source: string;
latencyMs: number;
attempted: string[];
} | null>(null);
let routerError = $state<string | null>(null);
function toggleAllowedTier(tier: LlmTier) {
const current = settings.allowedTiers;
const next = current.includes(tier) ? current.filter((t) => t !== tier) : [...current, tier];
updateLlmSettings({ allowedTiers: next });
}
async function runRouterTask(task: typeof extractDateTask | typeof summarizeTextTask) {
routerRunning = true;
routerResult = null;
routerError = null;
try {
const input = task === extractDateTask ? { text: routerInput } : { text: routerInput };
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = await llmOrchestrator.run(task as any, input);
routerResult = {
value: result.value,
source: result.source,
latencyMs: result.latencyMs,
attempted: result.attempted,
};
} catch (err) {
routerError = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
} finally {
routerRunning = false;
}
}
const supported = isLocalLlmSupported();
const status = getLocalLlmStatus();
@@ -581,7 +631,7 @@
<!-- Tabs -->
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab}
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }, { id: 'router', label: 'Router' }] as tab}
<button
onclick={() => (activeTab = tab.id as typeof activeTab)}
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
@@ -1134,5 +1184,100 @@
{/if}
</div>
{/if}
<!-- Router Tab — exercises the @mana/shared-llm tiered orchestrator -->
{#if activeTab === 'router'}
<div class="flex flex-col gap-4">
<div class="rounded-xl border border-border bg-card p-4">
<p class="mb-3 text-sm text-muted-foreground">
Smoke-Test für den tiered LLM-Router. Wähle welche Tiers der Orchestrator benutzen darf
— der Router wählt dann pro Task die erste passende Schicht aus deiner Liste.
</p>
<div class="mb-4">
<div class="mb-2 text-xs font-medium text-muted-foreground">Erlaubte Tiers</div>
<div class="flex flex-wrap gap-2">
{#each ALL_TIERS as tier}
{@const enabled = settings.allowedTiers.includes(tier)}
<button
onclick={() => toggleAllowedTier(tier)}
class="rounded-lg border px-3 py-1.5 text-xs font-medium transition-colors {enabled
? 'border-primary bg-primary/20 text-primary'
: 'border-border bg-background text-muted-foreground hover:text-foreground'}"
>
{tierLabel(tier)}
</button>
{/each}
</div>
<div class="mt-2 text-xs text-muted-foreground">
Aktuell: {settings.allowedTiers.length === 0
? 'keine LLM-Tiers — nur Tier 0 (Regeln)'
: settings.allowedTiers.map(tierLabel).join(' → ')}
</div>
</div>
<input
type="text"
bind:value={routerInput}
placeholder="Eingabetext für den Task..."
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
/>
<div class="flex flex-wrap gap-2">
<button
onclick={() => runRouterTask(extractDateTask)}
disabled={routerRunning || !routerInput.trim()}
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>
extractDate (hat T0-Fallback)
</button>
<button
onclick={() => runRouterTask(summarizeTextTask)}
disabled={routerRunning || !routerInput.trim()}
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
>
summarize (kein T0)
</button>
</div>
<div class="mt-2 text-xs text-muted-foreground">
extractDate.canRun: {llmOrchestrator.canRun(extractDateTask)} · summarize.canRun: {llmOrchestrator.canRun(
summarizeTextTask
)}
</div>
</div>
{#if routerError}
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-4">
<div class="text-sm font-medium text-red-400">Task fehlgeschlagen</div>
<div class="mt-1 font-mono text-xs text-red-300">{routerError}</div>
</div>
{/if}
{#if routerResult}
<div class="rounded-xl border border-border bg-card p-4">
<div class="mb-2 flex items-center gap-2">
<span
class="rounded-full border border-primary/40 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary"
>
{tierLabel(routerResult.source as LlmTier)}
</span>
<span class="text-xs text-muted-foreground">{routerResult.latencyMs} ms</span>
{#if routerResult.attempted.length > 1}
<span class="text-xs text-muted-foreground"
>(versucht: {routerResult.attempted.join(' → ')})</span
>
{/if}
</div>
<pre
class="overflow-x-auto rounded-lg bg-background p-3 font-mono text-xs text-foreground">{JSON.stringify(
routerResult.value,
null,
2
)}</pre>
</div>
{/if}
</div>
{/if}
{/if}
</div>

View file

@@ -67,6 +67,7 @@ COPY packages/credits ./packages/credits
COPY packages/spiral-db ./packages/spiral-db
COPY packages/wallpaper-generator ./packages/wallpaper-generator
COPY packages/local-llm ./packages/local-llm
COPY packages/shared-llm ./packages/shared-llm
# Install dependencies (shared packages only - app deps added later)
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \

View file

@@ -1,47 +1,26 @@
{
"name": "@mana/shared-llm",
"version": "1.0.0",
"version": "2.0.0",
"private": true,
"description": "Unified LLM client for all Mana backends via mana-llm service",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"description": "Tiered LLM orchestrator for Mana — routes tasks across rules / browser-edge / mana-server / cloud backends with explicit user-controlled privacy tiers",
"main": "./src/index.ts",
"types": "./src/index.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"require": "./dist/index.js"
},
"./standalone": {
"types": "./dist/standalone.d.ts",
"import": "./dist/standalone.js",
"require": "./dist/standalone.js"
}
".": "./src/index.ts"
},
"scripts": {
"build": "tsc",
"dev": "tsc --watch",
"clean": "rm -rf dist",
"type-check": "tsc --noEmit",
"test": "vitest run"
"clean": "rm -rf dist"
},
"dependencies": {
"@nestjs/common": "^10.0.0 || ^11.0.0",
"@nestjs/config": "^3.0.0 || ^4.0.0",
"@nestjs/core": "^10.0.0 || ^11.0.0",
"reflect-metadata": "^0.1.13 || ^0.2.0",
"rxjs": "^7.0.0"
},
"peerDependencies": {
"@nestjs/common": "^10.0.0 || ^11.0.0",
"@nestjs/config": "^3.0.0 || ^4.0.0",
"@nestjs/core": "^10.0.0 || ^11.0.0"
"@mana/local-llm": "workspace:*"
},
"devDependencies": {
"@types/node": "^20.0.0",
"typescript": "^5.0.0",
"vitest": "^4.1.2"
"@types/node": "^24.10.1",
"svelte": "^5.0.0",
"typescript": "^5.9.3"
},
"files": [
"dist"
]
"peerDependencies": {
"svelte": "^5.0.0"
}
}

View file

@@ -1,119 +0,0 @@
import { describe, it, expect } from 'vitest';
import { extractJson } from '../utils/json-extractor';
describe('extractJson', () => {
it('parses direct JSON object', () => {
const result = extractJson('{"name": "test", "value": 42}');
expect(result).toEqual({ name: 'test', value: 42 });
});
it('parses direct JSON array', () => {
const result = extractJson('[1, 2, 3]');
expect(result).toEqual([1, 2, 3]);
});
it('strips markdown json code fence', () => {
const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
const result = extractJson(input);
expect(result).toEqual({ category: 'bug', title: 'Fix login' });
});
it('strips markdown code fence without json label', () => {
const input = '```\n{"key": "value"}\n```';
const result = extractJson(input);
expect(result).toEqual({ key: 'value' });
});
it('extracts JSON from surrounding text', () => {
const input =
'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
const result = extractJson(input);
expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
});
it('extracts JSON array from surrounding text', () => {
const input = 'The items are: [1, 2, 3] as requested.';
const result = extractJson(input);
expect(result).toEqual([1, 2, 3]);
});
it('handles nested JSON objects', () => {
const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
const result = extractJson(input);
expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
});
it('handles JSON with escaped quotes in strings', () => {
const input = '{"text": "He said \\"hello\\""}';
const result = extractJson(input);
expect(result).toEqual({ text: 'He said "hello"' });
});
it('handles JSON with braces inside strings', () => {
const input = 'Result: {"code": "if (x) { return }"}';
const result = extractJson(input);
expect(result).toEqual({ code: 'if (x) { return }' });
});
it('trims whitespace before parsing', () => {
const input = ' \n {"key": "value"} \n ';
const result = extractJson(input);
expect(result).toEqual({ key: 'value' });
});
it('applies validation function on success', () => {
const validate = (data: unknown) => {
const obj = data as { name: string };
if (!obj.name) throw new Error('missing name');
return obj;
};
const result = extractJson('{"name": "test"}', validate);
expect(result).toEqual({ name: 'test' });
});
it('throws when validation fails', () => {
const validate = (data: unknown) => {
const obj = data as { name?: string };
if (!obj.name) throw new Error('missing name');
return obj;
};
expect(() => extractJson('{"value": 123}', validate)).toThrow();
});
it('throws on completely invalid input', () => {
expect(() => extractJson('This is just plain text with no JSON')).toThrow(
'Failed to extract JSON'
);
});
it('throws on empty input', () => {
expect(() => extractJson('')).toThrow('Failed to extract JSON');
});
it('handles real-world LLM response with preamble', () => {
const input = `Based on my analysis, here is the result:
\`\`\`json
{
"foods": [
{"name": "Apple", "calories": 95, "protein": 0.5}
],
"totalCalories": 95,
"confidence": 0.9
}
\`\`\`
This analysis is based on the image provided.`;
const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
expect(result.totalCalories).toBe(95);
expect(result.foods).toHaveLength(1);
});
it('prefers object over array when both exist', () => {
// Direct parse fails, fence fails, tries object first
const input = 'Some text {"key": "val"} and [1, 2, 3]';
const result = extractJson(input);
expect(result).toEqual({ key: 'val' });
});
});

View file

@@ -1,277 +0,0 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LlmClient } from '../llm-client';
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
import type { ChatCompletionResponse } from '../types/openai-compat.types';
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
const DEFAULT_OPTIONS: ResolvedLlmOptions = {
manaLlmUrl: 'http://localhost:3025',
defaultModel: 'ollama/gemma3:4b',
defaultVisionModel: 'ollama/llava:7b',
timeout: 30_000,
maxRetries: 0, // No retries in tests for simplicity
debug: false,
};
function mockCompletionResponse(
content: string,
model = 'ollama/gemma3:4b'
): ChatCompletionResponse {
return {
id: 'chatcmpl-test123',
object: 'chat.completion',
created: Date.now(),
model,
choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
};
}
function mockFetchOk(body: unknown): void {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () => Promise.resolve(body),
text: () => Promise.resolve(JSON.stringify(body)),
} as unknown as Response);
}
function mockFetchError(status: number, body = ''): void {
mockFetch.mockResolvedValueOnce({
ok: false,
status,
statusText: `Error ${status}`,
json: () => Promise.resolve({}),
text: () => Promise.resolve(body),
} as unknown as Response);
}
describe('LlmClient', () => {
let client: LlmClient;
beforeEach(() => {
vi.clearAllMocks();
client = new LlmClient(DEFAULT_OPTIONS);
});
describe('chat', () => {
it('sends correct request body', async () => {
mockFetchOk(mockCompletionResponse('Hello!'));
await client.chat('Hi there');
expect(mockFetch).toHaveBeenCalledTimes(1);
const [url, init] = mockFetch.mock.calls[0];
expect(url).toBe('http://localhost:3025/v1/chat/completions');
const body = JSON.parse(init.body);
expect(body.model).toBe('ollama/gemma3:4b');
expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
expect(body.stream).toBe(false);
});
it('includes system prompt when provided', async () => {
mockFetchOk(mockCompletionResponse('Response'));
await client.chat('Question', { systemPrompt: 'You are helpful.' });
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.messages).toEqual([
{ role: 'system', content: 'You are helpful.' },
{ role: 'user', content: 'Question' },
]);
});
it('uses custom model and temperature', async () => {
mockFetchOk(mockCompletionResponse('Response'));
await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('openrouter/gpt-4o');
expect(body.temperature).toBe(0.3);
});
it('returns ChatResult with content and usage', async () => {
mockFetchOk(mockCompletionResponse('Generated text'));
const result = await client.chat('Prompt');
expect(result.content).toBe('Generated text');
expect(result.model).toBe('ollama/gemma3:4b');
expect(result.usage.total_tokens).toBe(30);
expect(result.latencyMs).toBeGreaterThanOrEqual(0);
});
it('throws on error response', async () => {
mockFetchError(500, 'Internal Server Error');
await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
});
});
describe('json', () => {
it('extracts JSON from response', async () => {
mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));
const result = await client.json<{ category: string; title: string }>('Analyze this');
expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
});
it('extracts JSON from markdown-wrapped response', async () => {
mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));
const result = await client.json('Parse this');
expect(result.data).toEqual({ key: 'value' });
});
it('applies validation function', async () => {
mockFetchOk(mockCompletionResponse('{"name": "test"}'));
const validate = (data: unknown) => {
const obj = data as { name: string };
if (typeof obj.name !== 'string') throw new Error('invalid');
return obj;
};
const result = await client.json('Prompt', { validate });
expect(result.data.name).toBe('test');
});
it('retries JSON extraction on parse failure', async () => {
// First attempt returns bad JSON, second returns good
mockFetchOk(mockCompletionResponse('not json at all'));
mockFetchOk(mockCompletionResponse('{"valid": true}'));
const result = await client.json('Prompt', { jsonRetries: 1 });
expect(result.data).toEqual({ valid: true });
expect(mockFetch).toHaveBeenCalledTimes(2);
});
});
describe('vision', () => {
it('builds multimodal message with base64 image', async () => {
mockFetchOk(mockCompletionResponse('A rose'));
await client.vision('What is this?', 'abc123base64', 'image/jpeg');
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('ollama/llava:7b');
expect(body.messages[0].content).toEqual([
{ type: 'text', text: 'What is this?' },
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
]);
});
it('uses data URL as-is if already formatted', async () => {
mockFetchOk(mockCompletionResponse('A cat'));
await client.vision('What?', 'data:image/png;base64,xyz');
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
const imageUrl = body.messages[0].content[1].image_url.url;
expect(imageUrl).toBe('data:image/png;base64,xyz');
});
it('uses custom vision model when specified', async () => {
mockFetchOk(mockCompletionResponse('Result'));
await client.vision('Prompt', 'img', 'image/jpeg', {
visionModel: 'ollama/qwen3-vl:4b',
});
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('ollama/qwen3-vl:4b');
});
});
describe('visionJson', () => {
it('extracts JSON from vision response', async () => {
mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));
const result = await client.visionJson<{ species: string }>(
'Identify plant',
'imgdata',
'image/jpeg'
);
expect(result.data.species).toBe('Rose');
});
});
describe('health', () => {
it('returns health status', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () =>
Promise.resolve({
status: 'healthy',
providers: { ollama: { status: 'healthy' } },
}),
} as unknown as Response);
const health = await client.health();
expect(health.status).toBe('healthy');
});
it('returns unhealthy on network error', async () => {
mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
const health = await client.health();
expect(health.status).toBe('unhealthy');
});
});
describe('listModels', () => {
it('returns model list', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () =>
Promise.resolve({
data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
}),
} as unknown as Response);
const models = await client.listModels();
expect(models).toHaveLength(1);
expect(models[0].id).toBe('ollama/gemma3:4b');
});
});
describe('chatMessages', () => {
it('sends full message history', async () => {
mockFetchOk(mockCompletionResponse('Answer'));
await client.chatMessages([
{ role: 'system', content: 'Be brief.' },
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi!' },
{ role: 'user', content: 'How are you?' },
]);
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.messages).toHaveLength(4);
});
});
describe('embed', () => {
it('sends embedding request', async () => {
mockFetchOk({
object: 'list',
data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
model: 'ollama/gemma3:4b',
usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
});
const result = await client.embed('Hello world');
expect(result.embeddings).toHaveLength(1);
expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
});
});
});

View file

@@ -1,118 +0,0 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { retryFetch } from '../utils/retry';
// Mock global fetch
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
function mockResponse(status: number, body = ''): Response {
return {
ok: status >= 200 && status < 300,
status,
statusText: `Status ${status}`,
text: () => Promise.resolve(body),
json: () => Promise.resolve(JSON.parse(body || '{}')),
headers: new Headers(),
} as unknown as Response;
}
describe('retryFetch', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('returns on first successful attempt', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('retries on 503 and succeeds', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('retries on 429 rate limit', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(429))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('retries on network error and succeeds', async () => {
mockFetch
.mockRejectedValueOnce(new Error('ECONNREFUSED'))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('does NOT retry on 400 client error', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(400);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('does NOT retry on 401 unauthorized', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(401));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(401);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('does NOT retry on 404 not found', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(404));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(404);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('throws after exhausting all retries', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(503));
await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
'HTTP 503'
);
expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
});
it('throws after exhausting retries on network errors', async () => {
mockFetch
.mockRejectedValueOnce(new Error('ECONNREFUSED'))
.mockRejectedValueOnce(new Error('ECONNREFUSED'));
await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
'ECONNREFUSED'
);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('works with maxRetries: 0 (no retries)', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(503));
await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();
expect(mockFetch).toHaveBeenCalledTimes(1);
});
});

View file

@@ -0,0 +1,62 @@
/**
* Browser-edge backend wraps @mana/local-llm.
*
* Inference happens 100% on the user's device via WebGPU. The model
* (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the
* browser. We do NOT auto-load on backend creation; the user has to
* explicitly trigger a load via the settings page or by using a feature
* that calls `ensureLoaded()`. This avoids surprising 500 MB downloads.
*/
import {
localLLM,
LocalLLMEngine,
loadLocalLlm,
type ChatMessage as LocalChatMessage,
} from '@mana/local-llm';
import { EdgeLoadFailedError } from '../errors';
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
export class BrowserBackend implements LlmBackend {
readonly tier = 'browser' as const;
isAvailable(): boolean {
return LocalLLMEngine.isSupported();
}
isReady(): boolean {
return localLLM.isReady;
}
/** Trigger the one-time model download + WebGPU initialization.
* Idempotent; safe to call repeatedly. Throws EdgeLoadFailedError
* on failure (model corrupt, WebGPU OOM, etc.). */
async ensureLoaded(): Promise<void> {
try {
await loadLocalLlm();
} catch (err) {
throw new EdgeLoadFailedError(err instanceof Error ? err.message : String(err));
}
}
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
await this.ensureLoaded();
const result = await localLLM.generate({
messages: req.messages as LocalChatMessage[],
temperature: req.temperature,
maxTokens: req.maxTokens,
onToken: req.onToken,
});
return {
content: result.content,
usage: {
promptTokens: result.usage.prompt_tokens,
completionTokens: result.usage.completion_tokens,
totalTokens: result.usage.total_tokens,
},
latencyMs: result.latencyMs,
};
}
}
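
A short sketch of the explicit preload flow the header comment describes (for example from a settings page); the class and methods are the ones defined above, the surrounding flow is illustrative:

const browser = new BrowserBackend();

if (browser.isAvailable() && !browser.isReady()) {
  // One-time ~500 MB download plus WebGPU init; throws EdgeLoadFailedError on failure.
  await browser.ensureLoaded();
}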

View file

@@ -0,0 +1,44 @@
/**
* Cloud backend calls services/mana-llm with a `google/...` model
* string. mana-llm's ProviderRouter recognizes the `google/` prefix
* and routes to its Google Gemini provider, which holds the API key
* server-side (we never expose the key to the browser).
*
* Default model is google/gemini-2.0-flash. The mana-llm google.py
* provider also supports gemini-2.5-pro for higher-quality calls, but
* 2.0-flash is the right default: fast, cheap, multimodal, and plenty
* good for the kind of structured-output tasks Mana modules need.
*
* Cloud is gated by `cloudConsentGiven` in LlmSettings: even if a
* user has 'cloud' in their allowedTiers, the orchestrator will skip
* this backend until they've ticked the consent checkbox once.
*/
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
export interface CloudBackendOptions {
/** Gemini model to send. Default 'google/gemini-2.0-flash'. */
defaultModel?: string;
}
export class CloudBackend implements LlmBackend {
readonly tier = 'cloud' as const;
private readonly defaultModel: string;
constructor(opts: CloudBackendOptions = {}) {
this.defaultModel = opts.defaultModel ?? 'google/gemini-2.0-flash';
}
isAvailable(): boolean {
return resolveLlmBaseUrl().length > 0;
}
isReady(): boolean {
return this.isAvailable();
}
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
return callManaLlmStreaming(this.tier, this.defaultModel, req);
}
}
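
The consent gate itself lives in settings, not in this backend. A hedged illustration of enabling the cloud tier via the store export; the allowedTiers and cloudConsentGiven field names are taken from the comments in this commit rather than verified against types.ts:

import { updateLlmSettings } from '@mana/shared-llm';

// Allow the cloud tier AND record one-time consent; without cloudConsentGiven
// the orchestrator keeps skipping CloudBackend even though 'cloud' is allowed.
updateLlmSettings({
  allowedTiers: ['browser', 'mana-server', 'cloud'],
  cloudConsentGiven: true,
});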

View file

@@ -0,0 +1,43 @@
/**
* Mana-server backend calls services/mana-llm with an Ollama model
* string. mana-llm's ProviderRouter recognizes plain Ollama model names
* (no provider prefix) and routes them to the local Ollama instance on
* the Mac Mini, with automatic Gemini fallback if Ollama is overloaded.
*
* The default model is gemma3:4b, the same model family as the browser
* tier (Gemma 4 E2B is the smaller sibling), so prompts behave
* consistently when a task automatically falls back between tiers.
*/
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
export interface ManaServerBackendOptions {
/** Ollama model name to send to mana-llm. Default 'gemma3:4b'. */
defaultModel?: string;
}
export class ManaServerBackend implements LlmBackend {
readonly tier = 'mana-server' as const;
private readonly defaultModel: string;
constructor(opts: ManaServerBackendOptions = {}) {
this.defaultModel = opts.defaultModel ?? 'gemma3:4b';
}
isAvailable(): boolean {
// Available if we have a base URL configured at all. We don't
// ping /health here — that adds latency to every isAvailable()
// check. The first real call will fail loudly if mana-llm is down.
return resolveLlmBaseUrl().length > 0;
}
isReady(): boolean {
// Stateless from our side — assume ready if available.
return this.isAvailable();
}
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
return callManaLlmStreaming(this.tier, this.defaultModel, req);
}
}

View file

@@ -0,0 +1,135 @@
/**
* Shared HTTP transport for the mana-server and cloud backends.
*
* Both tiers POST to the same OpenAI-compatible endpoint on
* services/mana-llm; they only differ in the `model:` string they
* send (which selects which provider mana-llm internally routes to).
*
* The endpoint is `/v1/chat/completions` and the wire format is
* straight OpenAI SSE: `data: {…}\n\n` lines, terminated by
* `data: [DONE]`. The hand-rolled parser is the same shape as the
* existing playground client (apps/mana/apps/web/src/lib/modules/
* playground/llm.ts) so the two consumers stay aligned and can be
* unified later if we want.
*/
import { BackendUnreachableError, ProviderBlockedError } from '../errors';
import type { LlmTier } from '../tiers';
import type { GenerateResult, LlmTaskRequest } from '../types';
const DEFAULT_LLM_URL = 'http://localhost:3025';
/** Resolve the mana-llm base URL from the window-injected env, falling
* back to localhost. Mirrors the playground client pattern. */
export function resolveLlmBaseUrl(): string {
if (typeof window !== 'undefined') {
const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string })
.__PUBLIC_MANA_LLM_URL__;
if (fromWindow) return fromWindow.replace(/\/$/, '');
}
return DEFAULT_LLM_URL;
}
/**
* Send a chat completion to mana-llm and yield streaming token deltas.
* The caller is responsible for assembling the final string and tracking
* latency.
*
* `tier` is only used for error tagging; both 'mana-server' and 'cloud'
* call the same endpoint with different model strings.
*/
export async function callManaLlmStreaming(
tier: Exclude<LlmTier, 'none' | 'browser'>,
model: string,
req: LlmTaskRequest
): Promise<GenerateResult> {
const url = `${resolveLlmBaseUrl()}/v1/chat/completions`;
const start = performance.now();
let res: Response;
try {
res = await fetch(url, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
credentials: 'include', // forwards the Mana auth cookie if present
body: JSON.stringify({
model,
messages: req.messages,
temperature: req.temperature ?? 0.7,
max_tokens: req.maxTokens ?? 1024,
stream: true,
}),
});
} catch (err) {
// Network failure — DNS, refused connection, CORS preflight, etc.
throw new BackendUnreachableError(
tier,
undefined,
err instanceof Error ? err.message : String(err)
);
}
if (!res.ok || !res.body) {
const text = await res.text().catch(() => '');
// 451 = upstream blocked content (we use this convention; Gemini
// safety blocks are mapped to 451 in mana-llm's google provider).
// Other 4xx/5xx are generic server errors.
if (res.status === 451 || /safety|blocked|filter/i.test(text)) {
throw new ProviderBlockedError(tier, text || `HTTP ${res.status}`);
}
throw new BackendUnreachableError(tier, res.status, text);
}
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
let collected = '';
let promptTokens = 0;
let completionTokens = 0;
while (true) {
const { value, done } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
// SSE frames are separated by blank lines.
let sep: number;
while ((sep = buffer.indexOf('\n\n')) !== -1) {
const frame = buffer.slice(0, sep);
buffer = buffer.slice(sep + 2);
for (const line of frame.split('\n')) {
if (!line.startsWith('data:')) continue;
const data = line.slice(5).trim();
if (!data || data === '[DONE]') continue;
try {
const json = JSON.parse(data) as {
choices?: Array<{ delta?: { content?: string } }>;
usage?: { prompt_tokens?: number; completion_tokens?: number };
};
const delta = json.choices?.[0]?.delta?.content;
if (delta) {
collected += delta;
req.onToken?.(delta);
}
if (json.usage) {
promptTokens = json.usage.prompt_tokens ?? promptTokens;
completionTokens = json.usage.completion_tokens ?? completionTokens;
}
} catch {
// Malformed frame — keepalive comment, skip silently.
}
}
}
}
return {
content: collected,
usage: {
promptTokens,
completionTokens,
totalTokens: promptTokens + completionTokens,
},
latencyMs: Math.round(performance.now() - start),
};
}
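
For orientation, a direct call to this transport would look roughly like the following; normally only the two backends call it, and the request fields mirror how LlmTaskRequest is used elsewhere in this commit:

const result = await callManaLlmStreaming('mana-server', 'gemma3:4b', {
  taskName: 'common.summarize',
  contentClass: 'personal',
  messages: [{ role: 'user', content: 'Fasse diesen Text kurz zusammen: ...' }],
  temperature: 0.3,
  maxTokens: 200,
  onToken: (token) => console.log(token), // streamed deltas arrive here as they decode
});

console.log(result.content, result.usage.totalTokens, result.latencyMs);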

View file

@@ -0,0 +1,80 @@
/**
* Typed error classes for the LLM orchestrator. UI code can `instanceof`
* these to render task-appropriate failure states (retry button, switch
* tier prompt, "blocked by safety filter" notice, etc.).
*/
import type { LlmTier } from './tiers';
export class LlmError extends Error {
constructor(message: string) {
super(message);
this.name = 'LlmError';
}
}
/** No tier from the user's preference list was able to run the task. */
export class NoTierAvailableError extends LlmError {
constructor(
public readonly taskName: string,
public readonly attempted: LlmTier[]
) {
super(`No tier could run task '${taskName}' (attempted: ${attempted.join(', ') || 'none'})`);
this.name = 'NoTierAvailableError';
}
}
/** The user's chosen tier is below the task's declared minimum tier. */
export class TierTooLowError extends LlmError {
constructor(
public readonly taskName: string,
public readonly requiredTier: LlmTier,
public readonly userTier: LlmTier
) {
super(
`Task '${taskName}' requires tier '${requiredTier}' but user is on '${userTier}'. Activate the higher tier in settings.`
);
this.name = 'TierTooLowError';
}
}
/**
* The upstream provider blocked the content (e.g. Gemini safety filter,
* OpenAI moderation). The UI should offer "retry" + "switch to another
* provider" options to the user; this is NOT auto-recoverable because
* a different provider might allow the same content (or might not).
*/
export class ProviderBlockedError extends LlmError {
constructor(
public readonly tier: LlmTier,
public readonly providerMessage: string
) {
super(`Provider '${tier}' blocked the request: ${providerMessage}`);
this.name = 'ProviderBlockedError';
}
}
/** Network/server error from a remote tier (mana-server, cloud). */
export class BackendUnreachableError extends LlmError {
constructor(
public readonly tier: LlmTier,
public readonly httpStatus?: number,
details?: string
) {
super(
`Backend '${tier}' is unreachable${httpStatus ? ` (HTTP ${httpStatus})` : ''}${details ? `: ${details}` : ''}`
);
this.name = 'BackendUnreachableError';
}
}
/**
* The browser tier specifically failed to load: model download
* interrupted, WebGPU adapter request failed, OOM, etc.
*/
export class EdgeLoadFailedError extends LlmError {
constructor(public readonly cause: string) {
super(`Edge LLM failed to load: ${cause}`);
this.name = 'EdgeLoadFailedError';
}
}
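
A sketch of the instanceof-based handling the header comment has in mind; the function and its messages are illustrative, only the error classes and their fields come from this file:

function describeLlmFailure(err: unknown): string {
  if (err instanceof TierTooLowError)
    return `Requires tier '${err.requiredTier}', but the user is on '${err.userTier}'.`;
  if (err instanceof ProviderBlockedError)
    return `Blocked by the '${err.tier}' provider: ${err.providerMessage}. Retry or switch tiers.`;
  if (err instanceof EdgeLoadFailedError)
    return `Local model failed to load: ${err.cause}`;
  if (err instanceof BackendUnreachableError)
    return `Backend '${err.tier}' is unreachable${err.httpStatus ? ` (HTTP ${err.httpStatus})` : ''}.`;
  if (err instanceof NoTierAvailableError)
    return `No allowed tier could run '${err.taskName}'.`;
  return err instanceof Error ? err.message : String(err);
}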

View file

@@ -1,39 +1,44 @@
// Module
export { LlmModule } from './llm.module';
export { LlmClientService } from './llm-client.service';
export { LLM_MODULE_OPTIONS } from './llm.constants';
// Core client (for advanced use cases)
export { LlmClient } from './llm-client';
// Interfaces
export type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
ResolvedLlmOptions,
} from './interfaces';
export { resolveOptions } from './interfaces';
// Types
// Tiers + types
export { ALL_TIERS, TIER_RANK, tierLabel, type LlmTier } from './tiers';
export type {
CapabilityRequirements,
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
ContentClass,
GenerateOptions,
GenerateResult,
LlmBackend,
LlmSettings,
LlmTaskRequest,
LlmTaskResult,
} from './types';
export { DEFAULT_LLM_SETTINGS } from './types';
// Utilities
export { extractJson } from './utils';
// Errors
export {
BackendUnreachableError,
EdgeLoadFailedError,
LlmError,
NoTierAvailableError,
ProviderBlockedError,
TierTooLowError,
} from './errors';
// Metrics
export { LlmMetricsCollector } from './utils';
export type { LlmRequestMetrics, MetricsCallback } from './utils';
// Task contract
export { buildTaskRequest, type LlmTask } from './task';
// Orchestrator (rarely instantiated directly — most consumers use the
// store's singleton instead)
export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator';
// Backends (exported for tests + custom orchestrator setups)
export { BrowserBackend } from './backends/browser';
export { CloudBackend, type CloudBackendOptions } from './backends/cloud';
export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server';
// Singleton store + Svelte 5 reactive hooks
export {
llmOrchestrator,
llmSettingsState,
updateLlmSettings,
useTaskAvailability,
} from './store.svelte';

View file

@@ -1,8 +0,0 @@
export type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
ResolvedLlmOptions,
} from './llm-options.interface';
export { resolveOptions } from './llm-options.interface';

View file

@@ -1,52 +0,0 @@
import type { ModuleMetadata, Type } from '@nestjs/common';
import type { MetricsCallback } from '../utils/metrics';
export interface LlmModuleOptions {
/** mana-llm service URL (default: http://localhost:3025) */
manaLlmUrl?: string;
/** Default text model (default: ollama/gemma3:4b) */
defaultModel?: string;
/** Default vision model (default: ollama/llava:7b) */
defaultVisionModel?: string;
/** Request timeout in ms (default: 120000) */
timeout?: number;
/** Max retries on transient failures (default: 2) */
maxRetries?: number;
/** Enable debug logging (default: false) */
debug?: boolean;
/** Optional callback invoked after every LLM request with metrics */
onMetrics?: MetricsCallback;
}
export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
useExisting?: Type<LlmOptionsFactory>;
useClass?: Type<LlmOptionsFactory>;
useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
inject?: any[];
}
export interface LlmOptionsFactory {
createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}
export interface ResolvedLlmOptions {
manaLlmUrl: string;
defaultModel: string;
defaultVisionModel: string;
timeout: number;
maxRetries: number;
debug: boolean;
onMetrics?: MetricsCallback;
}
export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
return {
manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
timeout: options.timeout ?? 120_000,
maxRetries: options.maxRetries ?? 2,
debug: options.debug ?? false,
onMetrics: options.onMetrics,
};
}

View file

@@ -1,16 +0,0 @@
import { Inject, Injectable } from '@nestjs/common';
import { LlmClient } from './llm-client';
import { LLM_MODULE_OPTIONS } from './llm.constants';
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
import { resolveOptions } from './interfaces/llm-options.interface';
/**
* NestJS injectable wrapper around LlmClient.
* All logic lives in the framework-agnostic LlmClient base class.
*/
@Injectable()
export class LlmClientService extends LlmClient {
constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
super(resolveOptions(options));
}
}

View file

@@ -1,392 +0,0 @@
/**
* Framework-agnostic LLM client that communicates with the mana-llm service.
*
* This is the core implementation shared between the NestJS LlmClientService
* and the standalone LlmClient export (for non-NestJS consumers like bot-services).
*/
import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
import type {
ChatMessage,
ChatOptions,
ChatResult,
JsonOptions,
JsonResult,
VisionOptions,
TokenUsage,
ModelInfo,
HealthStatus,
} from './types/chat.types';
import type {
ChatCompletionRequest,
ChatCompletionResponse,
EmbeddingResponse,
} from './types/openai-compat.types';
import type { LlmRequestMetrics } from './utils/metrics';
import { extractJson } from './utils/json-extractor';
import { retryFetch } from './utils/retry';
function createTimeoutSignal(ms: number): any {
const controller = new AbortController();
setTimeout(() => controller.abort(), ms);
return controller.signal;
}
export class LlmClient {
private readonly baseUrl: string;
private readonly options: ResolvedLlmOptions;
constructor(options: ResolvedLlmOptions) {
this.options = options;
this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
}
// ---------------------------------------------------------------------------
// Text Chat
// ---------------------------------------------------------------------------
/** Simple chat with a single prompt string. */
async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
return this.chatMessages(messages, opts);
}
/** Chat with full message history. */
async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
const requestedModel = opts?.model ?? this.options.defaultModel;
const body = this.buildRequest(messages, opts, false);
const start = Date.now();
try {
const response = await this.fetchCompletion(body, opts?.timeout);
const latencyMs = Date.now() - start;
const usage = response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
this.emitMetrics({
model: requestedModel,
actualModel: response.model,
type: 'chat',
latencyMs,
promptTokens: usage.prompt_tokens,
completionTokens: usage.completion_tokens,
totalTokens: usage.total_tokens,
wasFallback: response.model !== requestedModel && !response.model.endsWith(requestedModel),
success: true,
});
return {
content: response.choices[0]?.message?.content ?? '',
model: response.model,
usage,
latencyMs,
};
} catch (error) {
this.emitMetrics({
model: requestedModel,
actualModel: requestedModel,
type: 'chat',
latencyMs: Date.now() - start,
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
wasFallback: false,
success: false,
error: error instanceof Error ? error.message : String(error),
});
throw error;
}
}
// ---------------------------------------------------------------------------
// Streaming
// ---------------------------------------------------------------------------
/** Streaming chat - returns an async iterable of text tokens. */
async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
yield* this.chatStreamMessages(messages, opts);
}
/** Streaming chat with full message history. */
async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
const body = this.buildRequest(messages, opts, true);
const timeout = opts?.timeout ?? this.options.timeout;
const response = await retryFetch(
`${this.baseUrl}/v1/chat/completions`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: createTimeoutSignal(timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm stream error ${response.status}: ${text}`);
}
if (!response.body) {
throw new Error('mana-llm returned no response body for stream');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || !trimmed.startsWith('data: ')) continue;
const data = trimmed.slice(6);
if (data === '[DONE]') return;
try {
const chunk = JSON.parse(data);
const content = chunk.choices?.[0]?.delta?.content;
if (content) yield content;
} catch {
// Skip unparseable chunks
}
}
}
} finally {
reader.releaseLock();
}
}
// ---------------------------------------------------------------------------
// Structured JSON Output
// ---------------------------------------------------------------------------
/** Chat that extracts and parses JSON from the response. */
async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
return this.jsonMessages<T>(messages, opts);
}
/** JSON extraction from full message history. */
async jsonMessages<T = unknown>(
messages: ChatMessage[],
opts?: JsonOptions<T>
): Promise<JsonResult<T>> {
const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
let lastError: Error | undefined;
for (let attempt = 0; attempt < maxAttempts; attempt++) {
const result = await this.chatMessages(messages, opts);
try {
const data = extractJson<T>(result.content, opts?.validate);
return { ...result, data };
} catch (error) {
lastError = error instanceof Error ? error : new Error(String(error));
if (this.options.debug) {
console.warn(
`[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
lastError.message
);
}
}
}
throw lastError ?? new Error('JSON extraction failed');
}
// ---------------------------------------------------------------------------
// Vision
// ---------------------------------------------------------------------------
/** Analyze an image with a text prompt. */
async vision(
prompt: string,
imageBase64: string,
mimeType?: string,
opts?: VisionOptions
): Promise<ChatResult> {
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
const model = opts?.visionModel ?? this.options.defaultVisionModel;
return this.chatMessages(messages, { ...opts, model });
}
/** Vision + JSON extraction. */
async visionJson<T = unknown>(
prompt: string,
imageBase64: string,
mimeType?: string,
opts?: VisionOptions & JsonOptions<T>
): Promise<JsonResult<T>> {
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
const model = opts?.visionModel ?? this.options.defaultVisionModel;
return this.jsonMessages<T>(messages, { ...opts, model });
}
// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------
/** Generate embeddings for text input. */
async embed(
input: string | string[],
model?: string
): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
const response = await retryFetch(
`${this.baseUrl}/v1/embeddings`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: model ?? this.options.defaultModel,
input,
}),
signal: createTimeoutSignal(this.options.timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
}
const data = (await response.json()) as EmbeddingResponse;
return {
embeddings: data.data.map((d) => d.embedding),
usage: data.usage,
};
}
// ---------------------------------------------------------------------------
// Health & Models
// ---------------------------------------------------------------------------
/** Check mana-llm health and provider status. */
async health(): Promise<HealthStatus> {
try {
const response = await fetch(`${this.baseUrl}/health`, {
signal: createTimeoutSignal(5_000),
});
if (!response.ok) {
return { status: 'unhealthy', providers: {} };
}
return (await response.json()) as HealthStatus;
} catch {
return { status: 'unhealthy', providers: {} };
}
}
/** List available models from all providers. */
async listModels(): Promise<ModelInfo[]> {
const response = await fetch(`${this.baseUrl}/v1/models`, {
signal: createTimeoutSignal(10_000),
});
if (!response.ok) {
throw new Error(`mana-llm models error ${response.status}`);
}
const data = (await response.json()) as { data: ModelInfo[] };
return data.data ?? [];
}
// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------
private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
const messages: ChatMessage[] = [];
if (systemPrompt) {
messages.push({ role: 'system', content: systemPrompt });
}
messages.push({ role: 'user', content: prompt });
return messages;
}
private buildVisionMessages(
prompt: string,
imageBase64: string,
mimeType?: string,
systemPrompt?: string
): ChatMessage[] {
const mime = mimeType ?? 'image/jpeg';
const dataUrl = imageBase64.startsWith('data:')
? imageBase64
: `data:${mime};base64,${imageBase64}`;
const messages: ChatMessage[] = [];
if (systemPrompt) {
messages.push({ role: 'system', content: systemPrompt });
}
messages.push({
role: 'user',
content: [
{ type: 'text', text: prompt },
{ type: 'image_url', image_url: { url: dataUrl } },
],
});
return messages;
}
private buildRequest(
messages: ChatMessage[],
opts: ChatOptions | undefined,
stream: boolean
): ChatCompletionRequest {
const request: ChatCompletionRequest = {
model: opts?.model ?? this.options.defaultModel,
messages,
stream,
};
if (opts?.temperature !== undefined) request.temperature = opts.temperature;
if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;
return request;
}
private async fetchCompletion(
body: ChatCompletionRequest,
timeoutOverride?: number
): Promise<ChatCompletionResponse> {
const timeout = timeoutOverride ?? this.options.timeout;
const response = await retryFetch(
`${this.baseUrl}/v1/chat/completions`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: createTimeoutSignal(timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm error ${response.status}: ${text}`);
}
return (await response.json()) as ChatCompletionResponse;
}
private emitMetrics(metrics: LlmRequestMetrics): void {
if (this.options.onMetrics) {
try {
this.options.onMetrics(metrics);
} catch {
// Never let metrics callback break the request
}
}
}
}

View file

@ -1 +0,0 @@
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';

View file

@ -1,80 +0,0 @@
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
import type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
} from './interfaces/llm-options.interface';
import { LlmClientService } from './llm-client.service';
import { LLM_MODULE_OPTIONS } from './llm.constants';
@Global()
@Module({})
export class LlmModule {
static forRoot(options: LlmModuleOptions): DynamicModule {
return {
module: LlmModule,
providers: [
{
provide: LLM_MODULE_OPTIONS,
useValue: options,
},
LlmClientService,
],
exports: [LLM_MODULE_OPTIONS, LlmClientService],
};
}
static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
const asyncProviders = this.createAsyncProviders(options);
return {
module: LlmModule,
imports: options.imports || [],
providers: [...asyncProviders, LlmClientService],
exports: [LLM_MODULE_OPTIONS, LlmClientService],
};
}
private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
if (options.useFactory) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: options.useFactory,
inject: options.inject || [],
},
];
}
const useClass = options.useClass;
const useExisting = options.useExisting;
if (useClass) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: async (optionsFactory: LlmOptionsFactory) =>
await optionsFactory.createLlmOptions(),
inject: [useClass],
},
{
provide: useClass,
useClass,
},
];
}
if (useExisting) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: async (optionsFactory: LlmOptionsFactory) =>
await optionsFactory.createLlmOptions(),
inject: [useExisting],
},
];
}
return [];
}
}

View file

@ -0,0 +1,258 @@
/**
* LlmOrchestrator routes LlmTasks across the four privacy tiers
* (none / browser / mana-server / cloud) according to the user's
* settings, the task's minimum tier, and the input's content class.
*
* Routing rules applied in this exact order:
*
* 1. If the task's minTier is above the user's HIGHEST allowed tier,
* we cannot run the LLM path at all. Try runRules() if defined,
* else throw TierTooLowError.
*
* 2. If contentClass is 'sensitive', strip 'mana-server' and 'cloud'
 *    from the candidate tier list: sensitive content NEVER leaves
* the device, even if the user has these tiers enabled globally.
* This is the privacy backstop the user can't accidentally
* override task-by-task.
*
* 3. If a per-task override exists in settings.taskOverrides, use it
 *    verbatim (still subject to rule 2; task overrides cannot
* bypass the sensitive-content backstop).
*
* 4. Otherwise, pick the FIRST tier from settings.allowedTiers that
* (a) is in the candidate set after rules 1+2, (b) has an
* available + ready backend, (c) the cloud-consent gate is
* satisfied if it's the cloud tier.
*
* 5. Run the task on the chosen backend.
*
* 6. If the run throws and settings.fallbackToRulesOnError is true
* and the task has a runRules() implementation, fall back to
 *    rules. We do NOT auto-fall to a different LLM tier on error;
* the user explicitly chose this tier and silently switching
* providers would be a privacy/trust break.
*
* 7. If everything fails, throw NoTierAvailableError. UI catches it
* and offers a "retry" / "switch tier" / "enter manually" prompt.
*/
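/*
 * Illustrative sketch (not shipped code) of how a module drives the rules
 * above. `summarizeTask` is a hypothetical LlmTask; `run()`, the result
 * fields, and the error classes are the actual API surface.
 *
 *   import { llmOrchestrator } from '@mana/shared-llm';
 *
 *   try {
 *     const result = await llmOrchestrator.run(summarizeTask, { text });
 *     console.debug(`ran on ${result.source}, attempted: ${result.attempted.join(', ')}`);
 *   } catch (err) {
 *     if (err instanceof TierTooLowError) {
 *       // feature needs a higher tier than the user allows: point to settings
 *     } else if (err instanceof NoTierAvailableError) {
 *       // offer "retry" / "switch tier" / "enter manually"
 *     }
 *   }
 */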
import {
BackendUnreachableError,
NoTierAvailableError,
ProviderBlockedError,
TierTooLowError,
} from './errors';
import type { LlmTask } from './task';
import type { LlmTier } from './tiers';
import { TIER_RANK } from './tiers';
import type { LlmBackend, LlmSettings, LlmTaskRequest, LlmTaskResult } from './types';
export interface LlmOrchestratorOptions {
settings: LlmSettings;
backends: LlmBackend[];
}
export class LlmOrchestrator {
private settings: LlmSettings;
private backendsByTier: Map<LlmTier, LlmBackend>;
constructor(opts: LlmOrchestratorOptions) {
this.settings = opts.settings;
this.backendsByTier = new Map();
for (const b of opts.backends) {
this.backendsByTier.set(b.tier, b);
}
}
  /** Replace the settings object; call this when the user updates
* their preferences in the settings UI. */
updateSettings(settings: LlmSettings): void {
this.settings = settings;
}
/** Public read-only view for UI components that want to react to
* the current settings (e.g. the tier selector). */
getSettings(): Readonly<LlmSettings> {
return this.settings;
}
/**
* Can the user (with their current settings) run this task at all?
* The UI uses this to decide whether to show a feature button as
   * enabled / disabled / hidden. Does NOT check backend readiness;
* that's a per-call concern. Just checks "is there any conceivable
* tier in the user's allowedTiers that satisfies task.minTier and
* is permitted for task.contentClass?".
*/
canRun<TIn, TOut>(task: LlmTask<TIn, TOut>): boolean {
    // Tasks that can run on Tier 0 (minTier 'none' or a runRules fallback) are always runnable
if (task.minTier === 'none') return true;
if (task.runRules) return true;
const candidates = this.candidateTiers(task);
return candidates.some((t) => {
const backend = this.backendsByTier.get(t);
return backend?.isAvailable() ?? false;
});
}
/**
* Run the task. Honors the routing rules above. The returned
* LlmTaskResult includes which tier actually ran, plus a trail
* of tiers that were attempted and skipped before it.
*/
async run<TIn, TOut>(task: LlmTask<TIn, TOut>, input: TIn): Promise<LlmTaskResult<TOut>> {
const start = performance.now();
const attempted: LlmTier[] = [];
// Rule 1: tier-too-low check
const userMaxTier = this.userMaxTier();
if (TIER_RANK[task.minTier] > TIER_RANK[userMaxTier]) {
if (task.runRules) {
const value = await task.runRules(input);
return {
value,
source: 'none',
latencyMs: Math.round(performance.now() - start),
attempted: ['none'],
};
}
throw new TierTooLowError(task.name, task.minTier, userMaxTier);
}
    // Rules 2-3: candidate tier list and per-task override
const candidates = this.candidateTiers(task);
const override = this.settings.taskOverrides[task.name];
const orderedTiers = override ? [override].filter((t) => candidates.includes(t)) : candidates;
// Rule 4-5: try the first runnable tier
for (const tier of orderedTiers) {
if (tier === 'none') {
if (task.runRules) {
const value = await task.runRules(input);
return {
value,
source: 'none',
latencyMs: Math.round(performance.now() - start),
attempted: [...attempted, 'none'],
};
}
attempted.push('none');
continue;
}
// Cloud-consent gate
if (tier === 'cloud' && !this.settings.cloudConsentGiven) {
attempted.push('cloud');
continue;
}
const backend = this.backendsByTier.get(tier);
if (!backend) {
attempted.push(tier);
continue;
}
if (!backend.isAvailable()) {
attempted.push(tier);
continue;
}
const ready = await backend.isReady();
if (!ready) {
attempted.push(tier);
continue;
}
      try {
        // `request` is intentionally unused for now: the task constructs
        // its own LlmTaskRequest internally via runLlm. We build it here
        // only as a future hook for telemetry.
        const request = this.buildRequest(task, input);
        void request;
        const generated = await task.runLlm(input, backend);
        return {
          value: generated,
          source: tier,
          latencyMs: Math.round(performance.now() - start),
          attempted: [...attempted, tier],
        };
} catch (err) {
attempted.push(tier);
// Rule 6: rules-fallback on error
if (
this.settings.fallbackToRulesOnError &&
task.runRules &&
!(err instanceof ProviderBlockedError)
) {
// Provider-blocked errors should NOT silently fall to
// rules — they should bubble up so the UI can offer
// "retry" / "switch tier" prompts. Other errors
// (network failure, OOM, model not loaded) get the
// silent rules fallback.
try {
const value = await task.runRules(input);
return {
value,
source: 'none',
latencyMs: Math.round(performance.now() - start),
attempted: [...attempted, 'none'],
};
} catch {
// rules fallback also failed — re-throw original
throw err;
}
}
// Re-throw provider blocks and unrecoverable errors
if (err instanceof ProviderBlockedError || err instanceof BackendUnreachableError) {
throw err;
}
// Unknown error — try the next tier in the list
continue;
}
}
throw new NoTierAvailableError(task.name, attempted);
}
/** Highest tier in the user's allowedTiers list (by rank). */
private userMaxTier(): LlmTier {
if (this.settings.allowedTiers.length === 0) return 'none';
return this.settings.allowedTiers.reduce(
(max, t) => (TIER_RANK[t] > TIER_RANK[max] ? t : max),
'none' as LlmTier
);
}
/** Candidate tier list after applying rules 1 + 2.
* - Rule 1: only tiers >= task.minTier
* - Rule 2: sensitive content excludes mana-server + cloud
* Also always includes 'none' at the end if the task has runRules. */
private candidateTiers<TIn, TOut>(task: LlmTask<TIn, TOut>): LlmTier[] {
// Start from the user's allowed tiers, in their preference order
let tiers = this.settings.allowedTiers.filter((t) => TIER_RANK[t] >= TIER_RANK[task.minTier]);
// Rule 2: sensitive content backstop
if (task.contentClass === 'sensitive') {
      tiers = tiers.filter((t) => t !== 'mana-server' && t !== 'cloud');
}
// 'none' is always tail-appended if the task has a rules implementation,
// so the for-loop in run() naturally falls through to it.
if (task.runRules && !tiers.includes('none')) {
tiers.push('none');
}
return tiers;
}
private buildRequest<TIn, TOut>(task: LlmTask<TIn, TOut>, _input: TIn): LlmTaskRequest {
// Right now this is a placeholder — tasks build their own
// LlmTaskRequest inside runLlm. Once we add token-counting
// telemetry we'll move that construction up here so the
// orchestrator can prepend the task metadata uniformly.
return {
taskName: task.name,
contentClass: task.contentClass,
requires: task.requires,
messages: [],
};
}
}

View file

@ -1,30 +0,0 @@
/**
* Standalone exports for non-NestJS consumers (e.g. bot-services).
*
* Usage:
* import { LlmClient } from '@mana/shared-llm/standalone';
* const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' });
*/
export { LlmClient } from './llm-client';
export { resolveOptions } from './interfaces/llm-options.interface';
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';
// Types
export type {
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
} from './types';
// Utilities
export { extractJson } from './utils';

View file

@ -0,0 +1,107 @@
/**
* Svelte 5 reactive store for the LLM orchestrator.
*
* Lives at module-scope as a singleton because there is exactly one
* orchestrator + settings per page session. Settings are persisted to
 * localStorage for now (Phase 1); Phase 2 will move them into the
* encrypted IndexedDB settings table once that exists.
*
* Usage in a Svelte 5 component:
*
* import { llmOrchestrator, llmSettingsState, useTaskAvailability } from '@mana/shared-llm';
* import { extractDateTask } from '$lib/llm-tasks/extract-date';
*
* const available = useTaskAvailability(extractDateTask);
* // ... reactively true/false based on settings + backend readiness
*
* {#if available.current}
 *     <button onclick={() => llmOrchestrator.run(extractDateTask, { text })}>...</button>
* {/if}
*/
import { BrowserBackend } from './backends/browser';
import { CloudBackend } from './backends/cloud';
import { ManaServerBackend } from './backends/mana-server';
import { LlmOrchestrator } from './orchestrator';
import type { LlmTask } from './task';
import { DEFAULT_LLM_SETTINGS, type LlmSettings } from './types';
const STORAGE_KEY = 'mana.llm.settings.v1';
/** Load persisted settings, falling back to defaults on first run or
 * any parse error. localStorage is fine for Phase 1: small payload,
* not encrypted-sensitive (the user's tier preference is hardly
* secret), and trivial to migrate to IndexedDB later. */
function loadSettings(): LlmSettings {
if (typeof localStorage === 'undefined') return { ...DEFAULT_LLM_SETTINGS };
try {
const raw = localStorage.getItem(STORAGE_KEY);
if (!raw) return { ...DEFAULT_LLM_SETTINGS };
const parsed = JSON.parse(raw) as Partial<LlmSettings>;
return { ...DEFAULT_LLM_SETTINGS, ...parsed };
} catch {
return { ...DEFAULT_LLM_SETTINGS };
}
}
function persistSettings(settings: LlmSettings): void {
if (typeof localStorage === 'undefined') return;
try {
localStorage.setItem(STORAGE_KEY, JSON.stringify(settings));
} catch {
// Quota exceeded or storage disabled — non-fatal, settings just
// won't persist across sessions.
}
}
// ─── Reactive state ──────────────────────────────────────────────
let _settings = $state<LlmSettings>(loadSettings());
// Backends are constructed once per page session. They're stateless
// (or hold their own internal state in the case of BrowserBackend
// pointing at @mana/local-llm's singleton), so a fresh instance per
// orchestrator is fine.
const backends = [new BrowserBackend(), new ManaServerBackend(), new CloudBackend()];
export const llmOrchestrator = new LlmOrchestrator({
settings: _settings,
backends,
});
/** Reactive accessor for the current settings. UI components read
* via `llmSettingsState.current` to get a $state-tracked snapshot. */
export const llmSettingsState = {
get current(): LlmSettings {
return _settings;
},
};
/** Update settings (or part of them). Persists to localStorage and
* pushes the new value into the orchestrator. */
export function updateLlmSettings(patch: Partial<LlmSettings>): void {
_settings = { ..._settings, ...patch };
persistSettings(_settings);
llmOrchestrator.updateSettings(_settings);
}
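// Illustrative calls (the enclosing settings component is hypothetical; the
// field names match LlmSettings):
//
//   updateLlmSettings({ allowedTiers: ['browser'] });            // enable the browser tier
//   updateLlmSettings({ cloudConsentGiven: true });              // record the one-time cloud consent
//   updateLlmSettings({ taskOverrides: { 'notes.extractTags': 'browser' } });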
/**
* Svelte 5 reactive hook: returns `{ current: boolean }` indicating
* whether the given task can run with the user's current settings.
* Reactive against `llmSettingsState` so the UI re-renders when the
* user toggles a tier in the settings page.
*
 * Use this to gate feature buttons: show them as enabled when the
* task is runnable, disabled (with a tooltip) when not.
*/
export function useTaskAvailability<TIn, TOut>(
task: LlmTask<TIn, TOut>
): { readonly current: boolean } {
return {
get current() {
// Reading _settings here registers the reactive dependency
void _settings;
return llmOrchestrator.canRun(task);
},
};
}

View file

@ -0,0 +1,82 @@
/**
* The LlmTask contract the unit of work modules describe to the
* orchestrator. Tasks bundle:
*
* 1. The LLM-side implementation (used for browser/server/cloud tiers)
* 2. An optional rules-tier fallback (used when the LLM tier is
* unavailable, fails, or the user has opted out of all LLM tiers)
* 3. Routing metadata (minimum tier, content class, capability needs)
*
 * Tasks live next to the modules that use them; there is intentionally
 * no central task registry. The convention is:
 *
 *   apps/mana/apps/web/src/lib/llm-tasks/                 cross-module helpers
 *   apps/mana/apps/web/src/lib/modules/notes/llm-tasks/   notes-specific
 *
 * The orchestrator never imports tasks directly; modules import both the
 * task and the orchestrator and call `orchestrator.run(task, input)`.
*/
import type { LlmTier } from './tiers';
import type { ContentClass, CapabilityRequirements, LlmBackend, LlmTaskRequest } from './types';
export interface LlmTask<TInput, TOutput> {
/**
* Stable identifier for this task. Used for telemetry, per-task
* tier overrides in user settings, and debug logs. Convention is
   * `{module}.{action}`, e.g. `notes.extractTags`, `todo.parseQuickAdd`.
*/
readonly name: string;
/** Lowest tier this task can produce a useful result on. */
readonly minTier: LlmTier;
/** Privacy class of inputs this task handles. */
readonly contentClass: ContentClass;
/** Capability requirements that exclude tiers/backends that can't satisfy them. */
readonly requires?: CapabilityRequirements;
/**
* User-facing label, shown when telling the user "this task needs
* AI" or "this result was computed via tier X".
*/
readonly displayLabel: string;
/**
* The LLM-based implementation. Builds an LlmTaskRequest from the
* task input and asks the backend to run it, then maps the
* generated text back into the typed TOutput shape (e.g. parses
* JSON, validates a date, looks up a tag).
*/
runLlm(input: TInput, backend: LlmBackend): Promise<TOutput>;
/**
   * Optional deterministic fallback: runs when no LLM tier is
* available, or when the LLM tier failed and
* `fallbackToRulesOnError` is enabled in user settings.
*
* Returning the typed TOutput indicates success. Throwing means
   * the rules implementation also can't handle this input; the
* orchestrator will then surface a NoTierAvailableError so the
* UI can ask the user for direct input.
*/
runRules?(input: TInput): Promise<TOutput>;
}
/**
* Helper for tasks that need to construct an LlmTaskRequest from their
* own input. Centralizes the boilerplate so individual tasks don't have
* to redeclare taskName / contentClass / requires every time.
*/
export function buildTaskRequest<TInput, TOutput>(
task: LlmTask<TInput, TOutput>,
overrides: Omit<LlmTaskRequest, 'taskName' | 'contentClass' | 'requires'>
): LlmTaskRequest {
return {
...overrides,
taskName: task.name,
contentClass: task.contentClass,
requires: task.requires,
};
}
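/*
 * Illustrative runLlm sketch built on buildTaskRequest. `tagTask` and its
 * prompt are hypothetical; buildTaskRequest and backend.generate are the
 * real APIs:
 *
 *   async runLlm(input: { text: string }, backend: LlmBackend): Promise<string[]> {
 *     const req = buildTaskRequest(tagTask, {
 *       messages: [{ role: 'user', content: `Suggest tags for: ${input.text}` }],
 *       temperature: 0,
 *       maxTokens: 60,
 *     });
 *     const result = await backend.generate(req);
 *     return result.content.split(',').map((t) => t.trim()).filter(Boolean);
 *   }
 */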

View file

@ -0,0 +1,50 @@
/**
* Tier definitions for the Mana LLM orchestrator.
*
* Four tiers, ordered from most-private to least-private:
*
 *   none         Deterministic parsers / heuristics. No LLM at all.
 *                Always available. Zero cost. Quality varies by task.
 *
 *   browser      Gemma 4 E2B running in the user's browser via WebGPU
 *                (@mana/local-llm). 100% on-device. Requires the
 *                ~500 MB model to be downloaded once and ~2 GB VRAM.
 *
 *   mana-server  services/mana-llm + Ollama on our own infrastructure
 *                (currently the Mac Mini, gemma3:4b by default).
 *                Data leaves the device but stays in our control.
 *
 *   cloud        services/mana-llm proxied to a third-party provider
 *                (Google Gemini, configured via google_api_key in the
 *                mana-llm service env). Data goes to the third party.
*
* The numeric rank is used by the orchestrator to compare a user's
* preferred tier against a task's minimum tier ("can the user even
* run this task?") and is the canonical sort order for the privacy
* gradient.
*/
export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud';
export const TIER_RANK: Record<LlmTier, number> = {
none: 0,
browser: 1,
'mana-server': 2,
cloud: 3,
};
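// Example: a task with minTier 'browser' is runnable for a user whose highest
// allowed tier is 'mana-server', since TIER_RANK['mana-server'] (2) >= TIER_RANK['browser'] (1).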
export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud'];
/** Human-readable label, kept here so backends/UI agree on naming. */
export function tierLabel(tier: LlmTier): string {
switch (tier) {
case 'none':
return 'Lokal (ohne KI)';
case 'browser':
return 'Auf deinem Gerät';
case 'mana-server':
return 'Mana-Server';
case 'cloud':
return 'Google Gemini';
}
}

View file

@ -0,0 +1,150 @@
/**
* Shared types for the Mana LLM orchestrator.
*
* These deliberately mirror the surface of @mana/local-llm so that the
* browser tier can pass them straight through, but they are intentionally
* a SUPERSET (with task name, content class, capability requirements,
* rule fallback) so the orchestrator can route intelligently.
*/
import type { LlmTier } from './tiers';
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
export interface GenerateOptions {
messages: ChatMessage[];
temperature?: number;
maxTokens?: number;
/** Optional streaming callback — called once per emitted token chunk */
onToken?: (token: string) => void;
}
export interface GenerateResult {
content: string;
usage?: {
promptTokens: number;
completionTokens: number;
totalTokens: number;
};
latencyMs: number;
}
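/*
 * Illustrative streaming request (assumes a `backend` variable in scope; the
 * task name is hypothetical). onToken receives each chunk as it arrives, and
 * the resolved result is expected to still carry the full text:
 *
 *   let streamed = '';
 *   const res = await backend.generate({
 *     taskName: 'demo.stream',
 *     contentClass: 'personal',
 *     messages: [{ role: 'user', content: 'Summarize this text: ...' }],
 *     onToken: (t) => { streamed += t; },
 *   });
 *   // res.content holds the complete generation, res.latencyMs the duration
 */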
/**
* The privacy class of the input being processed. The orchestrator uses
* this to ENFORCE that sensitive content never leaves the device, even
* if the user has globally allowed cloud tiers.
*
 *   public     already public-domain content (e.g. an open URL the user
 *              wants summarized). Anything is fair game.
 *   personal   the user's own content but routine (a calendar event, a
 *              todo title). Default for most module tasks. Allowed on
 *              any tier the user has enabled.
 *   sensitive  explicitly private content (notes flagged sensitive,
 *              diary entries, dreams, financial data). The orchestrator
 *              restricts these to {none, browser} regardless of the user's
 *              global settings; the user has to explicitly opt out of
 *              this protection per-task to send sensitive content to
 *              server/cloud tiers.
*/
export type ContentClass = 'public' | 'personal' | 'sensitive';
export interface CapabilityRequirements {
/** Task needs to receive structured JSON in response */
json?: boolean;
/** Task needs at least this many context tokens (input + output) */
minContextTokens?: number;
/** Task needs streaming support (per-token onToken callbacks) */
streaming?: boolean;
}
/**
* The high-level "I want to do X" descriptor that flows from a module
* to the orchestrator. Concrete LlmTask implementations build these
* internally before delegating to the orchestrator.
*/
export interface LlmTaskRequest extends GenerateOptions {
/** Stable name for analytics + per-task overrides — e.g. "notes.extractTags" */
taskName: string;
contentClass: ContentClass;
requires?: CapabilityRequirements;
}
/**
* The result of running a task through the orchestrator. Carries the
* tier that actually executed (which may differ from the user's
* preferred tier if a fallback kicked in) and the trail of tiers
 * that were tried first; useful for telemetry and for debugging
* "why did this task end up running on tier X?".
*/
export interface LlmTaskResult<T = string> {
value: T;
source: LlmTier;
latencyMs: number;
/** Tiers that were attempted before `source` succeeded */
attempted: LlmTier[];
}
/**
* Backend interface that the orchestrator talks to. The "none" tier
 * does NOT implement this; rule-based fallbacks live on each
* concrete LlmTask, not on a backend object.
*/
export interface LlmBackend {
readonly tier: Exclude<LlmTier, 'none'>;
/** Could this backend run AT ALL given the current environment?
* e.g. browser tier checks for WebGPU + user-enabled, server tier
* checks for a configured base URL. */
isAvailable(): boolean;
/** Could this backend run RIGHT NOW? e.g. browser tier checks if
* the model is loaded into VRAM. May return false even when
* isAvailable() is true (model still downloading, server in
   * startup, …). */
isReady(): boolean | Promise<boolean>;
/** Run a task. The backend is responsible for actually performing
* the inference and returning the result; it does NOT decide
* whether it SHOULD run (the orchestrator did that). */
generate(req: LlmTaskRequest): Promise<GenerateResult>;
}
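/*
 * Minimal backend sketch (illustrative; not one of the shipped backends).
 * The endpoint path and response shape follow the mana-llm OpenAI-compatible
 * API; the hard-coded URL and model name are assumptions:
 *
 *   const demoBackend: LlmBackend = {
 *     tier: 'mana-server',
 *     isAvailable: () => true,
 *     isReady: async () => true,
 *     async generate(req) {
 *       const start = performance.now();
 *       const res = await fetch('http://localhost:3025/v1/chat/completions', {
 *         method: 'POST',
 *         headers: { 'Content-Type': 'application/json' },
 *         body: JSON.stringify({ model: 'ollama/gemma3:4b', messages: req.messages }),
 *       });
 *       const data = (await res.json()) as {
 *         choices: Array<{ message: { content: string } }>;
 *       };
 *       return {
 *         content: data.choices[0].message.content,
 *         latencyMs: Math.round(performance.now() - start),
 *       };
 *     },
 *   };
 */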
/**
* The mutable user preferences that drive routing.
*/
export interface LlmSettings {
/** Tiers the orchestrator is allowed to use, in preference order.
   * An empty array means "no AI at all"; only Tier 0 (rules) runs. */
allowedTiers: LlmTier[];
/** Per-task overrides keyed by task name, value is the tier to
* use for that task specifically (overrides allowedTiers order). */
taskOverrides: Record<string, LlmTier>;
/** When the user-chosen tier fails to run a task, fall back to
   * the rules tier (if the task has a runRules implementation).
* When false, failures surface as errors instead. */
fallbackToRulesOnError: boolean;
/** Show a small "via Edge / via Server / via Gemini" badge under
   * every LLM result. Default true; helps the user understand
* where their data went. */
showSourceInUi: boolean;
/** First-time consent for the cloud tier. Until this is true, the
* cloud tier is treated as unavailable even if it's in
* allowedTiers. The user must explicitly tick a "yes I understand
* Google sees my data" checkbox once. */
cloudConsentGiven: boolean;
}
export const DEFAULT_LLM_SETTINGS: LlmSettings = {
allowedTiers: [], // ZERO opt-in by default — every user starts in Tier 0 only
taskOverrides: {},
fallbackToRulesOnError: true,
showSourceInUi: true,
cloudConsentGiven: false,
};
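/*
 * Illustrative settings for a user who has opted in to every tier and pinned
 * one task (the task name is only an example) to the browser tier:
 *
 *   const everythingEnabled: LlmSettings = {
 *     allowedTiers: ['browser', 'mana-server', 'cloud'],
 *     taskOverrides: { 'notes.extractTags': 'browser' },
 *     fallbackToRulesOnError: true,
 *     showSourceInUi: true,
 *     cloudConsentGiven: true,
 *   };
 */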

View file

@ -1,100 +0,0 @@
/**
* Core chat types for the LLM client.
* These are the high-level types that consumers interact with.
*/
// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------
export interface TextContentPart {
type: 'text';
text: string;
}
export interface ImageContentPart {
type: 'image_url';
image_url: { url: string };
}
export type ContentPart = TextContentPart | ImageContentPart;
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string | ContentPart[];
}
// ---------------------------------------------------------------------------
// Options
// ---------------------------------------------------------------------------
export interface ChatOptions {
/** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
model?: string;
/** Sampling temperature 0.0-2.0 */
temperature?: number;
/** Max tokens to generate */
maxTokens?: number;
/** System prompt prepended to messages */
systemPrompt?: string;
/** Request timeout in ms (overrides module default) */
timeout?: number;
}
export interface JsonOptions<T = unknown> extends ChatOptions {
/** Validation function applied to parsed JSON. Should throw on invalid data. */
validate?: (data: unknown) => T;
/** Number of extraction retries on parse failure (default: 1) */
jsonRetries?: number;
}
export interface VisionOptions extends ChatOptions {
/** Vision model override (default from module config, e.g. "ollama/llava:7b") */
visionModel?: string;
}
// ---------------------------------------------------------------------------
// Results
// ---------------------------------------------------------------------------
export interface TokenUsage {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
}
export interface ChatResult {
/** Generated text content */
content: string;
/** Model that was actually used */
model: string;
/** Token usage statistics */
usage: TokenUsage;
/** Request latency in milliseconds */
latencyMs: number;
}
export interface JsonResult<T = unknown> extends ChatResult {
/** Parsed and optionally validated data */
data: T;
}
// ---------------------------------------------------------------------------
// Models
// ---------------------------------------------------------------------------
export interface ModelInfo {
id: string;
object: 'model';
created: number;
owned_by: string;
}
// ---------------------------------------------------------------------------
// Health
// ---------------------------------------------------------------------------
export interface HealthStatus {
status: 'healthy' | 'degraded' | 'unhealthy';
providers: Record<string, unknown>;
}

View file

@ -1,26 +0,0 @@
export type {
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
} from './chat.types';
export type {
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChoice,
ChatCompletionStreamChunk,
StreamChoice,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingData,
ModelsListResponse,
} from './openai-compat.types';

View file

@ -1,97 +0,0 @@
/**
* OpenAI-compatible wire format types matching the mana-llm API contract.
* These are internal types used for HTTP communication - consumers should
* use the high-level types from chat.types.ts instead.
*/
import type { ChatMessage, TokenUsage } from './chat.types';
// ---------------------------------------------------------------------------
// Request (POST /v1/chat/completions)
// ---------------------------------------------------------------------------
export interface ChatCompletionRequest {
model: string;
messages: ChatMessage[];
stream?: boolean;
temperature?: number;
max_tokens?: number;
top_p?: number;
frequency_penalty?: number;
presence_penalty?: number;
stop?: string | string[];
}
// ---------------------------------------------------------------------------
// Response (non-streaming)
// ---------------------------------------------------------------------------
export interface ChatCompletionResponse {
id: string;
object: 'chat.completion';
created: number;
model: string;
choices: ChatCompletionChoice[];
usage: TokenUsage;
}
export interface ChatCompletionChoice {
index: number;
message: { role: 'assistant'; content: string };
finish_reason: 'stop' | 'length' | 'content_filter' | null;
}
// ---------------------------------------------------------------------------
// Response (streaming)
// ---------------------------------------------------------------------------
export interface ChatCompletionStreamChunk {
id: string;
object: 'chat.completion.chunk';
created: number;
model: string;
choices: StreamChoice[];
}
export interface StreamChoice {
index: number;
delta: { role?: 'assistant'; content?: string };
finish_reason: string | null;
}
// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------
export interface EmbeddingRequest {
model: string;
input: string | string[];
encoding_format?: 'float' | 'base64';
}
export interface EmbeddingResponse {
object: 'list';
data: EmbeddingData[];
model: string;
usage: TokenUsage;
}
export interface EmbeddingData {
object: 'embedding';
index: number;
embedding: number[];
}
// ---------------------------------------------------------------------------
// Models (GET /v1/models)
// ---------------------------------------------------------------------------
export interface ModelsListResponse {
object: 'list';
data: Array<{
id: string;
object: 'model';
created: number;
owned_by: string;
}>;
}

View file

@ -1,5 +0,0 @@
export { extractJson } from './json-extractor';
export { retryFetch } from './retry';
export type { RetryOptions } from './retry';
export { LlmMetricsCollector } from './metrics';
export type { LlmRequestMetrics, MetricsCallback } from './metrics';

View file

@ -1,94 +0,0 @@
/**
* Extract and parse JSON from LLM responses.
*
* LLMs often wrap JSON in markdown code fences or include extra text.
* This utility handles all common patterns:
* 1. Direct JSON parse
* 2. Markdown ```json ... ``` fences
* 3. First { ... } or [ ... ] block in text
*/
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
const trimmed = text.trim();
// Step 1: Try direct parse
const direct = tryParse<T>(trimmed, validate);
if (direct !== undefined) return direct;
// Step 2: Strip markdown code fences
const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) {
const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
if (fenced !== undefined) return fenced;
}
// Step 3: Find first JSON object
const objectStart = trimmed.indexOf('{');
if (objectStart !== -1) {
const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
if (objectStr) {
const obj = tryParse<T>(objectStr, validate);
if (obj !== undefined) return obj;
}
}
// Step 4: Find first JSON array
const arrayStart = trimmed.indexOf('[');
if (arrayStart !== -1) {
const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
if (arrayStr) {
const arr = tryParse<T>(arrayStr, validate);
if (arr !== undefined) return arr;
}
}
throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
}
function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
try {
const parsed = JSON.parse(text);
return validate ? validate(parsed) : parsed;
} catch {
return undefined;
}
}
/**
* Extract a balanced block starting from the given position.
* Handles nested braces/brackets but not strings with escaped delimiters.
*/
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
let depth = 0;
let inString = false;
let escape = false;
for (let i = start; i < text.length; i++) {
const ch = text[i];
if (escape) {
escape = false;
continue;
}
if (ch === '\\') {
escape = true;
continue;
}
if (ch === '"') {
inString = !inString;
continue;
}
if (inString) continue;
if (ch === open) depth++;
if (ch === close) depth--;
if (depth === 0) {
return text.slice(start, i + 1);
}
}
return null;
}

View file

@ -1,88 +0,0 @@
/**
* Request-level metrics for LLM calls.
*
* Provides an optional callback system that backends can hook into
* for monitoring, logging, or forwarding to Prometheus/Grafana.
*/
export interface LlmRequestMetrics {
/** Model requested (e.g. "ollama/gemma3:4b") */
model: string;
/** Model actually used (may differ if fallback occurred) */
actualModel: string;
/** Request type */
type: 'chat' | 'json' | 'vision' | 'visionJson' | 'embed' | 'stream';
/** Total request duration in ms */
latencyMs: number;
/** Token usage */
promptTokens: number;
completionTokens: number;
totalTokens: number;
/** Whether this request was a fallback (model differs from requested) */
wasFallback: boolean;
/** Whether the request succeeded */
success: boolean;
/** Error message if failed */
error?: string;
}
export type MetricsCallback = (metrics: LlmRequestMetrics) => void;
/**
* Simple in-memory metrics aggregator.
* Useful for health endpoints and debugging.
*/
export class LlmMetricsCollector {
private _totalRequests = 0;
private _totalErrors = 0;
private _totalFallbacks = 0;
private _totalTokens = 0;
private _totalLatencyMs = 0;
private _byModel: Map<string, { requests: number; tokens: number; errors: number }> = new Map();
/** Use as MetricsCallback */
readonly collect = (metrics: LlmRequestMetrics): void => {
this._totalRequests++;
this._totalLatencyMs += metrics.latencyMs;
this._totalTokens += metrics.totalTokens;
if (!metrics.success) this._totalErrors++;
if (metrics.wasFallback) this._totalFallbacks++;
const modelKey = metrics.actualModel;
const existing = this._byModel.get(modelKey) ?? { requests: 0, tokens: 0, errors: 0 };
existing.requests++;
existing.tokens += metrics.totalTokens;
if (!metrics.success) existing.errors++;
this._byModel.set(modelKey, existing);
};
/** Get summary stats for health endpoints / dashboards */
getSummary() {
return {
totalRequests: this._totalRequests,
totalErrors: this._totalErrors,
totalFallbacks: this._totalFallbacks,
totalTokens: this._totalTokens,
averageLatencyMs:
this._totalRequests > 0 ? Math.round(this._totalLatencyMs / this._totalRequests) : 0,
fallbackRate:
this._totalRequests > 0
? Math.round((this._totalFallbacks / this._totalRequests) * 100)
: 0,
errorRate:
this._totalRequests > 0 ? Math.round((this._totalErrors / this._totalRequests) * 100) : 0,
byModel: Object.fromEntries(this._byModel),
};
}
/** Reset all counters */
reset(): void {
this._totalRequests = 0;
this._totalErrors = 0;
this._totalFallbacks = 0;
this._totalTokens = 0;
this._totalLatencyMs = 0;
this._byModel.clear();
}
}

View file

@ -1,51 +0,0 @@
/**
* Fetch wrapper with exponential backoff retry for transient failures.
*
* Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
* Does NOT retry on: 400, 401, 403, 404 (client errors).
*/
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
export interface RetryOptions {
maxRetries: number;
/** Base delay in ms (doubles each retry). Default: 200 */
baseDelay?: number;
}
export async function retryFetch(
url: string,
init: RequestInit,
options: RetryOptions
): Promise<Response> {
const { maxRetries, baseDelay = 200 } = options;
let lastError: Error | undefined;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const response = await fetch(url, init);
if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
return response;
}
// Retryable status code
lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
} catch (error) {
// Network error (connection refused, timeout, etc.)
lastError = error instanceof Error ? error : new Error(String(error));
}
// Don't sleep after the last attempt
if (attempt < maxRetries) {
const delay = baseDelay * Math.pow(2, attempt);
await sleep(delay);
}
}
throw lastError ?? new Error('retryFetch exhausted all retries');
}
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}

View file

@ -1,21 +1,14 @@
{
"compilerOptions": {
"target": "ES2021",
"module": "commonjs",
"lib": ["ES2021"],
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "./dist",
"rootDir": "./src",
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "bundler",
"lib": ["ES2022", "DOM"],
"strict": true,
"esModuleInterop": true,
"noEmit": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"moduleResolution": "node",
"experimentalDecorators": true,
"emitDecoratorMetadata": true
"forceConsistentCasingInFileNames": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
"exclude": ["node_modules"]
}

pnpm-lock.yaml generated (33 changed lines)
View file

@ -978,6 +978,9 @@ importers:
'@mana/shared-links':
specifier: workspace:*
version: link:../../../../packages/shared-links
'@mana/shared-llm':
specifier: workspace:*
version: link:../../../../packages/shared-llm
'@mana/shared-stores':
specifier: workspace:*
version: link:../../../../packages/shared-stores
@ -2981,31 +2984,19 @@ importers:
packages/shared-llm:
dependencies:
'@nestjs/common':
specifier: ^10.0.0 || ^11.0.0
version: 10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2)
'@nestjs/config':
specifier: ^3.0.0 || ^4.0.0
version: 3.3.0(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(rxjs@7.8.2)
'@nestjs/core':
specifier: ^10.0.0 || ^11.0.0
version: 10.4.22(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/platform-express@10.4.22)(reflect-metadata@0.2.2)(rxjs@7.8.2)
reflect-metadata:
specifier: ^0.1.13 || ^0.2.0
version: 0.2.2
rxjs:
specifier: ^7.0.0
version: 7.8.2
'@mana/local-llm':
specifier: workspace:*
version: link:../local-llm
devDependencies:
'@types/node':
specifier: ^20.0.0
version: 20.19.39
typescript:
specifier: ^24.10.1
version: 24.12.2
svelte:
specifier: ^5.0.0
version: 5.55.1
typescript:
specifier: ^5.9.3
version: 5.9.3
vitest:
specifier: ^4.1.2
version: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@20.19.39)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@20.19.39)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
packages/shared-logger:
devDependencies: