mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
fix(mana/web): unwrap $state proxy in workbench-scenes Dexie writes
Adding an app to a workbench scene threw DataCloneError. scenesState is a $state array, so current.openApps was a Svelte 5 proxy and spreading it into a new array left proxy entries inside; IndexedDB's structured clone refuses to serialise those. Snapshot before handing the array to patchScene / createScene so Dexie sees plain objects. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
761851847f
commit
56065c8537
38 changed files with 1415 additions and 1665 deletions
|
|
@ -15,6 +15,7 @@ COPY apps/calc/packages/shared ./apps/calc/packages/shared
|
|||
COPY apps/zitare/packages/content ./apps/zitare/packages/content
|
||||
COPY packages/shared-uload ./packages/shared-uload
|
||||
COPY packages/local-llm ./packages/local-llm
|
||||
COPY packages/shared-llm ./packages/shared-llm
|
||||
|
||||
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \
|
||||
pnpm install --no-frozen-lockfile --ignore-scripts
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@
|
|||
"@mana/shared-i18n": "workspace:*",
|
||||
"@mana/shared-icons": "workspace:*",
|
||||
"@mana/shared-links": "workspace:*",
|
||||
"@mana/shared-llm": "workspace:*",
|
||||
"@mana/shared-stores": "workspace:*",
|
||||
"@mana/shared-tags": "workspace:*",
|
||||
"@mana/shared-tailwind": "workspace:*",
|
||||
|
|
|
|||
119
apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts
Normal file
119
apps/mana/apps/web/src/lib/llm-tasks/extract-date.ts
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
/**
|
||||
* extractDateTask — pulls an ISO date out of a free-form German/English
|
||||
* string. Used by Quick-Add features that want to recognize phrases like
|
||||
* "morgen 14 Uhr" or "next Tuesday".
|
||||
*
|
||||
* Has a runRules() fallback so it works even on Tier 0 (no AI) — the
|
||||
* fallback uses a hand-rolled regex set covering the most common
|
||||
* shortcuts. It's intentionally narrow: it only catches the patterns it
|
||||
* KNOWS, and returns null otherwise. This is the right semantic for
|
||||
* Tier 0 — "I'm not certain enough to guess" is a valid answer when
|
||||
* the user has explicitly opted out of LLM use.
|
||||
*
|
||||
* For production-grade NL date parsing without an LLM, replacing the
|
||||
* regex stub with chrono-node would be a one-line change in runRules().
|
||||
*/
|
||||
|
||||
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
|
||||
|
||||
/** Input for the extractDateTask. */
export interface ExtractDateInput {
  /** Free-form German/English text to scan for a date reference. */
  text: string;
  /** Reference date for relative parsing ("morgen", "next week"). Defaults to now. */
  now?: Date;
}

/** The extracted date, or null when no date reference was recognized. */
export type ExtractDateOutput = Date | null;
|
||||
|
||||
export const extractDateTask: LlmTask<ExtractDateInput, ExtractDateOutput> = {
|
||||
name: 'common.extractDate',
|
||||
minTier: 'none', // works on Tier 0 thanks to the regex fallback
|
||||
contentClass: 'personal',
|
||||
displayLabel: 'Datum aus Text erkennen',
|
||||
|
||||
async runLlm(input, backend: LlmBackend): Promise<ExtractDateOutput> {
|
||||
const refIso = (input.now ?? new Date()).toISOString();
|
||||
const result = await backend.generate({
|
||||
taskName: extractDateTask.name,
|
||||
contentClass: extractDateTask.contentClass,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content:
|
||||
'You extract date+time references from short user input. Always respond with strict JSON of the form {"iso":"YYYY-MM-DDTHH:MM:SSZ"} or {"iso":null}. No prose, no markdown.',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `Reference time: ${refIso}\nUser input: ${input.text}`,
|
||||
},
|
||||
],
|
||||
temperature: 0,
|
||||
maxTokens: 80,
|
||||
});
|
||||
|
||||
try {
|
||||
// Strip markdown fences if a less-disciplined model added them
|
||||
const cleaned = result.content.replace(/```(?:json)?|```/g, '').trim();
|
||||
const parsed = JSON.parse(cleaned) as { iso: string | null };
|
||||
return parsed.iso ? new Date(parsed.iso) : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
},
|
||||
|
||||
async runRules(input): Promise<ExtractDateOutput> {
|
||||
const text = input.text.toLowerCase().trim();
|
||||
const now = input.now ?? new Date();
|
||||
|
||||
// "heute" / "today"
|
||||
if (/\b(heute|today)\b/.test(text)) {
|
||||
return withTime(new Date(now), text);
|
||||
}
|
||||
|
||||
// "morgen" / "tomorrow"
|
||||
if (/\b(morgen|tomorrow)\b/.test(text)) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + 1);
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// "übermorgen" / "day after tomorrow"
|
||||
if (/\b(übermorgen|day after tomorrow)\b/.test(text)) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + 2);
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// "in N tagen" / "in N days"
|
||||
const inDays = text.match(/\bin (\d+) (tagen|days?)\b/);
|
||||
if (inDays) {
|
||||
const d = new Date(now);
|
||||
d.setDate(d.getDate() + parseInt(inDays[1], 10));
|
||||
return withTime(d, text);
|
||||
}
|
||||
|
||||
// Explicit ISO date "2026-04-09" or "2026-04-09T14:00"
|
||||
const iso = text.match(/(\d{4}-\d{2}-\d{2}(?:t\d{2}:\d{2}(?::\d{2})?)?)/);
|
||||
if (iso) {
|
||||
const d = new Date(iso[1]);
|
||||
if (!Number.isNaN(d.getTime())) return d;
|
||||
}
|
||||
|
||||
return null;
|
||||
},
|
||||
};
|
||||
|
||||
/** Apply a "HH:MM" or "HH Uhr" time hint to a date if found in the text. */
|
||||
function withTime(date: Date, text: string): Date {
|
||||
const hhmm = text.match(/\b(\d{1,2}):(\d{2})\b/);
|
||||
if (hhmm) {
|
||||
date.setHours(parseInt(hhmm[1], 10), parseInt(hhmm[2], 10), 0, 0);
|
||||
return date;
|
||||
}
|
||||
const hhUhr = text.match(/\b(\d{1,2})\s*uhr\b/);
|
||||
if (hhUhr) {
|
||||
date.setHours(parseInt(hhUhr[1], 10), 0, 0, 0);
|
||||
return date;
|
||||
}
|
||||
// No time hint — keep the original time-of-day
|
||||
return date;
|
||||
}
|
||||
57
apps/mana/apps/web/src/lib/llm-tasks/summarize.ts
Normal file
57
apps/mana/apps/web/src/lib/llm-tasks/summarize.ts
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* summarizeTextTask — produces a short summary of a longer piece of
|
||||
* text. Used for things like dream entries, voice memo transcripts,
|
||||
* meeting notes.
|
||||
*
|
||||
* Has NO runRules() implementation: a meaningful summary genuinely
|
||||
* requires an LLM, and a fake "first sentence + ellipsis" fallback
|
||||
* would mislead the user. Tasks without a runRules forces the user
|
||||
* to actually pick a higher tier in settings — and the orchestrator's
|
||||
* canRun() will return false for them when they're on Tier 0.
|
||||
*
|
||||
* minTier is set to 'browser' rather than 'mana-server' because Gemma
|
||||
* 4 E2B handles short summarization tasks well in the browser. For
|
||||
* very long inputs (>4k tokens) the task could escalate to
|
||||
* mana-server via a per-task override.
|
||||
*/
|
||||
|
||||
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
|
||||
|
||||
/** Input for the summarizeTextTask. */
export interface SummarizeInput {
  /** The text to summarize (e.g. a transcript, note, or dream entry). */
  text: string;
  /** Approximate target length in sentences. Default 3. */
  sentences?: number;
}

/** The plain-text summary produced by the task. */
export type SummarizeOutput = string;
|
||||
|
||||
export const summarizeTextTask: LlmTask<SummarizeInput, SummarizeOutput> = {
|
||||
name: 'common.summarize',
|
||||
minTier: 'browser', // genuinely needs an LLM — no rules-based equivalent
|
||||
contentClass: 'personal',
|
||||
displayLabel: 'Text zusammenfassen',
|
||||
|
||||
async runLlm(input, backend: LlmBackend): Promise<SummarizeOutput> {
|
||||
const sentences = input.sentences ?? 3;
|
||||
const result = await backend.generate({
|
||||
taskName: summarizeTextTask.name,
|
||||
contentClass: summarizeTextTask.contentClass,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: `Du fasst Text in ${sentences} prägnanten Sätzen zusammen. Behalte die wichtigsten Fakten und Beschlüsse, lasse Füller weg. Kein Markdown, keine Aufzählungen, keine Vorrede — nur die Zusammenfassung.`,
|
||||
},
|
||||
{ role: 'user', content: input.text },
|
||||
],
|
||||
temperature: 0.3,
|
||||
maxTokens: 500,
|
||||
});
|
||||
|
||||
return result.content.trim();
|
||||
},
|
||||
|
||||
// No runRules — this task is impossible without an LLM. The
|
||||
// orchestrator's canRun() will return false for users on Tier 0,
|
||||
// and modules using this task should hide their summarize button
|
||||
// when canRun() is false.
|
||||
};
|
||||
|
|
@ -99,10 +99,9 @@ async function patchScene(
|
|||
id: string,
|
||||
patch: Partial<Pick<LocalWorkbenchScene, 'name' | 'icon' | 'openApps' | 'order'>>
|
||||
) {
|
||||
await db.table<LocalWorkbenchScene>(TABLE).update(id, {
|
||||
...patch,
|
||||
updatedAt: nowIso(),
|
||||
});
|
||||
// Strip Svelte 5 $state proxies — IndexedDB's structured clone can't serialize them.
|
||||
const clean = $state.snapshot({ ...patch, updatedAt: nowIso() });
|
||||
await db.table<LocalWorkbenchScene>(TABLE).update(id, clean);
|
||||
}
|
||||
|
||||
async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchSceneApp[]) {
|
||||
|
|
@ -110,7 +109,9 @@ async function patchActiveScene(fn: (apps: WorkbenchSceneApp[]) => WorkbenchScen
|
|||
if (!id) return;
|
||||
const current = scenesState.find((s) => s.id === id);
|
||||
if (!current) return;
|
||||
await patchScene(id, { openApps: fn(current.openApps) });
|
||||
// Snapshot before handing to the mutator so callers operate on plain objects.
|
||||
const plainApps = $state.snapshot(current.openApps) as WorkbenchSceneApp[];
|
||||
await patchScene(id, { openApps: fn(plainApps) });
|
||||
}
|
||||
|
||||
// ─── Public store ─────────────────────────────────────────────
|
||||
|
|
@ -191,7 +192,7 @@ export const workbenchScenesStore = {
|
|||
id,
|
||||
name: opts.name.trim() || 'Neue Szene',
|
||||
icon: opts.icon,
|
||||
openApps: opts.seedApps ? structuredClone(opts.seedApps) : [],
|
||||
openApps: opts.seedApps ? ($state.snapshot(opts.seedApps) as WorkbenchSceneApp[]) : [],
|
||||
order: maxOrder + 1,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
|
|
|
|||
|
|
@ -11,6 +11,16 @@
|
|||
type ModelKey,
|
||||
} from '@mana/local-llm';
|
||||
import { hasModelInCache } from '@mana/local-llm';
|
||||
import {
|
||||
llmOrchestrator,
|
||||
llmSettingsState,
|
||||
updateLlmSettings,
|
||||
ALL_TIERS,
|
||||
tierLabel,
|
||||
type LlmTier,
|
||||
} from '@mana/shared-llm';
|
||||
import { extractDateTask } from '$lib/llm-tasks/extract-date';
|
||||
import { summarizeTextTask } from '$lib/llm-tasks/summarize';
|
||||
import { marked } from 'marked';
|
||||
import { Robot, Trash, PaperPlaneRight, ClockCounterClockwise } from '@mana/shared-icons';
|
||||
|
||||
|
|
@ -43,7 +53,47 @@
|
|||
|
||||
// --- State ---
|
||||
let selectedModel: ModelKey = $state('gemma-4-e2b');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' = $state('chat');
|
||||
let activeTab: 'chat' | 'extract' | 'classify' | 'compare' | 'benchmark' | 'router' =
|
||||
$state('chat');
|
||||
|
||||
// --- Router tab state ---
|
||||
const settings = $derived(llmSettingsState.current);
|
||||
let routerInput = $state('Treffen mit Sara morgen 14:30');
|
||||
let routerRunning = $state(false);
|
||||
let routerResult = $state<{
|
||||
value: unknown;
|
||||
source: string;
|
||||
latencyMs: number;
|
||||
attempted: string[];
|
||||
} | null>(null);
|
||||
let routerError = $state<string | null>(null);
|
||||
|
||||
function toggleAllowedTier(tier: LlmTier) {
|
||||
const current = settings.allowedTiers;
|
||||
const next = current.includes(tier) ? current.filter((t) => t !== tier) : [...current, tier];
|
||||
updateLlmSettings({ allowedTiers: next });
|
||||
}
|
||||
|
||||
async function runRouterTask(task: typeof extractDateTask | typeof summarizeTextTask) {
|
||||
routerRunning = true;
|
||||
routerResult = null;
|
||||
routerError = null;
|
||||
try {
|
||||
const input = task === extractDateTask ? { text: routerInput } : { text: routerInput };
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const result = await llmOrchestrator.run(task as any, input);
|
||||
routerResult = {
|
||||
value: result.value,
|
||||
source: result.source,
|
||||
latencyMs: result.latencyMs,
|
||||
attempted: result.attempted,
|
||||
};
|
||||
} catch (err) {
|
||||
routerError = err instanceof Error ? `${err.name}: ${err.message}` : String(err);
|
||||
} finally {
|
||||
routerRunning = false;
|
||||
}
|
||||
}
|
||||
const supported = isLocalLlmSupported();
|
||||
const status = getLocalLlmStatus();
|
||||
|
||||
|
|
@ -581,7 +631,7 @@
|
|||
|
||||
<!-- Tabs -->
|
||||
<div class="mb-4 flex gap-1 rounded-lg border border-border bg-card p-1">
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }] as tab}
|
||||
{#each [{ id: 'chat', label: 'Chat' }, { id: 'extract', label: 'JSON Extract' }, { id: 'classify', label: 'Classify' }, { id: 'compare', label: 'Compare' }, { id: 'benchmark', label: 'Benchmark' }, { id: 'router', label: 'Router' }] as tab}
|
||||
<button
|
||||
onclick={() => (activeTab = tab.id as typeof activeTab)}
|
||||
class="flex-1 rounded-md px-3 py-1.5 text-sm font-medium transition-colors {activeTab ===
|
||||
|
|
@ -1134,5 +1184,100 @@
|
|||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Router Tab — exercises the @mana/shared-llm tiered orchestrator -->
|
||||
{#if activeTab === 'router'}
|
||||
<div class="flex flex-col gap-4">
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<p class="mb-3 text-sm text-muted-foreground">
|
||||
Smoke-Test für den tiered LLM-Router. Wähle welche Tiers der Orchestrator benutzen darf
|
||||
— der Router wählt dann pro Task die erste passende Schicht aus deiner Liste.
|
||||
</p>
|
||||
|
||||
<div class="mb-4">
|
||||
<div class="mb-2 text-xs font-medium text-muted-foreground">Erlaubte Tiers</div>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{#each ALL_TIERS as tier}
|
||||
{@const enabled = settings.allowedTiers.includes(tier)}
|
||||
<button
|
||||
onclick={() => toggleAllowedTier(tier)}
|
||||
class="rounded-lg border px-3 py-1.5 text-xs font-medium transition-colors {enabled
|
||||
? 'border-primary bg-primary/20 text-primary'
|
||||
: 'border-border bg-background text-muted-foreground hover:text-foreground'}"
|
||||
>
|
||||
{tierLabel(tier)}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
<div class="mt-2 text-xs text-muted-foreground">
|
||||
Aktuell: {settings.allowedTiers.length === 0
|
||||
? 'keine LLM-Tiers — nur Tier 0 (Regeln)'
|
||||
: settings.allowedTiers.map(tierLabel).join(' → ')}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<input
|
||||
type="text"
|
||||
bind:value={routerInput}
|
||||
placeholder="Eingabetext für den Task..."
|
||||
class="mb-3 w-full rounded-lg border border-border bg-background px-3 py-2 text-sm text-foreground placeholder:text-muted-foreground focus:border-primary focus:outline-none"
|
||||
/>
|
||||
|
||||
<div class="flex flex-wrap gap-2">
|
||||
<button
|
||||
onclick={() => runRouterTask(extractDateTask)}
|
||||
disabled={routerRunning || !routerInput.trim()}
|
||||
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
extractDate (hat T0-Fallback)
|
||||
</button>
|
||||
<button
|
||||
onclick={() => runRouterTask(summarizeTextTask)}
|
||||
disabled={routerRunning || !routerInput.trim()}
|
||||
class="rounded-lg bg-primary px-4 py-2 text-sm font-medium text-primary-foreground disabled:opacity-50"
|
||||
>
|
||||
summarize (kein T0)
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div class="mt-2 text-xs text-muted-foreground">
|
||||
extractDate.canRun: {llmOrchestrator.canRun(extractDateTask)} · summarize.canRun: {llmOrchestrator.canRun(
|
||||
summarizeTextTask
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if routerError}
|
||||
<div class="rounded-xl border border-red-500/30 bg-red-500/10 p-4">
|
||||
<div class="text-sm font-medium text-red-400">Task fehlgeschlagen</div>
|
||||
<div class="mt-1 font-mono text-xs text-red-300">{routerError}</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
{#if routerResult}
|
||||
<div class="rounded-xl border border-border bg-card p-4">
|
||||
<div class="mb-2 flex items-center gap-2">
|
||||
<span
|
||||
class="rounded-full border border-primary/40 bg-primary/10 px-2 py-0.5 text-xs font-medium text-primary"
|
||||
>
|
||||
{tierLabel(routerResult.source as LlmTier)}
|
||||
</span>
|
||||
<span class="text-xs text-muted-foreground">{routerResult.latencyMs} ms</span>
|
||||
{#if routerResult.attempted.length > 1}
|
||||
<span class="text-xs text-muted-foreground"
|
||||
>(versucht: {routerResult.attempted.join(' → ')})</span
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
<pre
|
||||
class="overflow-x-auto rounded-lg bg-background p-3 font-mono text-xs text-foreground">{JSON.stringify(
|
||||
routerResult.value,
|
||||
null,
|
||||
2
|
||||
)}</pre>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ COPY packages/credits ./packages/credits
|
|||
COPY packages/spiral-db ./packages/spiral-db
|
||||
COPY packages/wallpaper-generator ./packages/wallpaper-generator
|
||||
COPY packages/local-llm ./packages/local-llm
|
||||
COPY packages/shared-llm ./packages/shared-llm
|
||||
|
||||
# Install dependencies (shared packages only - app deps added later)
|
||||
RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store \
|
||||
|
|
|
|||
|
|
@ -1,47 +1,26 @@
|
|||
{
|
||||
"name": "@mana/shared-llm",
|
||||
"version": "1.0.0",
|
||||
"version": "2.0.0",
|
||||
"private": true,
|
||||
"description": "Unified LLM client for all Mana backends via mana-llm service",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"description": "Tiered LLM orchestrator for Mana — routes tasks across rules / browser-edge / mana-server / cloud backends with explicit user-controlled privacy tiers",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js",
|
||||
"require": "./dist/index.js"
|
||||
},
|
||||
"./standalone": {
|
||||
"types": "./dist/standalone.d.ts",
|
||||
"import": "./dist/standalone.js",
|
||||
"require": "./dist/standalone.js"
|
||||
}
|
||||
".": "./src/index.ts"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "tsc --watch",
|
||||
"clean": "rm -rf dist",
|
||||
"type-check": "tsc --noEmit",
|
||||
"test": "vitest run"
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"dependencies": {
|
||||
"@nestjs/common": "^10.0.0 || ^11.0.0",
|
||||
"@nestjs/config": "^3.0.0 || ^4.0.0",
|
||||
"@nestjs/core": "^10.0.0 || ^11.0.0",
|
||||
"reflect-metadata": "^0.1.13 || ^0.2.0",
|
||||
"rxjs": "^7.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@nestjs/common": "^10.0.0 || ^11.0.0",
|
||||
"@nestjs/config": "^3.0.0 || ^4.0.0",
|
||||
"@nestjs/core": "^10.0.0 || ^11.0.0"
|
||||
"@mana/local-llm": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.0.0",
|
||||
"typescript": "^5.0.0",
|
||||
"vitest": "^4.1.2"
|
||||
"@types/node": "^24.10.1",
|
||||
"svelte": "^5.0.0",
|
||||
"typescript": "^5.9.3"
|
||||
},
|
||||
"files": [
|
||||
"dist"
|
||||
]
|
||||
"peerDependencies": {
|
||||
"svelte": "^5.0.0"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,119 +0,0 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
import { extractJson } from '../utils/json-extractor';
|
||||
|
||||
describe('extractJson', () => {
|
||||
it('parses direct JSON object', () => {
|
||||
const result = extractJson('{"name": "test", "value": 42}');
|
||||
expect(result).toEqual({ name: 'test', value: 42 });
|
||||
});
|
||||
|
||||
it('parses direct JSON array', () => {
|
||||
const result = extractJson('[1, 2, 3]');
|
||||
expect(result).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('strips markdown json code fence', () => {
|
||||
const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ category: 'bug', title: 'Fix login' });
|
||||
});
|
||||
|
||||
it('strips markdown code fence without json label', () => {
|
||||
const input = '```\n{"key": "value"}\n```';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('extracts JSON from surrounding text', () => {
|
||||
const input =
|
||||
'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
|
||||
});
|
||||
|
||||
it('extracts JSON array from surrounding text', () => {
|
||||
const input = 'The items are: [1, 2, 3] as requested.';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it('handles nested JSON objects', () => {
|
||||
const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
|
||||
});
|
||||
|
||||
it('handles JSON with escaped quotes in strings', () => {
|
||||
const input = '{"text": "He said \\"hello\\""}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ text: 'He said "hello"' });
|
||||
});
|
||||
|
||||
it('handles JSON with braces inside strings', () => {
|
||||
const input = 'Result: {"code": "if (x) { return }"}';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ code: 'if (x) { return }' });
|
||||
});
|
||||
|
||||
it('trims whitespace before parsing', () => {
|
||||
const input = ' \n {"key": "value"} \n ';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('applies validation function on success', () => {
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name: string };
|
||||
if (!obj.name) throw new Error('missing name');
|
||||
return obj;
|
||||
};
|
||||
const result = extractJson('{"name": "test"}', validate);
|
||||
expect(result).toEqual({ name: 'test' });
|
||||
});
|
||||
|
||||
it('throws when validation fails', () => {
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name?: string };
|
||||
if (!obj.name) throw new Error('missing name');
|
||||
return obj;
|
||||
};
|
||||
expect(() => extractJson('{"value": 123}', validate)).toThrow();
|
||||
});
|
||||
|
||||
it('throws on completely invalid input', () => {
|
||||
expect(() => extractJson('This is just plain text with no JSON')).toThrow(
|
||||
'Failed to extract JSON'
|
||||
);
|
||||
});
|
||||
|
||||
it('throws on empty input', () => {
|
||||
expect(() => extractJson('')).toThrow('Failed to extract JSON');
|
||||
});
|
||||
|
||||
it('handles real-world LLM response with preamble', () => {
|
||||
const input = `Based on my analysis, here is the result:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"foods": [
|
||||
{"name": "Apple", "calories": 95, "protein": 0.5}
|
||||
],
|
||||
"totalCalories": 95,
|
||||
"confidence": 0.9
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
This analysis is based on the image provided.`;
|
||||
|
||||
const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
|
||||
expect(result.totalCalories).toBe(95);
|
||||
expect(result.foods).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('prefers object over array when both exist', () => {
|
||||
// Direct parse fails, fence fails, tries object first
|
||||
const input = 'Some text {"key": "val"} and [1, 2, 3]';
|
||||
const result = extractJson(input);
|
||||
expect(result).toEqual({ key: 'val' });
|
||||
});
|
||||
});
|
||||
|
|
@ -1,277 +0,0 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { LlmClient } from '../llm-client';
|
||||
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
|
||||
import type { ChatCompletionResponse } from '../types/openai-compat.types';
|
||||
|
||||
const mockFetch = vi.fn();
|
||||
vi.stubGlobal('fetch', mockFetch);
|
||||
|
||||
const DEFAULT_OPTIONS: ResolvedLlmOptions = {
|
||||
manaLlmUrl: 'http://localhost:3025',
|
||||
defaultModel: 'ollama/gemma3:4b',
|
||||
defaultVisionModel: 'ollama/llava:7b',
|
||||
timeout: 30_000,
|
||||
maxRetries: 0, // No retries in tests for simplicity
|
||||
debug: false,
|
||||
};
|
||||
|
||||
function mockCompletionResponse(
|
||||
content: string,
|
||||
model = 'ollama/gemma3:4b'
|
||||
): ChatCompletionResponse {
|
||||
return {
|
||||
id: 'chatcmpl-test123',
|
||||
object: 'chat.completion',
|
||||
created: Date.now(),
|
||||
model,
|
||||
choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
|
||||
usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
|
||||
};
|
||||
}
|
||||
|
||||
function mockFetchOk(body: unknown): void {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: () => Promise.resolve(body),
|
||||
text: () => Promise.resolve(JSON.stringify(body)),
|
||||
} as unknown as Response);
|
||||
}
|
||||
|
||||
function mockFetchError(status: number, body = ''): void {
|
||||
mockFetch.mockResolvedValueOnce({
|
||||
ok: false,
|
||||
status,
|
||||
statusText: `Error ${status}`,
|
||||
json: () => Promise.resolve({}),
|
||||
text: () => Promise.resolve(body),
|
||||
} as unknown as Response);
|
||||
}
|
||||
|
||||
describe('LlmClient', () => {
|
||||
let client: LlmClient;
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
client = new LlmClient(DEFAULT_OPTIONS);
|
||||
});
|
||||
|
||||
describe('chat', () => {
|
||||
it('sends correct request body', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Hello!'));
|
||||
|
||||
await client.chat('Hi there');
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledTimes(1);
|
||||
const [url, init] = mockFetch.mock.calls[0];
|
||||
expect(url).toBe('http://localhost:3025/v1/chat/completions');
|
||||
|
||||
const body = JSON.parse(init.body);
|
||||
expect(body.model).toBe('ollama/gemma3:4b');
|
||||
expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
|
||||
expect(body.stream).toBe(false);
|
||||
});
|
||||
|
||||
it('includes system prompt when provided', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Response'));
|
||||
|
||||
await client.chat('Question', { systemPrompt: 'You are helpful.' });
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.messages).toEqual([
|
||||
{ role: 'system', content: 'You are helpful.' },
|
||||
{ role: 'user', content: 'Question' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses custom model and temperature', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Response'));
|
||||
|
||||
await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('openrouter/gpt-4o');
|
||||
expect(body.temperature).toBe(0.3);
|
||||
});
|
||||
|
||||
it('returns ChatResult with content and usage', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Generated text'));
|
||||
|
||||
const result = await client.chat('Prompt');
|
||||
|
||||
expect(result.content).toBe('Generated text');
|
||||
expect(result.model).toBe('ollama/gemma3:4b');
|
||||
expect(result.usage.total_tokens).toBe(30);
|
||||
expect(result.latencyMs).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
|
||||
it('throws on error response', async () => {
|
||||
mockFetchError(500, 'Internal Server Error');
|
||||
|
||||
await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
|
||||
});
|
||||
});
|
||||
|
||||
describe('json', () => {
|
||||
it('extracts JSON from response', async () => {
|
||||
mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));
|
||||
|
||||
const result = await client.json<{ category: string; title: string }>('Analyze this');
|
||||
|
||||
expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
|
||||
expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
|
||||
});
|
||||
|
||||
it('extracts JSON from markdown-wrapped response', async () => {
|
||||
mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));
|
||||
|
||||
const result = await client.json('Parse this');
|
||||
expect(result.data).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('applies validation function', async () => {
|
||||
mockFetchOk(mockCompletionResponse('{"name": "test"}'));
|
||||
|
||||
const validate = (data: unknown) => {
|
||||
const obj = data as { name: string };
|
||||
if (typeof obj.name !== 'string') throw new Error('invalid');
|
||||
return obj;
|
||||
};
|
||||
|
||||
const result = await client.json('Prompt', { validate });
|
||||
expect(result.data.name).toBe('test');
|
||||
});
|
||||
|
||||
it('retries JSON extraction on parse failure', async () => {
|
||||
// First attempt returns bad JSON, second returns good
|
||||
mockFetchOk(mockCompletionResponse('not json at all'));
|
||||
mockFetchOk(mockCompletionResponse('{"valid": true}'));
|
||||
|
||||
const result = await client.json('Prompt', { jsonRetries: 1 });
|
||||
expect(result.data).toEqual({ valid: true });
|
||||
expect(mockFetch).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('vision', () => {
|
||||
it('builds multimodal message with base64 image', async () => {
|
||||
mockFetchOk(mockCompletionResponse('A rose'));
|
||||
|
||||
await client.vision('What is this?', 'abc123base64', 'image/jpeg');
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('ollama/llava:7b');
|
||||
expect(body.messages[0].content).toEqual([
|
||||
{ type: 'text', text: 'What is this?' },
|
||||
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
|
||||
]);
|
||||
});
|
||||
|
||||
it('uses data URL as-is if already formatted', async () => {
|
||||
mockFetchOk(mockCompletionResponse('A cat'));
|
||||
|
||||
await client.vision('What?', 'data:image/png;base64,xyz');
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
const imageUrl = body.messages[0].content[1].image_url.url;
|
||||
expect(imageUrl).toBe('data:image/png;base64,xyz');
|
||||
});
|
||||
|
||||
it('uses custom vision model when specified', async () => {
|
||||
mockFetchOk(mockCompletionResponse('Result'));
|
||||
|
||||
await client.vision('Prompt', 'img', 'image/jpeg', {
|
||||
visionModel: 'ollama/qwen3-vl:4b',
|
||||
});
|
||||
|
||||
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
|
||||
expect(body.model).toBe('ollama/qwen3-vl:4b');
|
||||
});
|
||||
});
|
||||
|
||||
// visionJson(): vision call whose reply is parsed as JSON — the fixture
// wraps the payload in a markdown ```json fence to prove it is stripped.
describe('visionJson', () => {
  it('extracts JSON from vision response', async () => {
    mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));

    const result = await client.visionJson<{ species: string }>(
      'Identify plant',
      'imgdata',
      'image/jpeg'
    );

    expect(result.data.species).toBe('Rose');
  });
});

// health(): liveness probe. Must report 'unhealthy' on network failure
// rather than throw.
describe('health', () => {
  it('returns health status', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          status: 'healthy',
          providers: { ollama: { status: 'healthy' } },
        }),
    } as unknown as Response);

    const health = await client.health();
    expect(health.status).toBe('healthy');
  });

  it('returns unhealthy on network error', async () => {
    mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));

    const health = await client.health();
    expect(health.status).toBe('unhealthy');
  });
});

// listModels(): unwraps the OpenAI-style `{ data: [...] }` envelope.
describe('listModels', () => {
  it('returns model list', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      status: 200,
      json: () =>
        Promise.resolve({
          data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
        }),
    } as unknown as Response);

    const models = await client.listModels();
    expect(models).toHaveLength(1);
    expect(models[0].id).toBe('ollama/gemma3:4b');
  });
});

// chatMessages(): a multi-turn history must be forwarded verbatim.
describe('chatMessages', () => {
  it('sends full message history', async () => {
    mockFetchOk(mockCompletionResponse('Answer'));

    await client.chatMessages([
      { role: 'system', content: 'Be brief.' },
      { role: 'user', content: 'Hello' },
      { role: 'assistant', content: 'Hi!' },
      { role: 'user', content: 'How are you?' },
    ]);

    const body = JSON.parse(mockFetch.mock.calls[0][1].body);
    expect(body.messages).toHaveLength(4);
  });
});

// embed(): embeddings come back in the OpenAI list format and are
// unwrapped into plain number[] vectors.
describe('embed', () => {
  it('sends embedding request', async () => {
    mockFetchOk({
      object: 'list',
      data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
      model: 'ollama/gemma3:4b',
      usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
    });

    const result = await client.embed('Hello world');
    expect(result.embeddings).toHaveLength(1);
    expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
  });
});
|
||||
});
|
||||
|
|
@ -1,118 +0,0 @@
|
|||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { retryFetch } from '../utils/retry';
|
||||
|
||||
// Replace the global fetch with a vitest mock so each test can script
// the network (queued resolved responses / rejections, call counting).
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
|
||||
|
||||
function mockResponse(status: number, body = ''): Response {
|
||||
return {
|
||||
ok: status >= 200 && status < 300,
|
||||
status,
|
||||
statusText: `Status ${status}`,
|
||||
text: () => Promise.resolve(body),
|
||||
json: () => Promise.resolve(JSON.parse(body || '{}')),
|
||||
headers: new Headers(),
|
||||
} as unknown as Response;
|
||||
}
|
||||
|
||||
describe('retryFetch', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns on first successful attempt', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  // Transient server errors (503, 429) must be retried.
  it('retries on 503 and succeeds', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on 429 rate limit', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(429))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // Thrown (network-level) errors are retryable too.
  it('retries on network error and succeeds', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // Client errors (4xx other than 429) are permanent — the response is
  // returned as-is after a single attempt.
  it('does NOT retry on 400 client error', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(400);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 401 unauthorized', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(401));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(401);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 404 not found', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(404));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(404);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  // After the retry budget is spent the last failure surfaces as a throw.
  it('throws after exhausting all retries', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
      'HTTP 503'
    );

    expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
  });

  it('throws after exhausting retries on network errors', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockRejectedValueOnce(new Error('ECONNREFUSED'));

    await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
      'ECONNREFUSED'
    );

    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  // maxRetries: 0 means exactly one attempt, failure throws immediately.
  it('works with maxRetries: 0 (no retries)', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();

    expect(mockFetch).toHaveBeenCalledTimes(1);
  });
});
|
||||
62
packages/shared-llm/src/backends/browser.ts
Normal file
62
packages/shared-llm/src/backends/browser.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Browser-edge backend — wraps @mana/local-llm.
|
||||
*
|
||||
* Inference happens 100% on the user's device via WebGPU. The model
|
||||
* (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the
|
||||
* browser. We do NOT auto-load on backend creation; the user has to
|
||||
* explicitly trigger a load via the settings page or by using a feature
|
||||
* that calls `ensureLoaded()`. This avoids surprising 500 MB downloads.
|
||||
*/
|
||||
|
||||
import {
|
||||
localLLM,
|
||||
LocalLLMEngine,
|
||||
loadLocalLlm,
|
||||
type ChatMessage as LocalChatMessage,
|
||||
} from '@mana/local-llm';
|
||||
import { EdgeLoadFailedError } from '../errors';
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
|
||||
export class BrowserBackend implements LlmBackend {
|
||||
readonly tier = 'browser' as const;
|
||||
|
||||
isAvailable(): boolean {
|
||||
return LocalLLMEngine.isSupported();
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return localLLM.isReady;
|
||||
}
|
||||
|
||||
/** Trigger the one-time model download + WebGPU initialization.
|
||||
* Idempotent — safe to call repeatedly. Throws EdgeLoadFailedError
|
||||
* on failure (model corrupt, WebGPU OOM, etc.). */
|
||||
async ensureLoaded(): Promise<void> {
|
||||
try {
|
||||
await loadLocalLlm();
|
||||
} catch (err) {
|
||||
throw new EdgeLoadFailedError(err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
await this.ensureLoaded();
|
||||
|
||||
const result = await localLLM.generate({
|
||||
messages: req.messages as LocalChatMessage[],
|
||||
temperature: req.temperature,
|
||||
maxTokens: req.maxTokens,
|
||||
onToken: req.onToken,
|
||||
});
|
||||
|
||||
return {
|
||||
content: result.content,
|
||||
usage: {
|
||||
promptTokens: result.usage.prompt_tokens,
|
||||
completionTokens: result.usage.completion_tokens,
|
||||
totalTokens: result.usage.total_tokens,
|
||||
},
|
||||
latencyMs: result.latencyMs,
|
||||
};
|
||||
}
|
||||
}
|
||||
44
packages/shared-llm/src/backends/cloud.ts
Normal file
44
packages/shared-llm/src/backends/cloud.ts
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Cloud backend — calls services/mana-llm with a `google/...` model
|
||||
* string. mana-llm's ProviderRouter recognizes the `google/` prefix
|
||||
* and routes to its Google Gemini provider, which holds the API key
|
||||
* server-side (we never expose the key to the browser).
|
||||
*
|
||||
* Default model is google/gemini-2.0-flash. The mana-llm google.py
|
||||
* provider also supports gemini-2.5-pro for higher-quality calls but
|
||||
* 2.0-flash is the right default — fast, cheap, multimodal, plenty
|
||||
* good for the kind of structured-output tasks Mana modules need.
|
||||
*
|
||||
* Cloud is gated by `cloudConsentGiven` in LlmSettings — even if a
|
||||
* user has 'cloud' in their allowedTiers, the orchestrator will skip
|
||||
* this backend until they've ticked the consent checkbox once.
|
||||
*/
|
||||
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
|
||||
|
||||
export interface CloudBackendOptions {
|
||||
/** Gemini model to send. Default 'google/gemini-2.0-flash'. */
|
||||
defaultModel?: string;
|
||||
}
|
||||
|
||||
export class CloudBackend implements LlmBackend {
|
||||
readonly tier = 'cloud' as const;
|
||||
private readonly defaultModel: string;
|
||||
|
||||
constructor(opts: CloudBackendOptions = {}) {
|
||||
this.defaultModel = opts.defaultModel ?? 'google/gemini-2.0-flash';
|
||||
}
|
||||
|
||||
isAvailable(): boolean {
|
||||
return resolveLlmBaseUrl().length > 0;
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return this.isAvailable();
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
return callManaLlmStreaming(this.tier, this.defaultModel, req);
|
||||
}
|
||||
}
|
||||
43
packages/shared-llm/src/backends/mana-server.ts
Normal file
43
packages/shared-llm/src/backends/mana-server.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/**
|
||||
* Mana-server backend — calls services/mana-llm with an Ollama model
|
||||
* string. mana-llm's ProviderRouter recognizes plain Ollama model names
|
||||
* (no provider prefix) and routes them to the local Ollama instance on
|
||||
* the Mac Mini, with automatic Gemini fallback if Ollama is overloaded.
|
||||
*
|
||||
* The default model is gemma3:4b — same model family as the browser
|
||||
* tier (Gemma 4 E2B is the smaller sibling), so prompts behave
|
||||
* consistently when a task auto-falls between tiers.
|
||||
*/
|
||||
|
||||
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
||||
import { callManaLlmStreaming, resolveLlmBaseUrl } from './remote';
|
||||
|
||||
export interface ManaServerBackendOptions {
|
||||
/** Ollama model name to send to mana-llm. Default 'gemma3:4b'. */
|
||||
defaultModel?: string;
|
||||
}
|
||||
|
||||
export class ManaServerBackend implements LlmBackend {
|
||||
readonly tier = 'mana-server' as const;
|
||||
private readonly defaultModel: string;
|
||||
|
||||
constructor(opts: ManaServerBackendOptions = {}) {
|
||||
this.defaultModel = opts.defaultModel ?? 'gemma3:4b';
|
||||
}
|
||||
|
||||
isAvailable(): boolean {
|
||||
// Available if we have a base URL configured at all. We don't
|
||||
// ping /health here — that adds latency to every isAvailable()
|
||||
// check. The first real call will fail loudly if mana-llm is down.
|
||||
return resolveLlmBaseUrl().length > 0;
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
// Stateless from our side — assume ready if available.
|
||||
return this.isAvailable();
|
||||
}
|
||||
|
||||
async generate(req: LlmTaskRequest): Promise<GenerateResult> {
|
||||
return callManaLlmStreaming(this.tier, this.defaultModel, req);
|
||||
}
|
||||
}
|
||||
135
packages/shared-llm/src/backends/remote.ts
Normal file
135
packages/shared-llm/src/backends/remote.ts
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* Shared HTTP transport for the mana-server and cloud backends.
|
||||
*
|
||||
* Both tiers POST to the same OpenAI-compatible endpoint on
|
||||
* services/mana-llm — they only differ in the `model:` string they
|
||||
* send (which selects which provider mana-llm internally routes to).
|
||||
*
|
||||
* The endpoint is `/v1/chat/completions` and the wire format is
|
||||
* straight OpenAI SSE: `data: {…}\n\n` lines, terminated by
|
||||
* `data: [DONE]`. The hand-rolled parser is the same shape as the
|
||||
* existing playground client (apps/mana/apps/web/src/lib/modules/
|
||||
* playground/llm.ts) so the two consumers stay aligned and can be
|
||||
* unified later if we want.
|
||||
*/
|
||||
|
||||
import { BackendUnreachableError, ProviderBlockedError } from '../errors';
|
||||
import type { LlmTier } from '../tiers';
|
||||
import type { GenerateResult, LlmTaskRequest } from '../types';
|
||||
|
||||
const DEFAULT_LLM_URL = 'http://localhost:3025';
|
||||
|
||||
/** Resolve the mana-llm base URL from the window-injected env, falling
|
||||
* back to localhost. Mirrors the playground client pattern. */
|
||||
export function resolveLlmBaseUrl(): string {
|
||||
if (typeof window !== 'undefined') {
|
||||
const fromWindow = (window as unknown as { __PUBLIC_MANA_LLM_URL__?: string })
|
||||
.__PUBLIC_MANA_LLM_URL__;
|
||||
if (fromWindow) return fromWindow.replace(/\/$/, '');
|
||||
}
|
||||
return DEFAULT_LLM_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Send a chat completion to mana-llm and yield streaming token deltas.
|
||||
* The caller is responsible for assembling the final string and tracking
|
||||
* latency.
|
||||
*
|
||||
* `tier` is only used for error tagging — both 'mana-server' and 'cloud'
|
||||
* call the same endpoint with different model strings.
|
||||
*/
|
||||
export async function callManaLlmStreaming(
|
||||
tier: Exclude<LlmTier, 'none' | 'browser'>,
|
||||
model: string,
|
||||
req: LlmTaskRequest
|
||||
): Promise<GenerateResult> {
|
||||
const url = `${resolveLlmBaseUrl()}/v1/chat/completions`;
|
||||
const start = performance.now();
|
||||
|
||||
let res: Response;
|
||||
try {
|
||||
res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
credentials: 'include', // forwards the Mana auth cookie if present
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages: req.messages,
|
||||
temperature: req.temperature ?? 0.7,
|
||||
max_tokens: req.maxTokens ?? 1024,
|
||||
stream: true,
|
||||
}),
|
||||
});
|
||||
} catch (err) {
|
||||
// Network failure — DNS, refused connection, CORS preflight, etc.
|
||||
throw new BackendUnreachableError(
|
||||
tier,
|
||||
undefined,
|
||||
err instanceof Error ? err.message : String(err)
|
||||
);
|
||||
}
|
||||
|
||||
if (!res.ok || !res.body) {
|
||||
const text = await res.text().catch(() => '');
|
||||
// 451 = upstream blocked content (we use this convention; Gemini
|
||||
// safety blocks are mapped to 451 in mana-llm's google provider).
|
||||
// Other 4xx/5xx are generic server errors.
|
||||
if (res.status === 451 || /safety|blocked|filter/i.test(text)) {
|
||||
throw new ProviderBlockedError(tier, text || `HTTP ${res.status}`);
|
||||
}
|
||||
throw new BackendUnreachableError(tier, res.status, text);
|
||||
}
|
||||
|
||||
const reader = res.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
let collected = '';
|
||||
let promptTokens = 0;
|
||||
let completionTokens = 0;
|
||||
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
if (done) break;
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
// SSE frames are separated by blank lines.
|
||||
let sep: number;
|
||||
while ((sep = buffer.indexOf('\n\n')) !== -1) {
|
||||
const frame = buffer.slice(0, sep);
|
||||
buffer = buffer.slice(sep + 2);
|
||||
|
||||
for (const line of frame.split('\n')) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
const data = line.slice(5).trim();
|
||||
if (!data || data === '[DONE]') continue;
|
||||
try {
|
||||
const json = JSON.parse(data) as {
|
||||
choices?: Array<{ delta?: { content?: string } }>;
|
||||
usage?: { prompt_tokens?: number; completion_tokens?: number };
|
||||
};
|
||||
const delta = json.choices?.[0]?.delta?.content;
|
||||
if (delta) {
|
||||
collected += delta;
|
||||
req.onToken?.(delta);
|
||||
}
|
||||
if (json.usage) {
|
||||
promptTokens = json.usage.prompt_tokens ?? promptTokens;
|
||||
completionTokens = json.usage.completion_tokens ?? completionTokens;
|
||||
}
|
||||
} catch {
|
||||
// Malformed frame — keepalive comment, skip silently.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: collected,
|
||||
usage: {
|
||||
promptTokens,
|
||||
completionTokens,
|
||||
totalTokens: promptTokens + completionTokens,
|
||||
},
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
};
|
||||
}
|
||||
80
packages/shared-llm/src/errors.ts
Normal file
80
packages/shared-llm/src/errors.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Typed error classes for the LLM orchestrator. UI code can `instanceof`
|
||||
* these to render task-appropriate failure states (retry button, switch
|
||||
* tier prompt, "blocked by safety filter" notice, etc.).
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
|
||||
// Root of the error hierarchy: `err instanceof LlmError` catches every
// failure raised by the orchestrator and its backends.
export class LlmError extends Error {
  constructor(message: string) {
    super(message);
    // Overwrite the inherited 'Error' so logs show the concrete class.
    this.name = 'LlmError';
  }
}
|
||||
|
||||
/** No tier from the user's preference list was able to run the task. */
|
||||
export class NoTierAvailableError extends LlmError {
|
||||
constructor(
|
||||
public readonly taskName: string,
|
||||
public readonly attempted: LlmTier[]
|
||||
) {
|
||||
super(`No tier could run task '${taskName}' (attempted: ${attempted.join(', ') || 'none'})`);
|
||||
this.name = 'NoTierAvailableError';
|
||||
}
|
||||
}
|
||||
|
||||
/** The user's chosen tier is below the task's declared minimum tier. */
|
||||
export class TierTooLowError extends LlmError {
|
||||
constructor(
|
||||
public readonly taskName: string,
|
||||
public readonly requiredTier: LlmTier,
|
||||
public readonly userTier: LlmTier
|
||||
) {
|
||||
super(
|
||||
`Task '${taskName}' requires tier '${requiredTier}' but user is on '${userTier}'. Activate the higher tier in settings.`
|
||||
);
|
||||
this.name = 'TierTooLowError';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The upstream provider blocked the content (e.g. Gemini safety filter,
|
||||
* OpenAI moderation). The UI should offer "retry" + "switch to another
|
||||
* provider" options to the user — this is NOT auto-recoverable because
|
||||
* a different provider might allow the same content (or might not).
|
||||
*/
|
||||
export class ProviderBlockedError extends LlmError {
  constructor(
    // Tier whose provider refused the request (used for UI error routing).
    public readonly tier: LlmTier,
    // Raw upstream message, surfaced so the user can see why it was blocked.
    public readonly providerMessage: string
  ) {
    super(`Provider '${tier}' blocked the request: ${providerMessage}`);
    this.name = 'ProviderBlockedError';
  }
}
|
||||
|
||||
/** Network/server error from a remote tier (mana-server, cloud). */
|
||||
export class BackendUnreachableError extends LlmError {
|
||||
constructor(
|
||||
public readonly tier: LlmTier,
|
||||
public readonly httpStatus?: number,
|
||||
details?: string
|
||||
) {
|
||||
super(
|
||||
`Backend '${tier}' is unreachable${httpStatus ? ` (HTTP ${httpStatus})` : ''}${details ? `: ${details}` : ''}`
|
||||
);
|
||||
this.name = 'BackendUnreachableError';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The browser tier specifically failed to load — model download
|
||||
* interrupted, WebGPU adapter request failed, OOM, etc.
|
||||
*/
|
||||
export class EdgeLoadFailedError extends LlmError {
  // `cause` is the stringified underlying failure (download interruption,
  // WebGPU adapter failure, OOM, …).
  // NOTE(review): this shadows the ES2022 `Error#cause` property (which
  // holds an unknown, not a string) — consider renaming to avoid confusion.
  constructor(public readonly cause: string) {
    super(`Edge LLM failed to load: ${cause}`);
    this.name = 'EdgeLoadFailedError';
  }
}
|
||||
|
|
@ -1,39 +1,44 @@
|
|||
// Module
|
||||
export { LlmModule } from './llm.module';
|
||||
export { LlmClientService } from './llm-client.service';
|
||||
export { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
|
||||
// Core client (for advanced use cases)
|
||||
export { LlmClient } from './llm-client';
|
||||
|
||||
// Interfaces
|
||||
export type {
|
||||
LlmModuleOptions,
|
||||
LlmModuleAsyncOptions,
|
||||
LlmOptionsFactory,
|
||||
ResolvedLlmOptions,
|
||||
} from './interfaces';
|
||||
export { resolveOptions } from './interfaces';
|
||||
|
||||
// Types
|
||||
// Tiers + types
|
||||
export { ALL_TIERS, TIER_RANK, tierLabel, type LlmTier } from './tiers';
|
||||
export type {
|
||||
CapabilityRequirements,
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
ContentClass,
|
||||
GenerateOptions,
|
||||
GenerateResult,
|
||||
LlmBackend,
|
||||
LlmSettings,
|
||||
LlmTaskRequest,
|
||||
LlmTaskResult,
|
||||
} from './types';
|
||||
export { DEFAULT_LLM_SETTINGS } from './types';
|
||||
|
||||
// Utilities
|
||||
export { extractJson } from './utils';
|
||||
// Errors
|
||||
export {
|
||||
BackendUnreachableError,
|
||||
EdgeLoadFailedError,
|
||||
LlmError,
|
||||
NoTierAvailableError,
|
||||
ProviderBlockedError,
|
||||
TierTooLowError,
|
||||
} from './errors';
|
||||
|
||||
// Metrics
|
||||
export { LlmMetricsCollector } from './utils';
|
||||
export type { LlmRequestMetrics, MetricsCallback } from './utils';
|
||||
// Task contract
|
||||
export { buildTaskRequest, type LlmTask } from './task';
|
||||
|
||||
// Orchestrator (rarely instantiated directly — most consumers use the
|
||||
// store's singleton instead)
|
||||
export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator';
|
||||
|
||||
// Backends (exported for tests + custom orchestrator setups)
|
||||
export { BrowserBackend } from './backends/browser';
|
||||
export { CloudBackend, type CloudBackendOptions } from './backends/cloud';
|
||||
export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server';
|
||||
|
||||
// Singleton store + Svelte 5 reactive hooks
|
||||
export {
|
||||
llmOrchestrator,
|
||||
llmSettingsState,
|
||||
updateLlmSettings,
|
||||
useTaskAvailability,
|
||||
} from './store.svelte';
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
// Barrel re-export so consumers can import from './interfaces' without
// knowing the concrete file layout.
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './llm-options.interface';

export { resolveOptions } from './llm-options.interface';
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
import type { ModuleMetadata, Type } from '@nestjs/common';
|
||||
import type { MetricsCallback } from '../utils/metrics';
|
||||
|
||||
/** User-facing configuration for the LLM module. All fields optional;
 * see resolveOptions() for the applied defaults. */
export interface LlmModuleOptions {
  /** mana-llm service URL (default: http://localhost:3025) */
  manaLlmUrl?: string;
  /** Default text model (default: ollama/gemma3:4b) */
  defaultModel?: string;
  /** Default vision model (default: ollama/llava:7b) */
  defaultVisionModel?: string;
  /** Request timeout in ms (default: 120000) */
  timeout?: number;
  /** Max retries on transient failures (default: 2) */
  maxRetries?: number;
  /** Enable debug logging (default: false) */
  debug?: boolean;
  /** Optional callback invoked after every LLM request with metrics */
  onMetrics?: MetricsCallback;
}

/**
 * Async configuration following the standard NestJS dynamic-module
 * pattern: exactly one of useExisting / useClass / useFactory supplies
 * the options.
 */
export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
  /** Reuse an already-registered provider implementing LlmOptionsFactory. */
  useExisting?: Type<LlmOptionsFactory>;
  /** Instantiate this class as the options factory. */
  useClass?: Type<LlmOptionsFactory>;
  /** Build the options from a factory function with injected dependencies. */
  useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
  /** DI tokens injected into useFactory, positionally. */
  inject?: any[];
}

/** Contract for useExisting/useClass option factories. */
export interface LlmOptionsFactory {
  createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}

/** LlmModuleOptions after defaults are applied — every knob concrete
 * except the optional metrics callback. */
export interface ResolvedLlmOptions {
  manaLlmUrl: string;
  defaultModel: string;
  defaultVisionModel: string;
  timeout: number;
  maxRetries: number;
  debug: boolean;
  onMetrics?: MetricsCallback;
}
|
||||
|
||||
export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
|
||||
return {
|
||||
manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
|
||||
defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
|
||||
defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
|
||||
timeout: options.timeout ?? 120_000,
|
||||
maxRetries: options.maxRetries ?? 2,
|
||||
debug: options.debug ?? false,
|
||||
onMetrics: options.onMetrics,
|
||||
};
|
||||
}
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
import { Inject, Injectable } from '@nestjs/common';
|
||||
import { LlmClient } from './llm-client';
|
||||
import { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
|
||||
import { resolveOptions } from './interfaces/llm-options.interface';
|
||||
|
||||
/**
 * NestJS injectable wrapper around LlmClient.
 * All logic lives in the framework-agnostic LlmClient base class; this
 * subclass only resolves the DI-provided options into concrete values.
 */
@Injectable()
export class LlmClientService extends LlmClient {
  constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
    // Apply defaults before handing the config to the base client.
    super(resolveOptions(options));
  }
}
|
||||
|
|
@ -1,392 +0,0 @@
|
|||
/**
|
||||
* Framework-agnostic LLM client that communicates with the mana-llm service.
|
||||
*
|
||||
* This is the core implementation shared between the NestJS LlmClientService
|
||||
* and the standalone LlmClient export (for non-NestJS consumers like bot-services).
|
||||
*/
|
||||
|
||||
import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
|
||||
import type {
|
||||
ChatMessage,
|
||||
ChatOptions,
|
||||
ChatResult,
|
||||
JsonOptions,
|
||||
JsonResult,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './types/chat.types';
|
||||
import type {
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
EmbeddingResponse,
|
||||
} from './types/openai-compat.types';
|
||||
import type { LlmRequestMetrics } from './utils/metrics';
|
||||
import { extractJson } from './utils/json-extractor';
|
||||
import { retryFetch } from './utils/retry';
|
||||
|
||||
function createTimeoutSignal(ms: number): any {
|
||||
const controller = new AbortController();
|
||||
setTimeout(() => controller.abort(), ms);
|
||||
return controller.signal;
|
||||
}
|
||||
|
||||
export class LlmClient {
|
||||
  // mana-llm service base URL, normalized to have no trailing slash.
  private readonly baseUrl: string;
  // Fully-defaulted configuration (see resolveOptions()).
  private readonly options: ResolvedLlmOptions;

  constructor(options: ResolvedLlmOptions) {
    this.options = options;
    // Strip trailing slashes once so endpoint paths can be appended safely.
    this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Text Chat
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Simple chat with a single prompt string. */
|
||||
async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
return this.chatMessages(messages, opts);
|
||||
}
|
||||
|
||||
  /**
   * Chat with full message history. Non-streaming: resolves once the
   * complete reply is available. Emits exactly one metrics record per
   * call — on success and on failure — then returns or rethrows.
   */
  async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
    const requestedModel = opts?.model ?? this.options.defaultModel;
    const body = this.buildRequest(messages, opts, false); // stream=false
    const start = Date.now();

    try {
      const response = await this.fetchCompletion(body, opts?.timeout);
      const latencyMs = Date.now() - start;
      // Some responses omit usage; zero it out so metrics stay well-formed.
      const usage = response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

      this.emitMetrics({
        model: requestedModel,
        actualModel: response.model,
        type: 'chat',
        latencyMs,
        promptTokens: usage.prompt_tokens,
        completionTokens: usage.completion_tokens,
        totalTokens: usage.total_tokens,
        // Fallback heuristic: the served model neither equals nor ends
        // with the requested name (endsWith covers provider-prefixed
        // echoes of the same model).
        wasFallback: response.model !== requestedModel && !response.model.endsWith(requestedModel),
        success: true,
      });

      return {
        content: response.choices[0]?.message?.content ?? '',
        model: response.model,
        usage,
        latencyMs,
      };
    } catch (error) {
      // Record the failure with zeroed token counts, then surface the
      // original error unchanged to the caller.
      this.emitMetrics({
        model: requestedModel,
        actualModel: requestedModel,
        type: 'chat',
        latencyMs: Date.now() - start,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: 0,
        wasFallback: false,
        success: false,
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Streaming
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Streaming chat - returns an async iterable of text tokens. */
|
||||
async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
yield* this.chatStreamMessages(messages, opts);
|
||||
}
|
||||
|
||||
  /**
   * Streaming chat with full message history. Yields text tokens as they
   * arrive from mana-llm's OpenAI-style SSE stream; returns when the
   * server sends the `[DONE]` sentinel or closes the connection.
   */
  async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
    const body = this.buildRequest(messages, opts, true); // stream=true
    const timeout = opts?.timeout ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm stream error ${response.status}: ${text}`);
    }

    if (!response.body) {
      throw new Error('mana-llm returned no response body for stream');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    // Carries a partial line across network chunk boundaries.
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        // The last split element may be an incomplete line — re-buffer it.
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed || !trimmed.startsWith('data: ')) continue;

          const data = trimmed.slice(6); // strip the 'data: ' prefix
          if (data === '[DONE]') return;

          try {
            const chunk = JSON.parse(data);
            const content = chunk.choices?.[0]?.delta?.content;
            if (content) yield content;
          } catch {
            // Skip unparseable chunks
          }
        }
      }
    } finally {
      // Always release the lock so the stream can be cancelled / GC'd.
      reader.releaseLock();
    }
  }
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Structured JSON Output
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Chat that extracts and parses JSON from the response. */
|
||||
async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
|
||||
const messages = this.buildMessages(prompt, opts?.systemPrompt);
|
||||
return this.jsonMessages<T>(messages, opts);
|
||||
}
|
||||
|
||||
/** JSON extraction from full message history. */
|
||||
async jsonMessages<T = unknown>(
|
||||
messages: ChatMessage[],
|
||||
opts?: JsonOptions<T>
|
||||
): Promise<JsonResult<T>> {
|
||||
const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
|
||||
let lastError: Error | undefined;
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
const result = await this.chatMessages(messages, opts);
|
||||
|
||||
try {
|
||||
const data = extractJson<T>(result.content, opts?.validate);
|
||||
return { ...result, data };
|
||||
} catch (error) {
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
if (this.options.debug) {
|
||||
console.warn(
|
||||
`[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
|
||||
lastError.message
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError ?? new Error('JSON extraction failed');
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vision
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Analyze an image with a text prompt. */
|
||||
async vision(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
opts?: VisionOptions
|
||||
): Promise<ChatResult> {
|
||||
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
|
||||
const model = opts?.visionModel ?? this.options.defaultVisionModel;
|
||||
return this.chatMessages(messages, { ...opts, model });
|
||||
}
|
||||
|
||||
/** Vision + JSON extraction. */
|
||||
async visionJson<T = unknown>(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
opts?: VisionOptions & JsonOptions<T>
|
||||
): Promise<JsonResult<T>> {
|
||||
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
|
||||
const model = opts?.visionModel ?? this.options.defaultVisionModel;
|
||||
return this.jsonMessages<T>(messages, { ...opts, model });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Embeddings
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Generate embeddings for text input. */
|
||||
async embed(
|
||||
input: string | string[],
|
||||
model?: string
|
||||
): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
|
||||
const response = await retryFetch(
|
||||
`${this.baseUrl}/v1/embeddings`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model: model ?? this.options.defaultModel,
|
||||
input,
|
||||
}),
|
||||
signal: createTimeoutSignal(this.options.timeout),
|
||||
},
|
||||
{ maxRetries: this.options.maxRetries }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '');
|
||||
throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as EmbeddingResponse;
|
||||
return {
|
||||
embeddings: data.data.map((d) => d.embedding),
|
||||
usage: data.usage,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Health & Models
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Check mana-llm health and provider status. */
|
||||
async health(): Promise<HealthStatus> {
|
||||
try {
|
||||
const response = await fetch(`${this.baseUrl}/health`, {
|
||||
signal: createTimeoutSignal(5_000),
|
||||
});
|
||||
if (!response.ok) {
|
||||
return { status: 'unhealthy', providers: {} };
|
||||
}
|
||||
return (await response.json()) as HealthStatus;
|
||||
} catch {
|
||||
return { status: 'unhealthy', providers: {} };
|
||||
}
|
||||
}
|
||||
|
||||
/** List available models from all providers. */
|
||||
async listModels(): Promise<ModelInfo[]> {
|
||||
const response = await fetch(`${this.baseUrl}/v1/models`, {
|
||||
signal: createTimeoutSignal(10_000),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`mana-llm models error ${response.status}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as { data: ModelInfo[] };
|
||||
return data.data ?? [];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Private helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
|
||||
const messages: ChatMessage[] = [];
|
||||
if (systemPrompt) {
|
||||
messages.push({ role: 'system', content: systemPrompt });
|
||||
}
|
||||
messages.push({ role: 'user', content: prompt });
|
||||
return messages;
|
||||
}
|
||||
|
||||
private buildVisionMessages(
|
||||
prompt: string,
|
||||
imageBase64: string,
|
||||
mimeType?: string,
|
||||
systemPrompt?: string
|
||||
): ChatMessage[] {
|
||||
const mime = mimeType ?? 'image/jpeg';
|
||||
const dataUrl = imageBase64.startsWith('data:')
|
||||
? imageBase64
|
||||
: `data:${mime};base64,${imageBase64}`;
|
||||
|
||||
const messages: ChatMessage[] = [];
|
||||
if (systemPrompt) {
|
||||
messages.push({ role: 'system', content: systemPrompt });
|
||||
}
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: [
|
||||
{ type: 'text', text: prompt },
|
||||
{ type: 'image_url', image_url: { url: dataUrl } },
|
||||
],
|
||||
});
|
||||
return messages;
|
||||
}
|
||||
|
||||
private buildRequest(
|
||||
messages: ChatMessage[],
|
||||
opts: ChatOptions | undefined,
|
||||
stream: boolean
|
||||
): ChatCompletionRequest {
|
||||
const request: ChatCompletionRequest = {
|
||||
model: opts?.model ?? this.options.defaultModel,
|
||||
messages,
|
||||
stream,
|
||||
};
|
||||
|
||||
if (opts?.temperature !== undefined) request.temperature = opts.temperature;
|
||||
if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;
|
||||
|
||||
return request;
|
||||
}
|
||||
|
||||
private async fetchCompletion(
|
||||
body: ChatCompletionRequest,
|
||||
timeoutOverride?: number
|
||||
): Promise<ChatCompletionResponse> {
|
||||
const timeout = timeoutOverride ?? this.options.timeout;
|
||||
|
||||
const response = await retryFetch(
|
||||
`${this.baseUrl}/v1/chat/completions`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
signal: createTimeoutSignal(timeout),
|
||||
},
|
||||
{ maxRetries: this.options.maxRetries }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
const text = await response.text().catch(() => '');
|
||||
throw new Error(`mana-llm error ${response.status}: ${text}`);
|
||||
}
|
||||
|
||||
return (await response.json()) as ChatCompletionResponse;
|
||||
}
|
||||
|
||||
private emitMetrics(metrics: LlmRequestMetrics): void {
|
||||
if (this.options.onMetrics) {
|
||||
try {
|
||||
this.options.onMetrics(metrics);
|
||||
} catch {
|
||||
// Never let metrics callback break the request
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1 +0,0 @@
|
|||
/** NestJS injection token under which LlmModule registers its options object. */
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
|
||||
import type {
|
||||
LlmModuleOptions,
|
||||
LlmModuleAsyncOptions,
|
||||
LlmOptionsFactory,
|
||||
} from './interfaces/llm-options.interface';
|
||||
import { LlmClientService } from './llm-client.service';
|
||||
import { LLM_MODULE_OPTIONS } from './llm.constants';
|
||||
|
||||
@Global()
@Module({})
export class LlmModule {
  /** Register the module with a static, literal options object. */
  static forRoot(options: LlmModuleOptions): DynamicModule {
    return {
      module: LlmModule,
      providers: [
        {
          provide: LLM_MODULE_OPTIONS,
          useValue: options,
        },
        LlmClientService,
      ],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  /** Register the module with options resolved asynchronously — via
   * useFactory, useClass, or useExisting (see createAsyncProviders
   * for the precedence between them). */
  static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
    const asyncProviders = this.createAsyncProviders(options);

    return {
      module: LlmModule,
      imports: options.imports || [],
      providers: [...asyncProviders, LlmClientService],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  /** Builds the providers that resolve LLM_MODULE_OPTIONS.
   * Precedence: useFactory, then useClass (which is also registered
   * as its own provider so it can be injected), then useExisting.
   * Returns an empty list when none of the three is supplied. */
  private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
    if (options.useFactory) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: options.useFactory,
          inject: options.inject || [],
        },
      ];
    }

    const useClass = options.useClass;
    const useExisting = options.useExisting;

    if (useClass) {
      return [
        {
          // Options come from the factory class's createLlmOptions().
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useClass],
        },
        {
          provide: useClass,
          useClass,
        },
      ];
    }

    if (useExisting) {
      return [
        {
          // Reuse a provider registered elsewhere as the options factory.
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useExisting],
        },
      ];
    }

    return [];
  }
}
|
||||
258
packages/shared-llm/src/orchestrator.ts
Normal file
258
packages/shared-llm/src/orchestrator.ts
Normal file
|
|
@ -0,0 +1,258 @@
|
|||
/**
|
||||
* LlmOrchestrator — routes LlmTasks across the four privacy tiers
|
||||
* (none / browser / mana-server / cloud) according to the user's
|
||||
* settings, the task's minimum tier, and the input's content class.
|
||||
*
|
||||
* Routing rules — applied in this exact order:
|
||||
*
|
||||
* 1. If the task's minTier is above the user's HIGHEST allowed tier,
|
||||
* we cannot run the LLM path at all. Try runRules() if defined,
|
||||
* else throw TierTooLowError.
|
||||
*
|
||||
* 2. If contentClass is 'sensitive', strip 'mana-server' and 'cloud'
|
||||
* from the candidate tier list — sensitive content NEVER leaves
|
||||
* the device, even if the user has these tiers enabled globally.
|
||||
* This is the privacy backstop the user can't accidentally
|
||||
* override task-by-task.
|
||||
*
|
||||
* 3. If a per-task override exists in settings.taskOverrides, use it
|
||||
* verbatim (still subject to rule 2 — task overrides cannot
|
||||
* bypass the sensitive-content backstop).
|
||||
*
|
||||
* 4. Otherwise, pick the FIRST tier from settings.allowedTiers that
|
||||
* (a) is in the candidate set after rules 1+2, (b) has an
|
||||
* available + ready backend, (c) the cloud-consent gate is
|
||||
* satisfied if it's the cloud tier.
|
||||
*
|
||||
* 5. Run the task on the chosen backend.
|
||||
*
|
||||
* 6. If the run throws and settings.fallbackToRulesOnError is true
|
||||
* and the task has a runRules() implementation, fall back to
|
||||
* rules. We do NOT auto-fall to a different LLM tier on error —
|
||||
* the user explicitly chose this tier and silently switching
|
||||
* providers would be a privacy/trust break.
|
||||
*
|
||||
* 7. If everything fails, throw NoTierAvailableError. UI catches it
|
||||
* and offers a "retry" / "switch tier" / "enter manually" prompt.
|
||||
*/
|
||||
|
||||
import {
|
||||
BackendUnreachableError,
|
||||
NoTierAvailableError,
|
||||
ProviderBlockedError,
|
||||
TierTooLowError,
|
||||
} from './errors';
|
||||
import type { LlmTask } from './task';
|
||||
import type { LlmTier } from './tiers';
|
||||
import { TIER_RANK } from './tiers';
|
||||
import type { LlmBackend, LlmSettings, LlmTaskRequest, LlmTaskResult } from './types';
|
||||
|
||||
export interface LlmOrchestratorOptions {
  /** User preferences that drive tier routing (allowed tiers, overrides, consent). */
  settings: LlmSettings;
  /** Backend implementations, one per tier; a later entry for the same tier wins. */
  backends: LlmBackend[];
}
|
||||
|
||||
export class LlmOrchestrator {
|
||||
private settings: LlmSettings;
|
||||
private backendsByTier: Map<LlmTier, LlmBackend>;
|
||||
|
||||
constructor(opts: LlmOrchestratorOptions) {
|
||||
this.settings = opts.settings;
|
||||
this.backendsByTier = new Map();
|
||||
for (const b of opts.backends) {
|
||||
this.backendsByTier.set(b.tier, b);
|
||||
}
|
||||
}
|
||||
|
||||
/** Replace the settings object — call this when the user updates
|
||||
* their preferences in the settings UI. */
|
||||
updateSettings(settings: LlmSettings): void {
|
||||
this.settings = settings;
|
||||
}
|
||||
|
||||
/** Public read-only view for UI components that want to react to
|
||||
* the current settings (e.g. the tier selector). */
|
||||
getSettings(): Readonly<LlmSettings> {
|
||||
return this.settings;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can the user (with their current settings) run this task at all?
|
||||
* The UI uses this to decide whether to show a feature button as
|
||||
* enabled / disabled / hidden. Does NOT check backend readiness —
|
||||
* that's a per-call concern. Just checks "is there any conceivable
|
||||
* tier in the user's allowedTiers that satisfies task.minTier and
|
||||
* is permitted for task.contentClass?".
|
||||
*/
|
||||
canRun<TIn, TOut>(task: LlmTask<TIn, TOut>): boolean {
|
||||
// Rules-only tasks always run if they have a fallback
|
||||
if (task.minTier === 'none') return true;
|
||||
if (task.runRules) return true;
|
||||
|
||||
const candidates = this.candidateTiers(task);
|
||||
return candidates.some((t) => {
|
||||
const backend = this.backendsByTier.get(t);
|
||||
return backend?.isAvailable() ?? false;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the task. Honors the routing rules above. The returned
|
||||
* LlmTaskResult includes which tier actually ran, plus a trail
|
||||
* of tiers that were attempted and skipped before it.
|
||||
*/
|
||||
async run<TIn, TOut>(task: LlmTask<TIn, TOut>, input: TIn): Promise<LlmTaskResult<TOut>> {
|
||||
const start = performance.now();
|
||||
const attempted: LlmTier[] = [];
|
||||
|
||||
// Rule 1: tier-too-low check
|
||||
const userMaxTier = this.userMaxTier();
|
||||
if (TIER_RANK[task.minTier] > TIER_RANK[userMaxTier]) {
|
||||
if (task.runRules) {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: ['none'],
|
||||
};
|
||||
}
|
||||
throw new TierTooLowError(task.name, task.minTier, userMaxTier);
|
||||
}
|
||||
|
||||
// Rules-2-3: candidate tier list and per-task override
|
||||
const candidates = this.candidateTiers(task);
|
||||
const override = this.settings.taskOverrides[task.name];
|
||||
const orderedTiers = override ? [override].filter((t) => candidates.includes(t)) : candidates;
|
||||
|
||||
// Rule 4-5: try the first runnable tier
|
||||
for (const tier of orderedTiers) {
|
||||
if (tier === 'none') {
|
||||
if (task.runRules) {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, 'none'],
|
||||
};
|
||||
}
|
||||
attempted.push('none');
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cloud-consent gate
|
||||
if (tier === 'cloud' && !this.settings.cloudConsentGiven) {
|
||||
attempted.push('cloud');
|
||||
continue;
|
||||
}
|
||||
|
||||
const backend = this.backendsByTier.get(tier);
|
||||
if (!backend) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
if (!backend.isAvailable()) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
const ready = await backend.isReady();
|
||||
if (!ready) {
|
||||
attempted.push(tier);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const request = this.buildRequest(task, input);
|
||||
const generated = await task.runLlm(input, backend);
|
||||
return {
|
||||
value: generated,
|
||||
source: tier,
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, tier],
|
||||
};
|
||||
// `request` is intentionally unused — the task constructs
|
||||
// its own LlmTaskRequest internally via runLlm. We build
|
||||
// it here only as a future hook for telemetry.
|
||||
void request;
|
||||
} catch (err) {
|
||||
attempted.push(tier);
|
||||
// Rule 6: rules-fallback on error
|
||||
if (
|
||||
this.settings.fallbackToRulesOnError &&
|
||||
task.runRules &&
|
||||
!(err instanceof ProviderBlockedError)
|
||||
) {
|
||||
// Provider-blocked errors should NOT silently fall to
|
||||
// rules — they should bubble up so the UI can offer
|
||||
// "retry" / "switch tier" prompts. Other errors
|
||||
// (network failure, OOM, model not loaded) get the
|
||||
// silent rules fallback.
|
||||
try {
|
||||
const value = await task.runRules(input);
|
||||
return {
|
||||
value,
|
||||
source: 'none',
|
||||
latencyMs: Math.round(performance.now() - start),
|
||||
attempted: [...attempted, 'none'],
|
||||
};
|
||||
} catch {
|
||||
// rules fallback also failed — re-throw original
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
// Re-throw provider blocks and unrecoverable errors
|
||||
if (err instanceof ProviderBlockedError || err instanceof BackendUnreachableError) {
|
||||
throw err;
|
||||
}
|
||||
// Unknown error — try the next tier in the list
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
throw new NoTierAvailableError(task.name, attempted);
|
||||
}
|
||||
|
||||
/** Highest tier in the user's allowedTiers list (by rank). */
|
||||
private userMaxTier(): LlmTier {
|
||||
if (this.settings.allowedTiers.length === 0) return 'none';
|
||||
return this.settings.allowedTiers.reduce(
|
||||
(max, t) => (TIER_RANK[t] > TIER_RANK[max] ? t : max),
|
||||
'none' as LlmTier
|
||||
);
|
||||
}
|
||||
|
||||
/** Candidate tier list after applying rules 1 + 2.
|
||||
* - Rule 1: only tiers >= task.minTier
|
||||
* - Rule 2: sensitive content excludes mana-server + cloud
|
||||
* Also always includes 'none' at the end if the task has runRules. */
|
||||
private candidateTiers<TIn, TOut>(task: LlmTask<TIn, TOut>): LlmTier[] {
|
||||
// Start from the user's allowed tiers, in their preference order
|
||||
let tiers = this.settings.allowedTiers.filter((t) => TIER_RANK[t] >= TIER_RANK[task.minTier]);
|
||||
|
||||
// Rule 2: sensitive content backstop
|
||||
if (task.contentClass === 'sensitive') {
|
||||
tiers = tiers.filter((t) => t === 'browser');
|
||||
}
|
||||
|
||||
// 'none' is always tail-appended if the task has a rules implementation,
|
||||
// so the for-loop in run() naturally falls through to it.
|
||||
if (task.runRules && !tiers.includes('none')) {
|
||||
tiers.push('none');
|
||||
}
|
||||
return tiers;
|
||||
}
|
||||
|
||||
private buildRequest<TIn, TOut>(task: LlmTask<TIn, TOut>, _input: TIn): LlmTaskRequest {
|
||||
// Right now this is a placeholder — tasks build their own
|
||||
// LlmTaskRequest inside runLlm. Once we add token-counting
|
||||
// telemetry we'll move that construction up here so the
|
||||
// orchestrator can prepend the task metadata uniformly.
|
||||
return {
|
||||
taskName: task.name,
|
||||
contentClass: task.contentClass,
|
||||
requires: task.requires,
|
||||
messages: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
/**
|
||||
* Standalone exports for non-NestJS consumers (e.g. bot-services).
|
||||
*
|
||||
* Usage:
|
||||
* import { LlmClient } from '@mana/shared-llm/standalone';
|
||||
* const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' });
|
||||
*/
|
||||
|
||||
export { LlmClient } from './llm-client';
|
||||
export { resolveOptions } from './interfaces/llm-options.interface';
|
||||
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';
|
||||
|
||||
// Types
|
||||
export type {
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './types';
|
||||
|
||||
// Utilities
|
||||
export { extractJson } from './utils';
|
||||
107
packages/shared-llm/src/store.svelte.ts
Normal file
107
packages/shared-llm/src/store.svelte.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
/**
|
||||
* Svelte 5 reactive store for the LLM orchestrator.
|
||||
*
|
||||
* Lives at module-scope as a singleton because there is exactly one
|
||||
* orchestrator + settings per page session. Settings are persisted to
|
||||
* localStorage for now (Phase 1) — Phase 2 will move them into the
|
||||
* encrypted IndexedDB settings table once that exists.
|
||||
*
|
||||
* Usage in a Svelte 5 component:
|
||||
*
|
||||
* import { llmOrchestrator, llmSettingsState, useTaskAvailability } from '@mana/shared-llm';
|
||||
* import { extractDateTask } from '$lib/llm-tasks/extract-date';
|
||||
*
|
||||
* const available = useTaskAvailability(extractDateTask);
|
||||
* // ... reactively true/false based on settings + backend readiness
|
||||
*
|
||||
* {#if available.current}
|
||||
* <button onclick={() => orchestrator.run(extractDateTask, text)}>...</button>
|
||||
* {/if}
|
||||
*/
|
||||
|
||||
import { BrowserBackend } from './backends/browser';
|
||||
import { CloudBackend } from './backends/cloud';
|
||||
import { ManaServerBackend } from './backends/mana-server';
|
||||
import { LlmOrchestrator } from './orchestrator';
|
||||
import type { LlmTask } from './task';
|
||||
import { DEFAULT_LLM_SETTINGS, type LlmSettings } from './types';
|
||||
|
||||
const STORAGE_KEY = 'mana.llm.settings.v1';
|
||||
|
||||
/** Load persisted settings, falling back to defaults on first run or
|
||||
* any parse error. localStorage is fine for Phase 1 — small payload,
|
||||
* not encrypted-sensitive (the user's tier preference is hardly
|
||||
* secret), and trivial to migrate to IndexedDB later. */
|
||||
function loadSettings(): LlmSettings {
|
||||
if (typeof localStorage === 'undefined') return { ...DEFAULT_LLM_SETTINGS };
|
||||
try {
|
||||
const raw = localStorage.getItem(STORAGE_KEY);
|
||||
if (!raw) return { ...DEFAULT_LLM_SETTINGS };
|
||||
const parsed = JSON.parse(raw) as Partial<LlmSettings>;
|
||||
return { ...DEFAULT_LLM_SETTINGS, ...parsed };
|
||||
} catch {
|
||||
return { ...DEFAULT_LLM_SETTINGS };
|
||||
}
|
||||
}
|
||||
|
||||
function persistSettings(settings: LlmSettings): void {
|
||||
if (typeof localStorage === 'undefined') return;
|
||||
try {
|
||||
localStorage.setItem(STORAGE_KEY, JSON.stringify(settings));
|
||||
} catch {
|
||||
// Quota exceeded or storage disabled — non-fatal, settings just
|
||||
// won't persist across sessions.
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Reactive state ──────────────────────────────────────────────

// Svelte 5 `$state` rune: module-level reactive settings object,
// hydrated from localStorage on first import of this module.
let _settings = $state<LlmSettings>(loadSettings());

// Backends are constructed once per page session. They're stateless
// (or hold their own internal state in the case of BrowserBackend
// pointing at @mana/local-llm's singleton), so a fresh instance per
// orchestrator is fine.
const backends = [new BrowserBackend(), new ManaServerBackend(), new CloudBackend()];

// Singleton orchestrator for the page session; receives settings
// updates via updateLlmSettings() below.
export const llmOrchestrator = new LlmOrchestrator({
  settings: _settings,
  backends,
});

/** Reactive accessor for the current settings. UI components read
 * via `llmSettingsState.current` to get a $state-tracked snapshot. */
export const llmSettingsState = {
  get current(): LlmSettings {
    return _settings;
  },
};
|
||||
|
||||
/** Update settings (or part of them). Persists to localStorage and
|
||||
* pushes the new value into the orchestrator. */
|
||||
export function updateLlmSettings(patch: Partial<LlmSettings>): void {
|
||||
_settings = { ..._settings, ...patch };
|
||||
persistSettings(_settings);
|
||||
llmOrchestrator.updateSettings(_settings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Svelte 5 reactive hook: returns `{ current: boolean }` indicating
|
||||
* whether the given task can run with the user's current settings.
|
||||
* Reactive against `llmSettingsState` so the UI re-renders when the
|
||||
* user toggles a tier in the settings page.
|
||||
*
|
||||
* Use this to gate feature buttons — show them as enabled when the
|
||||
* task is runnable, disabled (with a tooltip) when not.
|
||||
*/
|
||||
export function useTaskAvailability<TIn, TOut>(
|
||||
task: LlmTask<TIn, TOut>
|
||||
): { readonly current: boolean } {
|
||||
return {
|
||||
get current() {
|
||||
// Reading _settings here registers the reactive dependency
|
||||
void _settings;
|
||||
return llmOrchestrator.canRun(task);
|
||||
},
|
||||
};
|
||||
}
|
||||
82
packages/shared-llm/src/task.ts
Normal file
82
packages/shared-llm/src/task.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* The LlmTask contract — the unit of work modules describe to the
|
||||
* orchestrator. Tasks bundle:
|
||||
*
|
||||
* 1. The LLM-side implementation (used for browser/server/cloud tiers)
|
||||
* 2. An optional rules-tier fallback (used when the LLM tier is
|
||||
* unavailable, fails, or the user has opted out of all LLM tiers)
|
||||
* 3. Routing metadata (minimum tier, content class, capability needs)
|
||||
*
|
||||
* Tasks live next to the modules that use them — there is intentionally
|
||||
* no central task registry. The convention is:
|
||||
*
|
||||
* apps/mana/apps/web/src/lib/llm-tasks/ ← cross-module helpers
|
||||
* apps/mana/apps/web/src/lib/modules/notes/llm-tasks/ ← notes-specific
|
||||
*
|
||||
* The orchestrator never imports tasks directly — modules import tasks
|
||||
* AND the orchestrator and call `orchestrator.run(task, input)`.
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
import type { ContentClass, CapabilityRequirements, LlmBackend, LlmTaskRequest } from './types';
|
||||
|
||||
/** A routable unit of LLM work: LLM implementation, optional rules
 * fallback, and the metadata the orchestrator needs to pick a tier. */
export interface LlmTask<TInput, TOutput> {
  /**
   * Stable identifier for this task. Used for telemetry, per-task
   * tier overrides in user settings, and debug logs. Convention is
   * `{module}.{action}` — e.g. `notes.extractTags`, `todo.parseQuickAdd`.
   */
  readonly name: string;

  /** Lowest tier this task can produce a useful result on. */
  readonly minTier: LlmTier;

  /** Privacy class of inputs this task handles. */
  readonly contentClass: ContentClass;

  /** Capability requirements that exclude tiers/backends that can't satisfy them. */
  readonly requires?: CapabilityRequirements;

  /**
   * User-facing label, shown when telling the user "this task needs
   * AI" or "this result was computed via tier X".
   */
  readonly displayLabel: string;

  /**
   * The LLM-based implementation. Builds an LlmTaskRequest from the
   * task input and asks the backend to run it, then maps the
   * generated text back into the typed TOutput shape (e.g. parses
   * JSON, validates a date, looks up a tag).
   */
  runLlm(input: TInput, backend: LlmBackend): Promise<TOutput>;

  /**
   * Optional deterministic fallback — runs when no LLM tier is
   * available, or when the LLM tier failed and
   * `fallbackToRulesOnError` is enabled in user settings.
   *
   * Returning the typed TOutput indicates success. Throwing means
   * the rules implementation also can't handle this input — the
   * orchestrator will then surface a NoTierAvailableError so the
   * UI can ask the user for direct input.
   */
  runRules?(input: TInput): Promise<TOutput>;
}
|
||||
|
||||
/**
|
||||
* Helper for tasks that need to construct an LlmTaskRequest from their
|
||||
* own input. Centralizes the boilerplate so individual tasks don't have
|
||||
* to redeclare taskName / contentClass / requires every time.
|
||||
*/
|
||||
export function buildTaskRequest<TInput, TOutput>(
|
||||
task: LlmTask<TInput, TOutput>,
|
||||
overrides: Omit<LlmTaskRequest, 'taskName' | 'contentClass' | 'requires'>
|
||||
): LlmTaskRequest {
|
||||
return {
|
||||
...overrides,
|
||||
taskName: task.name,
|
||||
contentClass: task.contentClass,
|
||||
requires: task.requires,
|
||||
};
|
||||
}
|
||||
50
packages/shared-llm/src/tiers.ts
Normal file
50
packages/shared-llm/src/tiers.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Tier definitions for the Mana LLM orchestrator.
|
||||
*
|
||||
* Four tiers, ordered from most-private to least-private:
|
||||
*
|
||||
* none — Deterministic parsers / heuristics. No LLM at all.
|
||||
* Always available. Zero cost. Quality varies by task.
|
||||
*
|
||||
* browser — Gemma 4 E2B running in the user's browser via WebGPU
|
||||
* (@mana/local-llm). 100% on-device. Requires the
|
||||
* ~500 MB model to be downloaded once and ~2 GB VRAM.
|
||||
*
|
||||
* mana-server — services/mana-llm + Ollama on our own infrastructure
|
||||
* (currently the Mac Mini, gemma3:4b by default).
|
||||
* Data leaves the device but stays in our control.
|
||||
*
|
||||
* cloud — services/mana-llm proxied to a third-party provider
|
||||
* (Google Gemini, configured via google_api_key in the
|
||||
* mana-llm service env). Data goes to the third party.
|
||||
*
|
||||
* The numeric rank is used by the orchestrator to compare a user's
|
||||
* preferred tier against a task's minimum tier ("can the user even
|
||||
* run this task?") and is the canonical sort order for the privacy
|
||||
* gradient.
|
||||
*/
|
||||
|
||||
export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud';

// Canonical privacy ordering: higher rank = less private. The
// orchestrator compares a user's allowed tiers against a task's
// minTier using these ranks.
export const TIER_RANK: Record<LlmTier, number> = {
  none: 0,
  browser: 1,
  'mana-server': 2,
  cloud: 3,
};

// All tiers in ascending rank order (most-private first).
export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud'];
|
||||
|
||||
/** Human-readable label, kept here so backends/UI agree on naming. */
|
||||
export function tierLabel(tier: LlmTier): string {
|
||||
switch (tier) {
|
||||
case 'none':
|
||||
return 'Lokal (ohne KI)';
|
||||
case 'browser':
|
||||
return 'Auf deinem Gerät';
|
||||
case 'mana-server':
|
||||
return 'Mana-Server';
|
||||
case 'cloud':
|
||||
return 'Google Gemini';
|
||||
}
|
||||
}
|
||||
150
packages/shared-llm/src/types.ts
Normal file
150
packages/shared-llm/src/types.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* Shared types for the Mana LLM orchestrator.
|
||||
*
|
||||
* These deliberately mirror the surface of @mana/local-llm so that the
|
||||
* browser tier can pass them straight through, but they are intentionally
|
||||
* a SUPERSET (with task name, content class, capability requirements,
|
||||
* rule fallback) so the orchestrator can route intelligently.
|
||||
*/
|
||||
|
||||
import type { LlmTier } from './tiers';
|
||||
|
||||
/** A single turn in a chat transcript. */
export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string;
}
||||
|
||||
/** Input to a backend generate call. */
export interface GenerateOptions {
  messages: ChatMessage[];
  /** Sampling temperature; backend default applies when omitted. */
  temperature?: number;
  /** Upper bound on generated tokens; backend default applies when omitted. */
  maxTokens?: number;
  /** Optional streaming callback — called once per emitted token chunk */
  onToken?: (token: string) => void;
}
|
||||
|
||||
export interface GenerateResult {
|
||||
content: string;
|
||||
usage?: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
};
|
||||
latencyMs: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* The privacy class of the input being processed. The orchestrator uses
|
||||
* this to ENFORCE that sensitive content never leaves the device, even
|
||||
* if the user has globally allowed cloud tiers.
|
||||
*
|
||||
* public — already public-domain content (e.g. an open URL the user
|
||||
* wants summarized). Anything is fair game.
|
||||
* personal — the user's own content but routine (a calendar event, a
|
||||
* todo title). Default for most module tasks. Allowed on
|
||||
* any tier the user has enabled.
|
||||
* sensitive — explicitly private content (notes flagged sensitive,
|
||||
* diary entries, dreams, financial data). The orchestrator
|
||||
* restricts these to {none, browser} regardless of user's
|
||||
* global settings — the user has to explicitly opt out of
|
||||
* this protection per-task to send sensitive content to
|
||||
* server/cloud tiers.
|
||||
*/
|
||||
export type ContentClass = 'public' | 'personal' | 'sensitive';
|
||||
|
||||
export interface CapabilityRequirements {
|
||||
/** Task needs to receive structured JSON in response */
|
||||
json?: boolean;
|
||||
/** Task needs at least this many context tokens (input + output) */
|
||||
minContextTokens?: number;
|
||||
/** Task needs streaming support (per-token onToken callbacks) */
|
||||
streaming?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* The high-level "I want to do X" descriptor that flows from a module
|
||||
* to the orchestrator. Concrete LlmTask implementations build these
|
||||
* internally before delegating to the orchestrator.
|
||||
*/
|
||||
export interface LlmTaskRequest extends GenerateOptions {
|
||||
/** Stable name for analytics + per-task overrides — e.g. "notes.extractTags" */
|
||||
taskName: string;
|
||||
contentClass: ContentClass;
|
||||
requires?: CapabilityRequirements;
|
||||
}
|
||||
|
||||
/**
|
||||
* The result of running a task through the orchestrator. Carries the
|
||||
* tier that actually executed (which may differ from the user's
|
||||
* preferred tier if a fallback kicked in) and the trail of tiers
|
||||
* that were tried first — useful for telemetry and for debugging
|
||||
* "why did this task end up running on tier X?".
|
||||
*/
|
||||
export interface LlmTaskResult<T = string> {
|
||||
value: T;
|
||||
source: LlmTier;
|
||||
latencyMs: number;
|
||||
/** Tiers that were attempted before `source` succeeded */
|
||||
attempted: LlmTier[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Backend interface that the orchestrator talks to. The "none" tier
|
||||
* does NOT implement this — rule-based fallbacks live on each
|
||||
* concrete LlmTask, not on a backend object.
|
||||
*/
|
||||
export interface LlmBackend {
|
||||
readonly tier: Exclude<LlmTier, 'none'>;
|
||||
|
||||
/** Could this backend run AT ALL given the current environment?
|
||||
* e.g. browser tier checks for WebGPU + user-enabled, server tier
|
||||
* checks for a configured base URL. */
|
||||
isAvailable(): boolean;
|
||||
|
||||
/** Could this backend run RIGHT NOW? e.g. browser tier checks if
|
||||
* the model is loaded into VRAM. May return false even when
|
||||
* isAvailable() is true (model still downloading, server in
|
||||
* startup, …). */
|
||||
isReady(): boolean | Promise<boolean>;
|
||||
|
||||
/** Run a task. The backend is responsible for actually performing
|
||||
* the inference and returning the result; it does NOT decide
|
||||
* whether it SHOULD run (the orchestrator did that). */
|
||||
generate(req: LlmTaskRequest): Promise<GenerateResult>;
|
||||
}
|
||||
|
||||
/**
|
||||
* The mutable user preferences that drive routing.
|
||||
*/
|
||||
export interface LlmSettings {
|
||||
/** Tiers the orchestrator is allowed to use, in preference order.
|
||||
* An empty array means "no AI at all" — only Tier 0 (rules) runs. */
|
||||
allowedTiers: LlmTier[];
|
||||
|
||||
/** Per-task overrides — keyed by task name, value is the tier to
|
||||
* use for that task specifically (overrides allowedTiers order). */
|
||||
taskOverrides: Record<string, LlmTier>;
|
||||
|
||||
/** When the user-chosen tier fails to run a task, fall back to
|
||||
* the rules tier (if the task has a runT0 implementation).
|
||||
* When false, failures surface as errors instead. */
|
||||
fallbackToRulesOnError: boolean;
|
||||
|
||||
/** Show a small "via Edge / via Server / via Gemini" badge under
|
||||
* every LLM result. Default true — helps the user understand
|
||||
* where their data went. */
|
||||
showSourceInUi: boolean;
|
||||
|
||||
/** First-time consent for the cloud tier. Until this is true, the
|
||||
* cloud tier is treated as unavailable even if it's in
|
||||
* allowedTiers. The user must explicitly tick a "yes I understand
|
||||
* Google sees my data" checkbox once. */
|
||||
cloudConsentGiven: boolean;
|
||||
}
|
||||
|
||||
export const DEFAULT_LLM_SETTINGS: LlmSettings = {
|
||||
allowedTiers: [], // ZERO opt-in by default — every user starts in Tier 0 only
|
||||
taskOverrides: {},
|
||||
fallbackToRulesOnError: true,
|
||||
showSourceInUi: true,
|
||||
cloudConsentGiven: false,
|
||||
};
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
/**
|
||||
* Core chat types for the LLM client.
|
||||
* These are the high-level types that consumers interact with.
|
||||
*/
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Messages
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface TextContentPart {
|
||||
type: 'text';
|
||||
text: string;
|
||||
}
|
||||
|
||||
export interface ImageContentPart {
|
||||
type: 'image_url';
|
||||
image_url: { url: string };
|
||||
}
|
||||
|
||||
export type ContentPart = TextContentPart | ImageContentPart;
|
||||
|
||||
export interface ChatMessage {
|
||||
role: 'system' | 'user' | 'assistant';
|
||||
content: string | ContentPart[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Options
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatOptions {
|
||||
/** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
|
||||
model?: string;
|
||||
/** Sampling temperature 0.0-2.0 */
|
||||
temperature?: number;
|
||||
/** Max tokens to generate */
|
||||
maxTokens?: number;
|
||||
/** System prompt prepended to messages */
|
||||
systemPrompt?: string;
|
||||
/** Request timeout in ms (overrides module default) */
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export interface JsonOptions<T = unknown> extends ChatOptions {
|
||||
/** Validation function applied to parsed JSON. Should throw on invalid data. */
|
||||
validate?: (data: unknown) => T;
|
||||
/** Number of extraction retries on parse failure (default: 1) */
|
||||
jsonRetries?: number;
|
||||
}
|
||||
|
||||
export interface VisionOptions extends ChatOptions {
|
||||
/** Vision model override (default from module config, e.g. "ollama/llava:7b") */
|
||||
visionModel?: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface TokenUsage {
|
||||
prompt_tokens: number;
|
||||
completion_tokens: number;
|
||||
total_tokens: number;
|
||||
}
|
||||
|
||||
export interface ChatResult {
|
||||
/** Generated text content */
|
||||
content: string;
|
||||
/** Model that was actually used */
|
||||
model: string;
|
||||
/** Token usage statistics */
|
||||
usage: TokenUsage;
|
||||
/** Request latency in milliseconds */
|
||||
latencyMs: number;
|
||||
}
|
||||
|
||||
export interface JsonResult<T = unknown> extends ChatResult {
|
||||
/** Parsed and optionally validated data */
|
||||
data: T;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Models
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ModelInfo {
|
||||
id: string;
|
||||
object: 'model';
|
||||
created: number;
|
||||
owned_by: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Health
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface HealthStatus {
|
||||
status: 'healthy' | 'degraded' | 'unhealthy';
|
||||
providers: Record<string, unknown>;
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
export type {
|
||||
ChatMessage,
|
||||
ContentPart,
|
||||
TextContentPart,
|
||||
ImageContentPart,
|
||||
ChatOptions,
|
||||
JsonOptions,
|
||||
VisionOptions,
|
||||
TokenUsage,
|
||||
ChatResult,
|
||||
JsonResult,
|
||||
ModelInfo,
|
||||
HealthStatus,
|
||||
} from './chat.types';
|
||||
|
||||
export type {
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionChoice,
|
||||
ChatCompletionStreamChunk,
|
||||
StreamChoice,
|
||||
EmbeddingRequest,
|
||||
EmbeddingResponse,
|
||||
EmbeddingData,
|
||||
ModelsListResponse,
|
||||
} from './openai-compat.types';
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
/**
|
||||
* OpenAI-compatible wire format types matching the mana-llm API contract.
|
||||
* These are internal types used for HTTP communication - consumers should
|
||||
* use the high-level types from chat.types.ts instead.
|
||||
*/
|
||||
|
||||
import type { ChatMessage, TokenUsage } from './chat.types';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Request (POST /v1/chat/completions)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionRequest {
|
||||
model: string;
|
||||
messages: ChatMessage[];
|
||||
stream?: boolean;
|
||||
temperature?: number;
|
||||
max_tokens?: number;
|
||||
top_p?: number;
|
||||
frequency_penalty?: number;
|
||||
presence_penalty?: number;
|
||||
stop?: string | string[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Response (non-streaming)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionResponse {
|
||||
id: string;
|
||||
object: 'chat.completion';
|
||||
created: number;
|
||||
model: string;
|
||||
choices: ChatCompletionChoice[];
|
||||
usage: TokenUsage;
|
||||
}
|
||||
|
||||
export interface ChatCompletionChoice {
|
||||
index: number;
|
||||
message: { role: 'assistant'; content: string };
|
||||
finish_reason: 'stop' | 'length' | 'content_filter' | null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Response (streaming)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ChatCompletionStreamChunk {
|
||||
id: string;
|
||||
object: 'chat.completion.chunk';
|
||||
created: number;
|
||||
model: string;
|
||||
choices: StreamChoice[];
|
||||
}
|
||||
|
||||
export interface StreamChoice {
|
||||
index: number;
|
||||
delta: { role?: 'assistant'; content?: string };
|
||||
finish_reason: string | null;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Embeddings
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface EmbeddingRequest {
|
||||
model: string;
|
||||
input: string | string[];
|
||||
encoding_format?: 'float' | 'base64';
|
||||
}
|
||||
|
||||
export interface EmbeddingResponse {
|
||||
object: 'list';
|
||||
data: EmbeddingData[];
|
||||
model: string;
|
||||
usage: TokenUsage;
|
||||
}
|
||||
|
||||
export interface EmbeddingData {
|
||||
object: 'embedding';
|
||||
index: number;
|
||||
embedding: number[];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Models (GET /v1/models)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface ModelsListResponse {
|
||||
object: 'list';
|
||||
data: Array<{
|
||||
id: string;
|
||||
object: 'model';
|
||||
created: number;
|
||||
owned_by: string;
|
||||
}>;
|
||||
}
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
export { extractJson } from './json-extractor';
|
||||
export { retryFetch } from './retry';
|
||||
export type { RetryOptions } from './retry';
|
||||
export { LlmMetricsCollector } from './metrics';
|
||||
export type { LlmRequestMetrics, MetricsCallback } from './metrics';
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
/**
|
||||
* Extract and parse JSON from LLM responses.
|
||||
*
|
||||
* LLMs often wrap JSON in markdown code fences or include extra text.
|
||||
* This utility handles all common patterns:
|
||||
* 1. Direct JSON parse
|
||||
* 2. Markdown ```json ... ``` fences
|
||||
* 3. First { ... } or [ ... ] block in text
|
||||
*/
|
||||
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
|
||||
const trimmed = text.trim();
|
||||
|
||||
// Step 1: Try direct parse
|
||||
const direct = tryParse<T>(trimmed, validate);
|
||||
if (direct !== undefined) return direct;
|
||||
|
||||
// Step 2: Strip markdown code fences
|
||||
const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
if (fenceMatch) {
|
||||
const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
|
||||
if (fenced !== undefined) return fenced;
|
||||
}
|
||||
|
||||
// Step 3: Find first JSON object
|
||||
const objectStart = trimmed.indexOf('{');
|
||||
if (objectStart !== -1) {
|
||||
const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
|
||||
if (objectStr) {
|
||||
const obj = tryParse<T>(objectStr, validate);
|
||||
if (obj !== undefined) return obj;
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Find first JSON array
|
||||
const arrayStart = trimmed.indexOf('[');
|
||||
if (arrayStart !== -1) {
|
||||
const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
|
||||
if (arrayStr) {
|
||||
const arr = tryParse<T>(arrayStr, validate);
|
||||
if (arr !== undefined) return arr;
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
|
||||
}
|
||||
|
||||
function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
|
||||
try {
|
||||
const parsed = JSON.parse(text);
|
||||
return validate ? validate(parsed) : parsed;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a balanced block starting from the given position.
|
||||
* Handles nested braces/brackets but not strings with escaped delimiters.
|
||||
*/
|
||||
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
|
||||
let depth = 0;
|
||||
let inString = false;
|
||||
let escape = false;
|
||||
|
||||
for (let i = start; i < text.length; i++) {
|
||||
const ch = text[i];
|
||||
|
||||
if (escape) {
|
||||
escape = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '\\') {
|
||||
escape = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ch === '"') {
|
||||
inString = !inString;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (inString) continue;
|
||||
|
||||
if (ch === open) depth++;
|
||||
if (ch === close) depth--;
|
||||
|
||||
if (depth === 0) {
|
||||
return text.slice(start, i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
/**
|
||||
* Request-level metrics for LLM calls.
|
||||
*
|
||||
* Provides an optional callback system that backends can hook into
|
||||
* for monitoring, logging, or forwarding to Prometheus/Grafana.
|
||||
*/
|
||||
|
||||
export interface LlmRequestMetrics {
|
||||
/** Model requested (e.g. "ollama/gemma3:4b") */
|
||||
model: string;
|
||||
/** Model actually used (may differ if fallback occurred) */
|
||||
actualModel: string;
|
||||
/** Request type */
|
||||
type: 'chat' | 'json' | 'vision' | 'visionJson' | 'embed' | 'stream';
|
||||
/** Total request duration in ms */
|
||||
latencyMs: number;
|
||||
/** Token usage */
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
/** Whether this request was a fallback (model differs from requested) */
|
||||
wasFallback: boolean;
|
||||
/** Whether the request succeeded */
|
||||
success: boolean;
|
||||
/** Error message if failed */
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export type MetricsCallback = (metrics: LlmRequestMetrics) => void;
|
||||
|
||||
/**
|
||||
* Simple in-memory metrics aggregator.
|
||||
* Useful for health endpoints and debugging.
|
||||
*/
|
||||
export class LlmMetricsCollector {
|
||||
private _totalRequests = 0;
|
||||
private _totalErrors = 0;
|
||||
private _totalFallbacks = 0;
|
||||
private _totalTokens = 0;
|
||||
private _totalLatencyMs = 0;
|
||||
private _byModel: Map<string, { requests: number; tokens: number; errors: number }> = new Map();
|
||||
|
||||
/** Use as MetricsCallback */
|
||||
readonly collect = (metrics: LlmRequestMetrics): void => {
|
||||
this._totalRequests++;
|
||||
this._totalLatencyMs += metrics.latencyMs;
|
||||
this._totalTokens += metrics.totalTokens;
|
||||
|
||||
if (!metrics.success) this._totalErrors++;
|
||||
if (metrics.wasFallback) this._totalFallbacks++;
|
||||
|
||||
const modelKey = metrics.actualModel;
|
||||
const existing = this._byModel.get(modelKey) ?? { requests: 0, tokens: 0, errors: 0 };
|
||||
existing.requests++;
|
||||
existing.tokens += metrics.totalTokens;
|
||||
if (!metrics.success) existing.errors++;
|
||||
this._byModel.set(modelKey, existing);
|
||||
};
|
||||
|
||||
/** Get summary stats for health endpoints / dashboards */
|
||||
getSummary() {
|
||||
return {
|
||||
totalRequests: this._totalRequests,
|
||||
totalErrors: this._totalErrors,
|
||||
totalFallbacks: this._totalFallbacks,
|
||||
totalTokens: this._totalTokens,
|
||||
averageLatencyMs:
|
||||
this._totalRequests > 0 ? Math.round(this._totalLatencyMs / this._totalRequests) : 0,
|
||||
fallbackRate:
|
||||
this._totalRequests > 0
|
||||
? Math.round((this._totalFallbacks / this._totalRequests) * 100)
|
||||
: 0,
|
||||
errorRate:
|
||||
this._totalRequests > 0 ? Math.round((this._totalErrors / this._totalRequests) * 100) : 0,
|
||||
byModel: Object.fromEntries(this._byModel),
|
||||
};
|
||||
}
|
||||
|
||||
/** Reset all counters */
|
||||
reset(): void {
|
||||
this._totalRequests = 0;
|
||||
this._totalErrors = 0;
|
||||
this._totalFallbacks = 0;
|
||||
this._totalTokens = 0;
|
||||
this._totalLatencyMs = 0;
|
||||
this._byModel.clear();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
/**
|
||||
* Fetch wrapper with exponential backoff retry for transient failures.
|
||||
*
|
||||
* Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
|
||||
* Does NOT retry on: 400, 401, 403, 404 (client errors).
|
||||
*/
|
||||
|
||||
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
|
||||
|
||||
export interface RetryOptions {
|
||||
maxRetries: number;
|
||||
/** Base delay in ms (doubles each retry). Default: 200 */
|
||||
baseDelay?: number;
|
||||
}
|
||||
|
||||
export async function retryFetch(
|
||||
url: string,
|
||||
init: RequestInit,
|
||||
options: RetryOptions
|
||||
): Promise<Response> {
|
||||
const { maxRetries, baseDelay = 200 } = options;
|
||||
let lastError: Error | undefined;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const response = await fetch(url, init);
|
||||
|
||||
if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
|
||||
return response;
|
||||
}
|
||||
|
||||
// Retryable status code
|
||||
lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
|
||||
} catch (error) {
|
||||
// Network error (connection refused, timeout, etc.)
|
||||
lastError = error instanceof Error ? error : new Error(String(error));
|
||||
}
|
||||
|
||||
// Don't sleep after the last attempt
|
||||
if (attempt < maxRetries) {
|
||||
const delay = baseDelay * Math.pow(2, attempt);
|
||||
await sleep(delay);
|
||||
}
|
||||
}
|
||||
|
||||
throw lastError ?? new Error('retryFetch exhausted all retries');
|
||||
}
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
|
@ -1,21 +1,14 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2021",
|
||||
"module": "commonjs",
|
||||
"lib": ["ES2021"],
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"target": "ES2022",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"lib": ["ES2022", "DOM"],
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"noEmit": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"moduleResolution": "node",
|
||||
"experimentalDecorators": true,
|
||||
"emitDecoratorMetadata": true
|
||||
"forceConsistentCasingInFileNames": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
"exclude": ["node_modules"]
|
||||
}
|
||||
|
|
|
|||
33
pnpm-lock.yaml
generated
33
pnpm-lock.yaml
generated
|
|
@ -978,6 +978,9 @@ importers:
|
|||
'@mana/shared-links':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-links
|
||||
'@mana/shared-llm':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-llm
|
||||
'@mana/shared-stores':
|
||||
specifier: workspace:*
|
||||
version: link:../../../../packages/shared-stores
|
||||
|
|
@ -2981,31 +2984,19 @@ importers:
|
|||
|
||||
packages/shared-llm:
|
||||
dependencies:
|
||||
'@nestjs/common':
|
||||
specifier: ^10.0.0 || ^11.0.0
|
||||
version: 10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2)
|
||||
'@nestjs/config':
|
||||
specifier: ^3.0.0 || ^4.0.0
|
||||
version: 3.3.0(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(rxjs@7.8.2)
|
||||
'@nestjs/core':
|
||||
specifier: ^10.0.0 || ^11.0.0
|
||||
version: 10.4.22(@nestjs/common@10.4.22(class-transformer@0.5.1)(class-validator@0.14.4)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/platform-express@10.4.22)(reflect-metadata@0.2.2)(rxjs@7.8.2)
|
||||
reflect-metadata:
|
||||
specifier: ^0.1.13 || ^0.2.0
|
||||
version: 0.2.2
|
||||
rxjs:
|
||||
specifier: ^7.0.0
|
||||
version: 7.8.2
|
||||
'@mana/local-llm':
|
||||
specifier: workspace:*
|
||||
version: link:../local-llm
|
||||
devDependencies:
|
||||
'@types/node':
|
||||
specifier: ^20.0.0
|
||||
version: 20.19.39
|
||||
typescript:
|
||||
specifier: ^24.10.1
|
||||
version: 24.12.2
|
||||
svelte:
|
||||
specifier: ^5.0.0
|
||||
version: 5.55.1
|
||||
typescript:
|
||||
specifier: ^5.9.3
|
||||
version: 5.9.3
|
||||
vitest:
|
||||
specifier: ^4.1.2
|
||||
version: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@20.19.39)(@vitest/coverage-v8@4.1.3)(@vitest/ui@4.1.3)(jsdom@29.0.2(@noble/hashes@2.0.1))(vite@6.4.2(@types/node@20.19.39)(jiti@2.6.1)(lightningcss@1.32.0)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3))
|
||||
|
||||
packages/shared-logger:
|
||||
devDependencies:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue