managarten/packages/shared-llm/src/backends/browser.ts
Till JS 56065c8537 fix(mana/web): unwrap $state proxy in workbench-scenes Dexie writes
Adding an app to a workbench scene threw DataCloneError. scenesState
is a $state array, so current.openApps was a Svelte 5 proxy and
spreading it into a new array left proxy entries inside; IndexedDB's
structured clone refuses to serialise those. Snapshot before handing
the array to patchScene / createScene so Dexie sees plain objects.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 00:44:00 +02:00

62 lines
1.7 KiB
TypeScript

/**
* Browser-edge backend — wraps @mana/local-llm.
*
* Inference happens 100% on the user's device via WebGPU. The model
* (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the
* browser. We do NOT auto-load on backend creation; the user has to
* explicitly trigger a load via the settings page or by using a feature
* that calls `ensureLoaded()`. This avoids surprising 500 MB downloads.
*/
import {
localLLM,
LocalLLMEngine,
loadLocalLlm,
type ChatMessage as LocalChatMessage,
} from '@mana/local-llm';
import { EdgeLoadFailedError } from '../errors';
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
/**
 * `LlmBackend` implementation for the `browser` tier.
 *
 * Every method delegates to the `@mana/local-llm` exports (`localLLM`,
 * `LocalLLMEngine`, `loadLocalLlm`); this class only adapts their
 * results and errors to the shared-llm contract.
 */
export class BrowserBackend implements LlmBackend {
  readonly tier = 'browser' as const;

  /** Reports whether `LocalLLMEngine.isSupported()` is true in this environment. */
  isAvailable(): boolean {
    const supported = LocalLLMEngine.isSupported();
    return supported;
  }

  /** Mirrors the `isReady` flag exposed by the shared `localLLM` instance. */
  isReady(): boolean {
    const { isReady } = localLLM;
    return isReady;
  }

  /**
   * Awaits `loadLocalLlm()` (the one-time model download + WebGPU
   * initialization). Documented as idempotent, so repeated calls are safe.
   *
   * @throws EdgeLoadFailedError when the load rejects (e.g. corrupt model,
   *   WebGPU out of memory). The error carries the original message, or the
   *   stringified value when the rejection is not an `Error`.
   */
  async ensureLoaded(): Promise<void> {
    try {
      await loadLocalLlm();
    } catch (cause) {
      // Only the message text is forwarded; non-Error rejections are stringified.
      const reason = cause instanceof Error ? cause.message : String(cause);
      throw new EdgeLoadFailedError(reason);
    }
  }

  /**
   * Runs one generation through `localLLM`, loading the model first if needed.
   *
   * @param req Task request; `messages`, `temperature`, `maxTokens` and
   *   `onToken` are passed through to `localLLM.generate`.
   * @returns The generated content, token usage renamed from snake_case to
   *   camelCase, and the latency reported by the engine.
   * @throws EdgeLoadFailedError if the model cannot be loaded. Errors raised
   *   by `localLLM.generate` itself propagate unchanged.
   */
  async generate(req: LlmTaskRequest): Promise<GenerateResult> {
    await this.ensureLoaded();

    // Read the request only after loading has finished, as the engine call needs it.
    const { messages, temperature, maxTokens, onToken } = req;
    const completion = await localLLM.generate({
      messages: messages as LocalChatMessage[],
      temperature,
      maxTokens,
      onToken,
    });

    // Translate the engine's snake_case usage counters to the shared camelCase shape.
    const { content, usage: engineUsage, latencyMs } = completion;
    const usage = {
      promptTokens: engineUsage.prompt_tokens,
      completionTokens: engineUsage.completion_tokens,
      totalTokens: engineUsage.total_tokens,
    };

    return { content, usage, latencyMs };
  }
}