mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-19 20:21:26 +02:00
Adding an app to a workbench scene threw DataCloneError. scenesState is a $state array, so current.openApps was a Svelte 5 proxy and spreading it into a new array left proxy entries inside; IndexedDB's structured clone refuses to serialise those. Snapshot before handing the array to patchScene / createScene so Dexie sees plain objects. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
62 lines
1.7 KiB
TypeScript
62 lines
1.7 KiB
TypeScript
/**
 * Browser-edge backend — wraps @mana/local-llm.
 *
 * Inference happens 100% on the user's device via WebGPU. The model
 * (currently Gemma 4 E2B) is a one-time ~500 MB download cached in the
 * browser. We do NOT auto-load on backend creation; the user has to
 * explicitly trigger a load via the settings page or by using a feature
 * that calls `ensureLoaded()`. This avoids surprising 500 MB downloads.
 */
|
|
|
|
import {
|
|
localLLM,
|
|
LocalLLMEngine,
|
|
loadLocalLlm,
|
|
type ChatMessage as LocalChatMessage,
|
|
} from '@mana/local-llm';
|
|
import { EdgeLoadFailedError } from '../errors';
|
|
import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
|
|
|
|
/**
 * `LlmBackend` implementation for the `'browser'` tier.
 *
 * Every method delegates to the `@mana/local-llm` package: support and
 * readiness checks, model loading, and text generation.
 */
export class BrowserBackend implements LlmBackend {
	/** Tier tag identifying this backend. */
	readonly tier = 'browser' as const;

	/** @returns whatever `LocalLLMEngine.isSupported()` reports. */
	isAvailable(): boolean {
		const supported = LocalLLMEngine.isSupported();
		return supported;
	}

	/** @returns the current `isReady` flag of the shared `localLLM` instance. */
	isReady(): boolean {
		const { isReady } = localLLM;
		return isReady;
	}

	/**
	 * Kick off the one-time model download and WebGPU initialization by
	 * awaiting `loadLocalLlm()`.
	 *
	 * Intended to be idempotent, so repeated calls are safe — presumably
	 * guaranteed by `loadLocalLlm` itself; verify against that package.
	 *
	 * @throws EdgeLoadFailedError when loading fails (model corrupt,
	 *   WebGPU OOM, etc.). The message is taken from the underlying
	 *   `Error`, or from `String(value)` for non-Error throwables.
	 */
	async ensureLoaded(): Promise<void> {
		try {
			await loadLocalLlm();
		} catch (cause) {
			// Normalise arbitrary thrown values into a plain message string.
			const reason = cause instanceof Error ? cause.message : String(cause);
			throw new EdgeLoadFailedError(reason);
		}
	}

	/**
	 * Run one generation request through the local model.
	 *
	 * Awaits `ensureLoaded()` first, so a load failure surfaces as
	 * `EdgeLoadFailedError` before any generation is attempted.
	 *
	 * @param req - Task request; its `messages`, `temperature`,
	 *   `maxTokens` and `onToken` fields are forwarded to `localLLM.generate`.
	 * @returns the generated content, token usage mapped from the engine's
	 *   snake_case counters to camelCase, and the reported latency.
	 */
	async generate(req: LlmTaskRequest): Promise<GenerateResult> {
		await this.ensureLoaded();

		const { messages, temperature, maxTokens, onToken } = req;
		const completion = await localLLM.generate({
			messages: messages as LocalChatMessage[],
			temperature,
			maxTokens,
			onToken,
		});

		const { content, usage, latencyMs } = completion;
		return {
			content,
			usage: {
				promptTokens: usage.prompt_tokens,
				completionTokens: usage.completion_tokens,
				totalTokens: usage.total_tokens,
			},
			latencyMs,
		};
	}
}
|