From 3bef29b9c8732490afbfdf77fb143adb4cb0208e Mon Sep 17 00:00:00 2001
From: Till JS
Date: Thu, 2 Apr 2026 11:57:50 +0200
Subject: [PATCH] feat(local-llm): add generate utilities and reactive Svelte status

Add generate.ts with streaming chat completions, JSON extraction, and
text classification helpers. Add status.svelte.ts with Svelte 5 runes
reactive wrapper for LLM engine state.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 packages/local-llm/src/generate.ts      | 112 ++++++++++++++++++++++++
 packages/local-llm/src/status.svelte.ts |  22 +++++
 2 files changed, 134 insertions(+)
 create mode 100644 packages/local-llm/src/generate.ts
 create mode 100644 packages/local-llm/src/status.svelte.ts

diff --git a/packages/local-llm/src/generate.ts b/packages/local-llm/src/generate.ts
new file mode 100644
index 000000000..2ac7a0112
--- /dev/null
+++ b/packages/local-llm/src/generate.ts
@@ -0,0 +1,112 @@
+import { getEngine } from './engine.js';
+
+export interface ChatMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+export interface GenerateOptions {
+  messages: ChatMessage[];
+  temperature?: number;
+  maxTokens?: number;
+  onToken?: (token: string) => void;
+}
+
+export interface GenerateResult {
+  content: string;
+  latencyMs: number;
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+  };
+}
+
+export async function generate(options: GenerateOptions): Promise<GenerateResult> {
+  const engine = getEngine();
+  if (!engine) throw new Error('No model loaded. Call loadLocalLlm() first.');
+
+  const { messages, temperature = 0.7, maxTokens = 1024, onToken } = options;
+  const start = performance.now();
+
+  const reply = await engine.chat.completions.create({
+    messages,
+    temperature,
+    max_tokens: maxTokens,
+    stream: !!onToken,
+    stream_options: onToken ? { include_usage: true } : undefined,
+  });
+
+  let content = '';
+  let promptTokens = 0;
+  let completionTokens = 0;
+
+  if (Symbol.asyncIterator in Object(reply)) {
+    for await (const chunk of reply as AsyncIterable<any>) {
+      const delta = chunk.choices?.[0]?.delta?.content;
+      if (delta) {
+        content += delta;
+        onToken?.(delta);
+      }
+      if (chunk.usage) {
+        promptTokens = chunk.usage.prompt_tokens ?? 0;
+        completionTokens = chunk.usage.completion_tokens ?? 0;
+      }
+    }
+  } else {
+    const completion = reply as any;
+    content = completion.choices?.[0]?.message?.content ?? '';
+    promptTokens = completion.usage?.prompt_tokens ?? 0;
+    completionTokens = completion.usage?.completion_tokens ?? 0;
+  }
+
+  const latencyMs = Math.round(performance.now() - start);
+
+  return {
+    content,
+    latencyMs,
+    usage: { prompt_tokens: promptTokens, completion_tokens: completionTokens },
+  };
+}
+
+export async function extractJson(text: string, instruction: string): Promise<unknown> {
+  const result = await generate({
+    messages: [
+      {
+        role: 'system',
+        content:
+          'You are a JSON extraction assistant. Respond ONLY with valid JSON, no explanation or markdown.',
+      },
+      {
+        role: 'user',
+        content: `${instruction}\n\nText:\n${text}`,
+      },
+    ],
+    temperature: 0.1,
+    maxTokens: 2048,
+  });
+
+  const jsonMatch = result.content.match(/[[{][\s\S]*[}\]]/);
+  if (!jsonMatch) throw new Error('No JSON found in response');
+  return JSON.parse(jsonMatch[0]);
+}
+
+export async function classify(text: string, categories: string[]): Promise<string> {
+  const result = await generate({
+    messages: [
+      {
+        role: 'system',
+        content: `You are a text classifier. Classify the text into exactly one of these categories: ${categories.join(', ')}. Respond with ONLY the category name, nothing else.`,
+      },
+      {
+        role: 'user',
+        content: text,
+      },
+    ],
+    temperature: 0.1,
+    maxTokens: 50,
+  });
+
+  const response = result.content.trim().toLowerCase();
+  const match = categories.find((c) => response.includes(c.toLowerCase()));
+  return match ?? result.content.trim();
+}
diff --git a/packages/local-llm/src/status.svelte.ts b/packages/local-llm/src/status.svelte.ts
new file mode 100644
index 000000000..b37057d7f
--- /dev/null
+++ b/packages/local-llm/src/status.svelte.ts
@@ -0,0 +1,22 @@
+import { subscribe, type LlmState } from './engine.js';
+
+/**
+ * Reactive status wrapper for use in Svelte 5 components.
+ * Returns an object with a `current` property that updates reactively.
+ */
+export function getLocalLlmStatus(): { current: LlmState } {
+  let state = $state<LlmState>({ state: 'idle' });
+
+  $effect(() => {
+    const unsub = subscribe((s) => {
+      state = s;
+    });
+    return unsub;
+  });
+
+  return {
+    get current() {
+      return state;
+    },
+  };
+}