From e885713fd01d8c9d7d32abe293d8da7dcbc5be3f Mon Sep 17 00:00:00 2001
From: Till JS
Date: Tue, 14 Apr 2026 14:09:57 +0200
Subject: [PATCH] refactor(brain): migrate Companion engine to LlmOrchestrator (4-tier system)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the ad-hoc local-first + server-fallback pattern with the shared
LlmOrchestrator, giving the Companion Chat full access to the 4-tier system
(none/browser/mana-server/cloud) and its privacy + user-preference enforcement.

New companionChatTask (lib/llm-tasks/companion-chat.ts):
- name: 'companion.chat'
- minTier: 'browser' (no rules fallback — needs an LLM)
- contentClass: 'personal' (allows server/cloud if user opted in; NOT 'sensitive'
  because the chat isn't restricted to browser-only, but the user can set it
  per-task via taskOverrides)
- requires: { streaming: true }

Engine changes:
- callLlm() now delegates to llmOrchestrator.run(companionChatTask, ...)
- Still preloads the local model when browser tier is available so the UI can
  show download progress
- isCompanionAvailable() now asks llmOrchestrator.canRun() which considers
  user settings + backend readiness + consent gates

User benefits:
- Tier-selector in the PillNav now applies to Companion Chat
- Users can force cloud/server/browser per-task via settings overrides
- Cloud tier only runs when cloudConsentGiven is set
- Privacy: content marked 'sensitive' in other tasks (Journal etc.) is still
  restricted to browser/rules — Companion respects the same orchestrator so
  privacy invariants hold consistently

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 .../web/src/lib/llm-tasks/companion-chat.ts  |  53 +++++++++
 .../web/src/lib/modules/companion/engine.ts  | 101 +++++++-----
 2 files changed, 90 insertions(+), 64 deletions(-)
 create mode 100644 apps/mana/apps/web/src/lib/llm-tasks/companion-chat.ts

diff --git a/apps/mana/apps/web/src/lib/llm-tasks/companion-chat.ts b/apps/mana/apps/web/src/lib/llm-tasks/companion-chat.ts
new file mode 100644
index 000000000..b2e2962aa
--- /dev/null
+++ b/apps/mana/apps/web/src/lib/llm-tasks/companion-chat.ts
@@ -0,0 +1,53 @@
+/**
+ * companionChatTask — LLM task definition for the Companion Brain chat.
+ *
+ * Routes through the shared LlmOrchestrator so the user's tier settings
+ * and privacy rules are respected:
+ *
+ * - minTier: browser (needs at least local Gemma; no rules fallback)
+ * - contentClass: 'personal' — user messages may reference their data
+ *   but aren't the most sensitive class (which is reserved for things
+ *   like Journal, Dreams, Finance). 'personal' allows mana-server or
+ *   cloud if the user opted in; 'sensitive' would restrict to browser.
+ * - streaming: true — the chat UI relies on per-token updates
+ *
+ * Individual callers can override the tier via settings.taskOverrides
+ * (e.g. force cloud for Companion even if the default is browser).
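+ *
+ * A minimal sketch of such an override (hypothetical shape; the actual
+ * taskOverrides schema in settings may differ):
+ *
+ *   settings.taskOverrides = {
+ *     'companion.chat': { tier: 'cloud' },
+ *   };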
+ */
+
+import type { LlmBackend, LlmTask, GenerateResult } from '@mana/shared-llm';
+
+export interface CompanionChatInput {
+  messages: { role: 'user' | 'assistant' | 'system'; content: string }[];
+  temperature?: number;
+  maxTokens?: number;
+  onToken?: (token: string) => void;
+}
+
+export type CompanionChatOutput = {
+  content: string;
+  usage?: GenerateResult['usage'];
+};
+
+export const companionChatTask: LlmTask = {
+  name: 'companion.chat',
+  minTier: 'browser',
+  contentClass: 'personal',
+  requires: { streaming: true },
+  displayLabel: 'Companion Chat',
+
+  async runLlm(input: CompanionChatInput, backend: LlmBackend): Promise<CompanionChatOutput> {
+    const result = await backend.generate({
+      taskName: 'companion.chat',
+      contentClass: 'personal',
+      messages: input.messages,
+      temperature: input.temperature ?? 0.7,
+      maxTokens: input.maxTokens ?? 1024,
+      onToken: input.onToken,
+    });
+    return {
+      content: result.content,
+      usage: result.usage,
+    };
+  },
+};
diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
index cf19b46b2..a153d7d1e 100644
--- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts
+++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
@@ -1,15 +1,21 @@
 /**
  * Companion Chat Engine — Orchestrates LLM + Context Document + Tool Calling.
  *
- * Tries local LLM (Gemma via @mana/local-llm) first. If WebGPU is not
- * available, falls back to the mana-llm server endpoint. Tool calling
- * uses JSON extraction from the LLM output.
+ * Routes through the shared LlmOrchestrator (4-tier system). The orchestrator
+ * picks browser/mana-server/cloud based on user settings + the task's
+ * contentClass ('personal'). Users can override per-task via their LLM
+ * settings (e.g. "Companion always via cloud" or "never leave device").
+ *
+ * Tool calling is simulated via JSON extraction since none of the tiers
+ * natively speak function calling (Gemma doesn't, Gemini via our proxy
+ * routes through text-completion).
  */

-import { generate, getLocalLlmStatus, loadLocalLlm, isLocalLlmSupported } from '@mana/local-llm';
+import { llmOrchestrator } from '@mana/shared-llm';
+import { isLocalLlmSupported, getLocalLlmStatus, loadLocalLlm } from '@mana/local-llm';
+import { companionChatTask } from '$lib/llm-tasks/companion-chat';
 import { generateContextDocument } from '$lib/data/projections/context-document';
 import { getToolsForLlm, executeTool } from '$lib/data/tools';
-import { authStore } from '$lib/stores/auth.svelte';
 import type { DaySnapshot, StreakInfo } from '$lib/data/projections/types';
 import { emitDomainEvent } from '$lib/data/events';
 import { getTool } from '$lib/data/tools/registry';
@@ -20,59 +26,37 @@ const MAX_TOOL_ROUNDS = 3;

 type LlmMessage = { role: 'user' | 'assistant' | 'system'; content: string };

-/** Try local LLM, fall back to server if WebGPU unavailable. */
+/**
+ * Route an LLM call through the orchestrator. The orchestrator handles
+ * tier selection, privacy enforcement, and fallbacks. If the browser
+ * tier is chosen but the local model hasn't loaded yet, we trigger
+ * the download first so the UI can show progress.
+ */
 async function callLlm(messages: LlmMessage[], onToken?: (token: string) => void): Promise<string> {
-  // Try local first (WebGPU + Gemma)
+  // If browser tier is available, preload the model so the
+  // CompanionChat UI can show download progress before generation starts.
   if (isLocalLlmSupported()) {
     const status = getLocalLlmStatus();
-    if (status.current.state !== 'ready') {
-      try {
-        await loadLocalLlm();
-      } catch {
-        // Fall through to server
-        return callServerLlm(messages);
-      }
+    if (status.current.state === 'idle' || status.current.state === 'checking') {
+      // Fire-and-forget — the orchestrator will await isReady() anyway
+      void loadLocalLlm().catch(() => {
+        /* fall through to next tier */
+      });
     }
-    const result = await generate({ messages, temperature: 0.7, maxTokens: 1024, onToken });
-    return result.content;
   }
-  // Fallback: server-side LLM via mana-api
-  return callServerLlm(messages);
-}
-
-async function callServerLlm(messages: LlmMessage[]): Promise<string> {
-  const apiUrl =
-    (typeof window !== 'undefined' &&
-      (window as unknown as Record<string, string>).__PUBLIC_MANA_API_URL__) ||
-    import.meta.env.PUBLIC_MANA_API_URL ||
-    '';
-
-  if (!apiUrl) {
-    return 'LLM nicht verfuegbar — weder WebGPU noch Server-Endpoint konfiguriert.';
-  }
-
-  const headers: Record<string, string> = { 'Content-Type': 'application/json' };

   try {
-    const token = await authStore.getValidToken();
-    if (token) headers['Authorization'] = `Bearer ${token}`;
-  } catch {
-    // Continue without auth — server will decide
+    const result = await llmOrchestrator.run(companionChatTask, {
+      messages,
+      onToken,
+      temperature: 0.7,
+      maxTokens: 1024,
+    });
+    return result.value.content;
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    return `LLM nicht verfuegbar: ${msg}`;
   }
-
-  const response = await fetch(`${apiUrl}/api/v1/chat/completions`, {
-    method: 'POST',
-    headers,
-    body: JSON.stringify({ messages, model: 'companion' }),
-  });
-
-  if (!response.ok) {
-    const err = await response.text().catch(() => response.statusText);
-    return `Server-Fehler: ${err}`;
-  }
-
-  const data = (await response.json()) as { choices?: { message?: { content?: string } }[] };
-  return data.choices?.[0]?.message?.content ?? 'Keine Antwort vom Server.';
 }

 interface EngineResult {
@@ -272,20 +256,9 @@ export async function runCompanionChat(
 }

 /**
- * Check if the Companion Chat is available.
- * Returns true if either local LLM or server endpoint is usable.
+ * Check if the Companion Chat is available — delegates to the orchestrator
+ * which considers the user's enabled tiers and backend readiness.
  */
 export function isCompanionAvailable(): boolean {
-  // Local LLM available?
-  if (isLocalLlmSupported()) {
-    const status = getLocalLlmStatus();
-    if (status.current.state === 'ready' || status.current.state === 'idle') return true;
-  }
-  // Server fallback configured?
-  const apiUrl =
-    (typeof window !== 'undefined' &&
-      (window as unknown as Record<string, string>).__PUBLIC_MANA_API_URL__) ||
-    import.meta.env.PUBLIC_MANA_API_URL ||
-    '';
-  return !!apiUrl;
+  return llmOrchestrator.canRun(companionChatTask);
 }
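
Note (outside the patch): a rough sketch of the @mana/shared-llm surface the
new engine code assumes. OrchestratorRunResult and LlmOrchestratorLike are
made-up names for illustration; the fields are inferred from the calls above
(run(...).value.content, canRun(...)) and the real types may differ:

    import type { LlmTask } from '@mana/shared-llm';

    // Hypothetical wrapper, inferred from `result.value.content` in callLlm().
    interface OrchestratorRunResult<T> {
      value: T; // the task's output, e.g. CompanionChatOutput
    }

    interface LlmOrchestratorLike {
      // Picks a tier from user settings, the task's minTier/contentClass and
      // consent gates, then calls task.runLlm(input, backend) on that tier.
      run<TIn, TOut>(task: LlmTask, input: TIn): Promise<OrchestratorRunResult<TOut>>;

      // True when at least one enabled tier satisfying task.minTier is ready;
      // this is what the new isCompanionAvailable() returns.
      canRun(task: LlmTask): boolean;
    }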