Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-15 04:41:09 +02:00)
refactor(brain): migrate Companion engine to LlmOrchestrator (4-tier system)
Replaces the ad-hoc local-first + server-fallback pattern with the
shared LlmOrchestrator, giving the Companion Chat full access to
the 4-tier system (none/browser/mana-server/cloud) and its privacy
+ user-preference enforcement.
New companionChatTask (lib/llm-tasks/companion-chat.ts):
- name: 'companion.chat'
- minTier: 'browser' (no rules fallback — needs an LLM)
- contentClass: 'personal' (allows server/cloud if user opted in;
NOT 'sensitive' because the chat isn't restricted to browser-only,
but the user can set it per-task via taskOverrides)
- requires: { streaming: true }
Engine changes:
- callLlm() now delegates to llmOrchestrator.run(companionChatTask, ...)
- Still preloads the local model when browser tier is available so
the UI can show download progress
- isCompanionAvailable() now asks llmOrchestrator.canRun() which
considers user settings + backend readiness + consent gates
User benefits:
- Tier-selector in the PillNav now applies to Companion Chat
- Users can force cloud/server/browser per-task via settings overrides (see the sketch after this list)
- Cloud tier only runs when cloudConsentGiven is set
- Privacy: content marked 'sensitive' in other tasks (Journal etc.)
is still restricted to browser/rules — Companion respects the
same orchestrator so privacy invariants hold consistently
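For illustration, a per-task override could look like the sketch below. The field names (defaultTier, the shape of taskOverrides entries) and the second task name are assumptions for illustration, not the actual @mana/shared-llm settings schema:

// Hypothetical settings shape (illustrative only); the real schema lives
// in @mana/shared-llm and may name these fields differently.
const llmSettings = {
  defaultTier: 'browser',
  cloudConsentGiven: true, // required before the cloud tier will run at all
  taskOverrides: {
    'companion.chat': { tier: 'cloud' },      // force Companion Chat to cloud
    'journal.summarize': { tier: 'browser' }, // invented task name: keep a sensitive task on-device
  },
};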
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Commit e885713fd0 (parent 4b2007e97c)
2 changed files with 90 additions and 64 deletions
apps/mana/apps/web/src/lib/llm-tasks/companion-chat.ts (new file, 53 lines added)
@@ -0,0 +1,53 @@
+/**
+ * companionChatTask — LLM task definition for the Companion Brain chat.
+ *
+ * Routes through the shared LlmOrchestrator so the user's tier settings
+ * and privacy rules are respected:
+ *
+ * - minTier: browser (needs at least local Gemma; no rules fallback)
+ * - contentClass: 'personal' — user messages may reference their data
+ *   but aren't the most sensitive class (which is reserved for things
+ *   like Journal, Dreams, Finance). 'personal' allows mana-server or
+ *   cloud if the user opted in; 'sensitive' would restrict to browser.
+ * - streaming: true — the chat UI relies on per-token updates
+ *
+ * Individual callers can override the tier via settings.taskOverrides
+ * (e.g. force cloud for Companion even if the default is browser).
+ */
+
+import type { LlmBackend, LlmTask, GenerateResult } from '@mana/shared-llm';
+
+export interface CompanionChatInput {
+  messages: { role: 'user' | 'assistant' | 'system'; content: string }[];
+  temperature?: number;
+  maxTokens?: number;
+  onToken?: (token: string) => void;
+}
+
+export type CompanionChatOutput = {
+  content: string;
+  usage?: GenerateResult['usage'];
+};
+
+export const companionChatTask: LlmTask<CompanionChatInput, CompanionChatOutput> = {
+  name: 'companion.chat',
+  minTier: 'browser',
+  contentClass: 'personal',
+  requires: { streaming: true },
+  displayLabel: 'Companion Chat',
+
+  async runLlm(input: CompanionChatInput, backend: LlmBackend): Promise<CompanionChatOutput> {
+    const result = await backend.generate({
+      taskName: 'companion.chat',
+      contentClass: 'personal',
+      messages: input.messages,
+      temperature: input.temperature ?? 0.7,
+      maxTokens: input.maxTokens ?? 1024,
+      onToken: input.onToken,
+    });
+    return {
+      content: result.content,
+      usage: result.usage,
+    };
+  },
+};
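A minimal usage sketch for this task, assuming only the orchestrator API visible in the engine diff below (llmOrchestrator.run returning a result whose value field carries the task output); the example message text is invented:

// Usage sketch: run the Companion task through the shared orchestrator.
// Mirrors the call pattern in callLlm() below.
import { llmOrchestrator } from '@mana/shared-llm';
import { companionChatTask } from '$lib/llm-tasks/companion-chat';

const result = await llmOrchestrator.run(companionChatTask, {
  messages: [{ role: 'user', content: 'How did my week go?' }],
  onToken: (token) => console.log(token),
  temperature: 0.7,
  maxTokens: 1024,
});
console.log(result.value.content);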
@@ -1,15 +1,21 @@
 /**
  * Companion Chat Engine — Orchestrates LLM + Context Document + Tool Calling.
  *
- * Tries local LLM (Gemma via @mana/local-llm) first. If WebGPU is not
- * available, falls back to the mana-llm server endpoint. Tool calling
- * uses JSON extraction from the LLM output.
+ * Routes through the shared LlmOrchestrator (4-tier system). The orchestrator
+ * picks browser/mana-server/cloud based on user settings + the task's
+ * contentClass ('personal'). Users can override per-task via their LLM
+ * settings (e.g. "Companion always via cloud" or "never leave device").
+ *
+ * Tool calling is simulated via JSON extraction since none of the tiers
+ * natively speak function calling (Gemma doesn't, Gemini via our proxy
+ * routes through text-completion).
  */
 
-import { generate, getLocalLlmStatus, loadLocalLlm, isLocalLlmSupported } from '@mana/local-llm';
+import { llmOrchestrator } from '@mana/shared-llm';
+import { isLocalLlmSupported, getLocalLlmStatus, loadLocalLlm } from '@mana/local-llm';
+import { companionChatTask } from '$lib/llm-tasks/companion-chat';
 import { generateContextDocument } from '$lib/data/projections/context-document';
 import { getToolsForLlm, executeTool } from '$lib/data/tools';
 import { authStore } from '$lib/stores/auth.svelte';
 import type { DaySnapshot, StreakInfo } from '$lib/data/projections/types';
 import { emitDomainEvent } from '$lib/data/events';
 import { getTool } from '$lib/data/tools/registry';
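The header comment above says tool calling is simulated via JSON extraction. That code is not part of this diff; the sketch below only illustrates the general idea, and the names ToolCall and extractToolCall are invented:

// Rough sketch of JSON-extraction tool calling (not the actual implementation).
type ToolCall = { tool: string; args: Record<string, unknown> };

function extractToolCall(llmOutput: string): ToolCall | null {
  // Look for a JSON object anywhere in the model's text output.
  const match = llmOutput.match(/\{[\s\S]*\}/);
  if (!match) return null;
  try {
    const parsed = JSON.parse(match[0]);
    if (typeof parsed.tool === 'string') return parsed as ToolCall;
  } catch {
    // Not valid JSON: treat the output as a plain text answer.
  }
  return null;
}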
@@ -20,59 +26,37 @@ const MAX_TOOL_ROUNDS = 3;
 
 type LlmMessage = { role: 'user' | 'assistant' | 'system'; content: string };
 
-/** Try local LLM, fall back to server if WebGPU unavailable. */
+/**
+ * Route an LLM call through the orchestrator. The orchestrator handles
+ * tier selection, privacy enforcement, and fallbacks. If the browser
+ * tier is chosen but the local model hasn't loaded yet, we trigger
+ * the download first so the UI can show progress.
+ */
 async function callLlm(messages: LlmMessage[], onToken?: (token: string) => void): Promise<string> {
-  // Try local first (WebGPU + Gemma)
+  // If browser tier is available, preload the model so the
+  // CompanionChat UI can show download progress before generation starts.
   if (isLocalLlmSupported()) {
     const status = getLocalLlmStatus();
-    if (status.current.state !== 'ready') {
-      try {
-        await loadLocalLlm();
-      } catch {
-        // Fall through to server
-        return callServerLlm(messages);
-      }
-    }
-    const result = await generate({ messages, temperature: 0.7, maxTokens: 1024, onToken });
-    return result.content;
+    if (status.current.state === 'idle' || status.current.state === 'checking') {
+      // Fire-and-forget — the orchestrator will await isReady() anyway
+      void loadLocalLlm().catch(() => {
+        /* fall through to next tier */
+      });
+    }
   }
 
-  // Fallback: server-side LLM via mana-api
-  return callServerLlm(messages);
-}
-
-async function callServerLlm(messages: LlmMessage[]): Promise<string> {
-  const apiUrl =
-    (typeof window !== 'undefined' &&
-      (window as unknown as Record<string, string>).__PUBLIC_MANA_API_URL__) ||
-    import.meta.env.PUBLIC_MANA_API_URL ||
-    '';
-
-  if (!apiUrl) {
-    return 'LLM nicht verfuegbar — weder WebGPU noch Server-Endpoint konfiguriert.';
-  }
-
-  const headers: Record<string, string> = { 'Content-Type': 'application/json' };
-  try {
-    const token = await authStore.getValidToken();
-    if (token) headers['Authorization'] = `Bearer ${token}`;
-  } catch {
-    // Continue without auth — server will decide
-  }
-
-  const response = await fetch(`${apiUrl}/api/v1/chat/completions`, {
-    method: 'POST',
-    headers,
-    body: JSON.stringify({ messages, model: 'companion' }),
-  });
-
-  if (!response.ok) {
-    const err = await response.text().catch(() => response.statusText);
-    return `Server-Fehler: ${err}`;
-  }
-
-  const data = (await response.json()) as { choices?: { message?: { content?: string } }[] };
-  return data.choices?.[0]?.message?.content ?? 'Keine Antwort vom Server.';
-}
+  try {
+    const result = await llmOrchestrator.run(companionChatTask, {
+      messages,
+      onToken,
+      temperature: 0.7,
+      maxTokens: 1024,
+    });
+    return result.value.content;
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    return `LLM nicht verfuegbar: ${msg}`;
+  }
+}
 
 interface EngineResult {
@@ -272,20 +256,9 @@ export async function runCompanionChat(
 }
 
 /**
- * Check if the Companion Chat is available.
- * Returns true if either local LLM or server endpoint is usable.
+ * Check if the Companion Chat is available — delegates to the orchestrator
+ * which considers the user's enabled tiers and backend readiness.
  */
 export function isCompanionAvailable(): boolean {
-  // Local LLM available?
-  if (isLocalLlmSupported()) {
-    const status = getLocalLlmStatus();
-    if (status.current.state === 'ready' || status.current.state === 'idle') return true;
-  }
-  // Server fallback configured?
-  const apiUrl =
-    (typeof window !== 'undefined' &&
-      (window as unknown as Record<string, string>).__PUBLIC_MANA_API_URL__) ||
-    import.meta.env.PUBLIC_MANA_API_URL ||
-    '';
-  return !!apiUrl;
+  return llmOrchestrator.canRun(companionChatTask);
 }
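Illustrative call site only (the UI wiring is not part of this commit); showCompanionChatEntry is an invented helper:

// Hypothetical gating in the UI layer: only surface the Companion entry
// point when the orchestrator reports the task as runnable.
if (isCompanionAvailable()) {
  showCompanionChatEntry(); // invented helper, for illustration
}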