refactor(brain): migrate Companion engine to LlmOrchestrator (4-tier system)

Replaces the ad-hoc local-first + server-fallback pattern with the
shared LlmOrchestrator, giving the Companion Chat full access to
the 4-tier system (none/browser/mana-server/cloud) and its privacy
+ user-preference enforcement.

New companionChatTask (lib/llm-tasks/companion-chat.ts):
- name: 'companion.chat'
- minTier: 'browser' (no rules fallback — needs an LLM)
- contentClass: 'personal' (allows server/cloud if user opted in;
  NOT 'sensitive' because the chat isn't restricted to browser-only,
  but the user can set it per-task via taskOverrides)
- requires: { streaming: true }

Engine changes:
- callLlm() now delegates to llmOrchestrator.run(companionChatTask, ...)
- Still preloads the local model when browser tier is available so
  the UI can show download progress
- isCompanionAvailable() now asks llmOrchestrator.canRun() which
  considers user settings + backend readiness + consent gates

User benefits:
- Tier-selector in the PillNav now applies to Companion Chat
- Users can force cloud/server/browser per-task via settings overrides
- Cloud tier only runs when cloudConsentGiven is set
- Privacy: content marked 'sensitive' in other tasks (Journal etc.)
  is still restricted to browser/rules — Companion respects the
  same orchestrator so privacy invariants hold consistently

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-14 14:09:57 +02:00
parent 4b2007e97c
commit e885713fd0
2 changed files with 90 additions and 64 deletions

View file

@ -0,0 +1,53 @@
/**
 * companionChatTask — LLM task definition for the Companion Brain chat.
 *
 * Routes through the shared LlmOrchestrator so the user's tier settings
 * and privacy rules are respected:
 *
 * - minTier: browser (needs at least local Gemma; no rules fallback)
 * - contentClass: 'personal' — user messages may reference their data
 *   but aren't the most sensitive class (which is reserved for things
 *   like Journal, Dreams, Finance). 'personal' allows mana-server or
 *   cloud if the user opted in; 'sensitive' would restrict to browser.
 * - streaming: true — the chat UI relies on per-token updates
 *
 * Individual callers can override the tier via settings.taskOverrides
 * (e.g. force cloud for Companion even if the default is browser).
 */
import type { LlmBackend, LlmTask, GenerateResult } from '@mana/shared-llm';
/**
 * Input for one Companion chat generation.
 *
 * NOTE(review): `messages` appears to be the full conversation transcript in
 * OpenAI-style role order — confirm against the engine's prompt builder.
 */
export interface CompanionChatInput {
  // Chat transcript forwarded verbatim to the selected backend.
  messages: { role: 'user' | 'assistant' | 'system'; content: string }[];
  // Sampling temperature; runLlm falls back to 0.7 when omitted.
  temperature?: number;
  // Generation cap; runLlm falls back to 1024 when omitted.
  maxTokens?: number;
  // Streaming callback, invoked per generated token (the task declares
  // requires.streaming, so callers can rely on incremental updates).
  onToken?: (token: string) => void;
}
/** Result of a Companion chat generation, adapted from the backend's GenerateResult. */
export type CompanionChatOutput = {
  // Complete assistant reply text.
  content: string;
  // Token-usage accounting as reported by the backend, when available.
  usage?: GenerateResult['usage'];
};
/**
 * Task descriptor handed to the LlmOrchestrator for Companion Chat.
 * Declares routing constraints (minTier, contentClass, streaming) and the
 * backend-agnostic generation entry point.
 */
export const companionChatTask: LlmTask<CompanionChatInput, CompanionChatOutput> = {
  name: 'companion.chat',
  minTier: 'browser',
  contentClass: 'personal',
  requires: { streaming: true },
  displayLabel: 'Companion Chat',
  /**
   * Forward the chat to whichever backend the orchestrator selected and
   * adapt the backend result to the task's output shape.
   */
  async runLlm(input: CompanionChatInput, backend: LlmBackend): Promise<CompanionChatOutput> {
    // ?? (not destructuring defaults) so an explicit null also falls back.
    const temperature = input.temperature ?? 0.7;
    const maxTokens = input.maxTokens ?? 1024;
    const generated = await backend.generate({
      taskName: 'companion.chat',
      contentClass: 'personal',
      messages: input.messages,
      temperature,
      maxTokens,
      onToken: input.onToken,
    });
    return { content: generated.content, usage: generated.usage };
  },
};

View file

@ -1,15 +1,21 @@
/**
 * Companion Chat Engine — orchestrates LLM + Context Document + Tool Calling.
 *
 * Routes through the shared LlmOrchestrator (4-tier system). The orchestrator
 * picks browser/mana-server/cloud based on user settings + the task's
 * contentClass ('personal'). Users can override per-task via their LLM
 * settings (e.g. "Companion always via cloud" or "never leave device").
 *
 * Tool calling is simulated via JSON extraction since none of the tiers
 * natively speak function calling (Gemma doesn't, Gemini via our proxy
 * routes through text-completion).
 */
import { generate, getLocalLlmStatus, loadLocalLlm, isLocalLlmSupported } from '@mana/local-llm';
import { llmOrchestrator } from '@mana/shared-llm';
import { isLocalLlmSupported, getLocalLlmStatus, loadLocalLlm } from '@mana/local-llm';
import { companionChatTask } from '$lib/llm-tasks/companion-chat';
import { generateContextDocument } from '$lib/data/projections/context-document';
import { getToolsForLlm, executeTool } from '$lib/data/tools';
import { authStore } from '$lib/stores/auth.svelte';
import type { DaySnapshot, StreakInfo } from '$lib/data/projections/types';
import { emitDomainEvent } from '$lib/data/events';
import { getTool } from '$lib/data/tools/registry';
@ -20,59 +26,37 @@ const MAX_TOOL_ROUNDS = 3;
/** Minimal OpenAI-style chat message shape passed to the LLM tiers. */
type LlmMessage = { role: 'user' | 'assistant' | 'system'; content: string };
/** Try local LLM, fall back to server if WebGPU unavailable. */
/**
* Route an LLM call through the orchestrator. The orchestrator handles
* tier selection, privacy enforcement, and fallbacks. If the browser
* tier is chosen but the local model hasn't loaded yet, we trigger
* the download first so the UI can show progress.
*/
async function callLlm(messages: LlmMessage[], onToken?: (token: string) => void): Promise<string> {
// Try local first (WebGPU + Gemma)
// If browser tier is available, preload the model so the
// CompanionChat UI can show download progress before generation starts.
if (isLocalLlmSupported()) {
const status = getLocalLlmStatus();
if (status.current.state !== 'ready') {
try {
await loadLocalLlm();
} catch {
// Fall through to server
return callServerLlm(messages);
}
if (status.current.state === 'idle' || status.current.state === 'checking') {
// Fire-and-forget — the orchestrator will await isReady() anyway
void loadLocalLlm().catch(() => {
/* fall through to next tier */
});
}
const result = await generate({ messages, temperature: 0.7, maxTokens: 1024, onToken });
return result.content;
}
// Fallback: server-side LLM via mana-api
return callServerLlm(messages);
}
async function callServerLlm(messages: LlmMessage[]): Promise<string> {
const apiUrl =
(typeof window !== 'undefined' &&
(window as unknown as Record<string, string>).__PUBLIC_MANA_API_URL__) ||
import.meta.env.PUBLIC_MANA_API_URL ||
'';
if (!apiUrl) {
return 'LLM nicht verfuegbar — weder WebGPU noch Server-Endpoint konfiguriert.';
}
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
try {
const token = await authStore.getValidToken();
if (token) headers['Authorization'] = `Bearer ${token}`;
} catch {
// Continue without auth — server will decide
const result = await llmOrchestrator.run(companionChatTask, {
messages,
onToken,
temperature: 0.7,
maxTokens: 1024,
});
return result.value.content;
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
return `LLM nicht verfuegbar: ${msg}`;
}
const response = await fetch(`${apiUrl}/api/v1/chat/completions`, {
method: 'POST',
headers,
body: JSON.stringify({ messages, model: 'companion' }),
});
if (!response.ok) {
const err = await response.text().catch(() => response.statusText);
return `Server-Fehler: ${err}`;
}
const data = (await response.json()) as { choices?: { message?: { content?: string } }[] };
return data.choices?.[0]?.message?.content ?? 'Keine Antwort vom Server.';
}
interface EngineResult {
@ -272,20 +256,9 @@ export async function runCompanionChat(
}
/**
 * Check if the Companion Chat is available — delegates to the orchestrator,
 * which considers the user's enabled tiers (none/browser/mana-server/cloud)
 * and backend readiness for the 'companion.chat' task.
 *
 * @returns true when at least one tier can serve companionChatTask
 */
export function isCompanionAvailable(): boolean {
  return llmOrchestrator.canRun(companionChatTask);
}