/** * LlmOrchestrator — routes LlmTasks across the four privacy tiers * (none / browser / mana-server / cloud) according to the user's * settings, the task's minimum tier, and the input's content class. * * Routing rules — applied in this exact order: * * 1. If the task's minTier is above the user's HIGHEST allowed tier, * we cannot run the LLM path at all. Try runRules() if defined, * else throw TierTooLowError. * * 2. If contentClass is 'sensitive', strip 'mana-server' and 'cloud' * from the candidate tier list — sensitive content NEVER leaves * the device, even if the user has these tiers enabled globally. * This is the privacy backstop the user can't accidentally * override task-by-task. * * 3. If a per-task override exists in settings.taskOverrides, use it * verbatim (still subject to rule 2 — task overrides cannot * bypass the sensitive-content backstop). * * 4. Otherwise, pick the FIRST tier from settings.allowedTiers that * (a) is in the candidate set after rules 1+2, (b) has an * available + ready backend, (c) the cloud-consent gate is * satisfied if it's the cloud tier. * * 5. Run the task on the chosen backend. * * 6. If the run throws and settings.fallbackToRulesOnError is true * and the task has a runRules() implementation, fall back to * rules. We do NOT auto-fall to a different LLM tier on error — * the user explicitly chose this tier and silently switching * providers would be a privacy/trust break. * * 7. If everything fails, throw NoTierAvailableError. UI catches it * and offers a "retry" / "switch tier" / "enter manually" prompt. */ import { BackendUnreachableError, NoTierAvailableError, ProviderBlockedError, TierTooLowError, } from './errors'; import type { LlmTask } from './task'; import type { LlmTier } from './tiers'; import { TIER_RANK } from './tiers'; import type { LlmBackend, LlmSettings, LlmTaskRequest, LlmTaskResult } from './types'; export interface LlmOrchestratorOptions { settings: LlmSettings; backends: LlmBackend[]; } export class LlmOrchestrator { private settings: LlmSettings; private backendsByTier: Map; constructor(opts: LlmOrchestratorOptions) { this.settings = opts.settings; this.backendsByTier = new Map(); for (const b of opts.backends) { this.backendsByTier.set(b.tier, b); } } /** Replace the settings object — call this when the user updates * their preferences in the settings UI. */ updateSettings(settings: LlmSettings): void { this.settings = settings; } /** Register (or replace) a backend at runtime — used by the app * to wire up the BYOK backend after initial orchestrator construction, * since BYOK needs access to app-side IndexedDB keys. */ registerBackend(backend: LlmBackend): void { this.backendsByTier.set(backend.tier, backend); } /** Remove a backend (e.g. when the user disables BYOK). */ unregisterBackend(tier: LlmTier): void { this.backendsByTier.delete(tier); } /** Public read-only view for UI components that want to react to * the current settings (e.g. the tier selector). */ getSettings(): Readonly { return this.settings; } /** * Can the user (with their current settings) run this task at all? * The UI uses this to decide whether to show a feature button as * enabled / disabled / hidden. Does NOT check backend readiness — * that's a per-call concern. Just checks "is there any conceivable * tier in the user's allowedTiers that satisfies task.minTier and * is permitted for task.contentClass?". */ canRun(task: LlmTask): boolean { // Rules-only tasks always run if they have a fallback if (task.minTier === 'none') return true; if (task.runRules) return true; const override = this.settings.taskOverrides[task.name]; const candidates = this.candidateTiers(task, override); return candidates.some((t) => { const backend = this.backendsByTier.get(t); return backend?.isAvailable() ?? false; }); } /** * Run the task. Honors the routing rules above. The returned * LlmTaskResult includes which tier actually ran, plus a trail * of tiers that were attempted and skipped before it. */ async run(task: LlmTask, input: TIn): Promise> { const start = performance.now(); const attempted: LlmTier[] = []; // Rule 1: tier-too-low check. // An explicit per-task override counts as opting-in to that tier // even if it isn't in the user's global allowedTiers — that's the // whole point of overrides (e.g. "use BYOK just for the Companion"). const override = this.settings.taskOverrides[task.name]; const effectiveMaxTier = override ? TIER_RANK[override] > TIER_RANK[this.userMaxTier()] ? override : this.userMaxTier() : this.userMaxTier(); if (TIER_RANK[task.minTier] > TIER_RANK[effectiveMaxTier]) { if (task.runRules) { const value = await task.runRules(input); return { value, source: 'none', latencyMs: Math.round(performance.now() - start), attempted: ['none'], }; } throw new TierTooLowError(task.name, task.minTier, effectiveMaxTier); } // Rules-2-3: candidate tier list and per-task override const candidates = this.candidateTiers(task, override); const orderedTiers = override ? [override].filter((t) => candidates.includes(t)) : candidates; // Rule 4-5: try the first runnable tier for (const tier of orderedTiers) { if (tier === 'none') { if (task.runRules) { const value = await task.runRules(input); return { value, source: 'none', latencyMs: Math.round(performance.now() - start), attempted: [...attempted, 'none'], }; } attempted.push('none'); continue; } // Cloud-consent gate if (tier === 'cloud' && !this.settings.cloudConsentGiven) { attempted.push('cloud'); continue; } const backend = this.backendsByTier.get(tier); if (!backend) { attempted.push(tier); continue; } if (!backend.isAvailable()) { attempted.push(tier); continue; } const ready = await backend.isReady(); if (!ready) { attempted.push(tier); continue; } try { const request = this.buildRequest(task, input); const generated = await task.runLlm(input, backend); return { value: generated, source: tier, latencyMs: Math.round(performance.now() - start), attempted: [...attempted, tier], }; // `request` is intentionally unused — the task constructs // its own LlmTaskRequest internally via runLlm. We build // it here only as a future hook for telemetry. void request; } catch (err) { attempted.push(tier); // Rule 6: rules-fallback on error if ( this.settings.fallbackToRulesOnError && task.runRules && !(err instanceof ProviderBlockedError) ) { // Provider-blocked errors should NOT silently fall to // rules — they should bubble up so the UI can offer // "retry" / "switch tier" prompts. Other errors // (network failure, OOM, model not loaded) get the // silent rules fallback. try { const value = await task.runRules(input); return { value, source: 'none', latencyMs: Math.round(performance.now() - start), attempted: [...attempted, 'none'], }; } catch { // rules fallback also failed — re-throw original throw err; } } // Re-throw provider blocks and unrecoverable errors if (err instanceof ProviderBlockedError || err instanceof BackendUnreachableError) { throw err; } // Unknown error — try the next tier in the list continue; } } throw new NoTierAvailableError(task.name, attempted); } /** Highest tier in the user's allowedTiers list (by rank). */ private userMaxTier(): LlmTier { if (this.settings.allowedTiers.length === 0) return 'none'; return this.settings.allowedTiers.reduce( (max, t) => (TIER_RANK[t] > TIER_RANK[max] ? t : max), 'none' as LlmTier ); } /** Candidate tier list after applying rules 1 + 2. * - Rule 1: only tiers >= task.minTier * - Rule 2: sensitive content excludes mana-server + cloud + byok * - If a per-task override is given, it's allowed even if not in * settings.allowedTiers (explicit per-task opt-in beats global) * Also always includes 'none' at the end if the task has runRules. */ private candidateTiers(task: LlmTask, override?: LlmTier): LlmTier[] { // Start with the user's allowed tiers, plus the override if set // (the override is an explicit per-task opt-in even if the user // hasn't enabled that tier globally). const baseTiers = override ? Array.from(new Set([...this.settings.allowedTiers, override])) : this.settings.allowedTiers; let tiers = baseTiers .filter((t) => TIER_RANK[t] >= TIER_RANK[task.minTier]) .sort((a, b) => TIER_RANK[a] - TIER_RANK[b]); // Rule 2: sensitive content backstop — only browser-local stays if (task.contentClass === 'sensitive') { tiers = tiers.filter((t) => t === 'browser'); } // 'none' is always tail-appended if the task has a rules implementation, // so the for-loop in run() naturally falls through to it. if (task.runRules && !tiers.includes('none')) { tiers.push('none'); } return tiers; } private buildRequest(task: LlmTask, _input: TIn): LlmTaskRequest { // Right now this is a placeholder — tasks build their own // LlmTaskRequest inside runLlm. Once we add token-counting // telemetry we'll move that construction up here so the // orchestrator can prepend the task metadata uniformly. return { taskName: task.name, contentClass: task.contentClass, requires: task.requires, messages: [], }; } }