diff --git a/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte b/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
index a394cf40a..27aae5124 100644
--- a/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
+++ b/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
@@ -14,7 +14,7 @@
*
*/
import { llmSettingsState, tierLabel, type LlmTier } from '@mana/shared-llm';
- import { Lightning, Cpu, HardDrive, Cloud } from '@mana/shared-icons';
+ import { Lightning, Cpu, HardDrive, Cloud, Key } from '@mana/shared-icons';
interface Props {
tier: LlmTier;
@@ -39,6 +39,10 @@
color: 'border-blue-500/40 bg-blue-500/10 text-blue-600 dark:text-blue-400',
icon: HardDrive, // our infrastructure
},
+ byok: {
+ color: 'border-violet-500/40 bg-violet-500/10 text-violet-600 dark:text-violet-400',
+ icon: Key, // user-supplied key
+ },
cloud: {
color: 'border-amber-500/40 bg-amber-500/10 text-amber-600 dark:text-amber-400',
icon: Cloud, // remote
diff --git a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
index 7ace49162..cef405fed 100644
--- a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
+++ b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
@@ -23,6 +23,7 @@
none: 'Lokal (regelbasiert)',
browser: 'Auf deinem Gerät (Gemma 4 E2B)',
'mana-server': 'Mana-Server (Gemma 4 E4B)',
+ byok: 'Dein API-Key',
cloud: 'Google Gemini',
};
diff --git a/docs/architecture/BYOK_PLAN.md b/docs/architecture/BYOK_PLAN.md
new file mode 100644
index 000000000..4b5ec89af
--- /dev/null
+++ b/docs/architecture/BYOK_PLAN.md
@@ -0,0 +1,326 @@
+# BYOK — Bring Your Own Key
+
+> Architecture and implementation plan for user-provided API keys.
+> Status: planning (2026-04-14)
+
+## Goals
+
+- Users store their own API keys (OpenAI, Anthropic, Gemini, Mistral)
+- Keys are stored encrypted in IndexedDB (user master key, AES-GCM)
+- Keys never leave the device (browser-direct calls)
+- The orchestrator uses BYOK as a fifth tier alongside browser/mana-server/cloud
+- Per-call cost estimation via a pricing table
+- Multiple keys per provider (label-based, one `isDefault`)
+
+## Architecture
+
+```
+User (Browser)
+ |
+ v
+CompanionChat / any LLM task
+ |
+ v
+LlmOrchestrator.run(task, input)
+ |
+ v
+tier === 'byok' ?
+ |
+ v
+ByokBackend
+ |
+ v
+getByokKey(provider) [callback provided at app init]
+ |
+ v
+byokKeyVault (IndexedDB, encrypted)
+ |
+ v
+ decrypt via user master key
+ |
+ v
+ByokBackend.callProvider(provider, key, messages)
+ |
+ v
+Provider-specific adapter (openai/anthropic/gemini/mistral)
+ |
+ v
+direct HTTPS to api.openai.com / api.anthropic.com / ...
+```
+
+## Tier placement
+
+New tier order (ranked by "where data goes"):
+
+```
+none (0) — stays on device
+browser (1) — stays on device
+mana-server (2) — Mana's own infrastructure
+byok (3) — User's third-party accounts (user-controlled)
+cloud (4) — Mana's cloud (charges user's Mana credits)
+```
+
+Reasoning: `byok` sits between `mana-server` and `cloud` because it
+leaves the user's network but goes to an account the user manages
+personally. `cloud` is last because it costs the user Mana credits.
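+
+As a sketch of how the orchestrator can apply this order (assuming the
+existing `TIER_RANK` map from `tiers.ts` is exported; `pickTier` is an
+illustrative helper, not an existing function):
+
+```typescript
+import { TIER_RANK, type LlmTier } from '@mana/shared-llm';
+
+/** Most-private allowed tier that still meets a task's minimum tier. */
+function pickTier(allowed: LlmTier[], minTier: LlmTier): LlmTier | null {
+  const candidates = allowed
+    .filter((t) => TIER_RANK[t] >= TIER_RANK[minTier])
+    .sort((a, b) => TIER_RANK[a] - TIER_RANK[b]);
+  return candidates[0] ?? null; // lowest rank wins, i.e. most private
+}
+
+pickTier(['none', 'browser', 'byok'], 'browser'); // 'browser'
+pickTier(['none', 'browser'], 'byok'); // null (user must enable byok or cloud)
+```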
+
+## Files to create
+
+```
+packages/shared-llm/src/
+ tiers.ts → extend with 'byok'
+ types.ts → ByokKeyResolver callback type
+ backends/
+ byok.ts → ByokBackend class
+ byok-providers/
+ openai.ts → OpenAI API adapter
+ anthropic.ts → Anthropic API adapter
+ gemini.ts → Gemini REST adapter
+ mistral.ts → Mistral API adapter (OpenAI-compat)
+ types.ts → ByokProvider interface
+ pricing.ts → per-model token pricing
+ store.svelte.ts → register ByokBackend
+
+apps/mana/apps/web/src/
+ lib/byok/
+ types.ts → ByokKey interface
+ vault.ts → encrypted IndexedDB CRUD
+ store.svelte.ts → reactive Svelte store
+ init.ts → wire key resolver into ByokBackend
+ routes/(app)/settings/ai-keys/
+ +page.svelte → management UI
+
+apps/mana/apps/web/src/lib/data/database.ts
+ → add _byokKeys table (v15 schema)
+```
+
+## Data model
+
+```typescript
+// packages/shared-llm/src/backends/byok-providers/types.ts
+export type ByokProviderId = 'openai' | 'anthropic' | 'gemini' | 'mistral';
+
+export interface ByokProvider {
+ id: ByokProviderId;
+ displayName: string;
+ defaultModel: string;
+ availableModels: string[];
+ needsDangerousHeader?: boolean; // Anthropic
+ /** Call the provider with the user's key, return GenerateResult */
+ call(opts: {
+ apiKey: string;
+ model: string;
+ messages: ChatMessage[];
+ temperature?: number;
+ maxTokens?: number;
+ onToken?: (token: string) => void;
+  }): Promise<GenerateResult>;
+}
+
+// apps/mana/apps/web/src/lib/byok/types.ts
+export interface ByokKey {
+ id: string;
+ provider: ByokProviderId;
+ label: string; // "Work Anthropic"
+ keyCipher: string; // AES-GCM encrypted
+ keyIv: string; // init vector
+ model?: string; // override default model
+ isDefault: boolean;
+ createdAt: string;
+ updatedAt: string;
+ lastUsedAt?: string;
+ usageCount: number;
+ totalTokens: number;
+ deletedAt?: string;
+}
+```
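+
+The `keyCipher`/`keyIv` pair comes from WebCrypto AES-GCM. A minimal sketch
+of the encrypt/decrypt halves of `vault.ts`, assuming the existing master-key
+vault can hand us the user's master key as a `CryptoKey` (function names are
+illustrative):
+
+```typescript
+// apps/mana/apps/web/src/lib/byok/vault.ts (sketch)
+async function encryptApiKey(masterKey: CryptoKey, plaintext: string) {
+  const iv = crypto.getRandomValues(new Uint8Array(12)); // 96-bit IV, fresh per key
+  const cipherBuf = await crypto.subtle.encrypt(
+    { name: 'AES-GCM', iv },
+    masterKey,
+    new TextEncoder().encode(plaintext),
+  );
+  return {
+    keyCipher: btoa(String.fromCharCode(...new Uint8Array(cipherBuf))),
+    keyIv: btoa(String.fromCharCode(...iv)),
+  };
+}
+
+async function decryptApiKey(masterKey: CryptoKey, cipherB64: string, ivB64: string) {
+  const cipher = Uint8Array.from(atob(cipherB64), (c) => c.charCodeAt(0));
+  const iv = Uint8Array.from(atob(ivB64), (c) => c.charCodeAt(0));
+  const plainBuf = await crypto.subtle.decrypt({ name: 'AES-GCM', iv }, masterKey, cipher);
+  return new TextDecoder().decode(plainBuf);
+}
+```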
+
+## Key resolver callback
+
+The backend lives in `shared-llm` but keys live in the app's IndexedDB.
+We inject a resolver at app init:
+
+```typescript
+// packages/shared-llm/src/backends/byok.ts
+export type ByokKeyResolver = (
+ provider: ByokProviderId,
+ preferredLabel?: string,
+) => Promise<{ apiKey: string; model: string } | null>;
+
+export class ByokBackend implements LlmBackend {
+ readonly tier = 'byok' as const;
+ constructor(
+ private resolver: ByokKeyResolver,
+    private providers: Map<ByokProviderId, ByokProvider>,
+ ) {}
+ // ...
+}
+
+// apps/mana/apps/web/src/lib/byok/init.ts (app init)
+import { llmOrchestrator } from '@mana/shared-llm';
+import { getKeyForProvider } from './store.svelte';
+
+llmOrchestrator.registerByokResolver(getKeyForProvider);
+```
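+
+A sketch of what `getKeyForProvider` could look like on the app side
+(`loadKeys`, `decryptApiKey`, and `getMasterKey` are the hypothetical vault
+helpers from the section above; the default models mirror the provider
+adapters):
+
+```typescript
+// apps/mana/apps/web/src/lib/byok/store.svelte.ts (sketch)
+import type { ByokProviderId } from '@mana/shared-llm';
+import { loadKeys, decryptApiKey, getMasterKey } from './vault'; // hypothetical
+
+const PROVIDER_DEFAULTS: Record<ByokProviderId, string> = {
+  openai: 'gpt-4o-mini',
+  anthropic: 'claude-sonnet-4-5',
+  gemini: 'gemini-2.5-flash',
+  mistral: 'mistral-small-latest',
+};
+
+export async function getKeyForProvider(
+  provider: ByokProviderId,
+  preferredLabel?: string,
+): Promise<{ apiKey: string; model: string } | null> {
+  const keys = (await loadKeys()).filter(
+    (k) => k.provider === provider && !k.deletedAt,
+  );
+  // Prefer an explicit label, then the default key, then any key
+  const entry =
+    keys.find((k) => k.label === preferredLabel) ??
+    keys.find((k) => k.isDefault) ??
+    keys[0];
+  if (!entry) return null;
+  const masterKey = await getMasterKey();
+  return {
+    apiKey: await decryptApiKey(masterKey, entry.keyCipher, entry.keyIv),
+    model: entry.model ?? PROVIDER_DEFAULTS[provider],
+  };
+}
+```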
+
+## Provider adapters
+
+### OpenAI (CORS-friendly)
+
+```typescript
+fetch('https://api.openai.com/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Authorization': `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify({
+ model, messages, temperature, max_tokens: maxTokens, stream: true,
+ }),
+});
+// SSE streaming response
+```
+
+### Anthropic (needs dangerous header)
+
+```typescript
+fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'x-api-key': apiKey,
+ 'anthropic-version': '2023-06-01',
+ 'anthropic-dangerous-direct-browser-access': 'true',
+ },
+  body: JSON.stringify({ model, messages, max_tokens: maxTokens, stream: true }),
+});
+// SSE streaming with different event schema than OpenAI
+```
+
+### Gemini (REST with key in URL)
+
+```typescript
+fetch(`https://generativelanguage.googleapis.com/v1beta/models/${model}:streamGenerateContent?alt=sse&key=${apiKey}`, {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ contents: messagesToGeminiFormat(messages),
+ generationConfig: { temperature, maxOutputTokens: maxTokens },
+ }),
+});
+// Different message format!
+```
+
+### Mistral (OpenAI-compatible)
+
+```typescript
+fetch('https://api.mistral.ai/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Authorization': `Bearer ${apiKey}`,
+ },
+ body: JSON.stringify({
+ model, messages, temperature, max_tokens: maxTokens, stream: true,
+ }),
+});
+// Same as OpenAI, can reuse adapter
+```
+
+## Pricing (for cost estimation)
+
+```typescript
+// packages/shared-llm/src/pricing.ts
+export const PRICING: Record<string, { inputPer1k: number; outputPer1k: number }> = {
+ // OpenAI (USD per 1K tokens)
+ 'gpt-5': { inputPer1k: 0.015, outputPer1k: 0.060 },
+ 'gpt-4o': { inputPer1k: 0.005, outputPer1k: 0.020 },
+ 'gpt-4o-mini': { inputPer1k: 0.0003, outputPer1k: 0.0012 },
+ // Anthropic
+  'claude-opus-4-6': { inputPer1k: 0.015, outputPer1k: 0.075 },
+  'claude-sonnet-4-6': { inputPer1k: 0.003, outputPer1k: 0.015 },
+ // Gemini
+ 'gemini-2.5-pro': { inputPer1k: 0.00125, outputPer1k: 0.005 },
+ 'gemini-2.5-flash': { inputPer1k: 0.00015, outputPer1k: 0.0006 },
+ // Mistral
+ 'mistral-large-latest': { inputPer1k: 0.002, outputPer1k: 0.006 },
+ 'mistral-small-latest': { inputPer1k: 0.0002, outputPer1k: 0.0006 },
+};
+
+export function estimateCost(model: string, promptTokens: number, completionTokens: number): number {
+ const p = PRICING[model];
+ if (!p) return 0;
+ return (promptTokens / 1000) * p.inputPer1k + (completionTokens / 1000) * p.outputPer1k;
+}
+```
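+
+For example, a 1,200-token prompt with a 300-token completion on
+`gpt-4o-mini` costs `(1200/1000) * 0.0003 + (300/1000) * 0.0012 = $0.00072`,
+well under a tenth of a cent per call.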
+
+## Privacy rules
+
+```typescript
+// In orchestrator routing
+if (task.contentClass === 'sensitive') {
+ // BYOK blocked by default — leaves device to third-party
+ candidates = candidates.filter(t => t !== 'byok');
+}
+// User can opt-in per provider via
+// settings.byok.sensitiveOptIn = ['anthropic']
+```
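+
+Spelled out, the opt-in check could look like this (sketch; `resolveProviderFor`
+is an illustrative lookup into the BYOK store):
+
+```typescript
+if (task.contentClass === 'sensitive') {
+  const optIn = settings.byok?.sensitiveOptIn ?? [];
+  const provider = await resolveProviderFor(task); // which provider would handle this call?
+  const byokAllowed = provider !== null && optIn.includes(provider);
+  candidates = candidates.filter((t) => t !== 'byok' || byokAllowed);
+}
+```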
+
+## Settings schema extensions
+
+```typescript
+// LlmSettings (in shared-llm/src/types.ts)
+export interface LlmSettings {
+ allowedTiers: LlmTier[];
+  taskOverrides: Record<string, LlmTier>; // + 'byok' now valid
+ fallbackToRulesOnError: boolean;
+ showSourceInUi: boolean;
+ cloudConsentGiven: boolean;
+ // NEW:
+ byok?: {
+ defaultProvider?: ByokProviderId;
+ sensitiveOptIn: ByokProviderId[]; // explicit consent for sensitive content
+    preferredModel?: Partial<Record<ByokProviderId, string>>; // per-provider model override
+ };
+}
+```
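+
+A filled-in example (values illustrative): chat routed through the user's own
+Anthropic key, sensitive content kept off third parties entirely:
+
+```typescript
+const example: LlmSettings = {
+  allowedTiers: ['none', 'browser', 'mana-server', 'byok'],
+  taskOverrides: { 'companion.chat': 'byok' },
+  fallbackToRulesOnError: true,
+  showSourceInUi: true,
+  cloudConsentGiven: false,
+  byok: {
+    defaultProvider: 'anthropic',
+    sensitiveOptIn: [], // sensitive content never goes to a third party
+    preferredModel: { anthropic: 'claude-haiku-4-5' },
+  },
+};
+```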
+
+## Implementation order
+
+**Phase 1 — Foundation (1.5h)**
+1. Extend LlmTier with 'byok' in shared-llm
+2. Create ByokKey vault (IndexedDB + encrypt/decrypt)
+3. ByokBackend skeleton with provider registry
+4. Wire into orchestrator
+
+**Phase 2 — First provider (30min)**
+5. OpenAI adapter (simplest — CORS ok)
+6. Test via companion chat
+
+**Phase 3 — More providers (1.5h)**
+7. Anthropic adapter (with dangerous-header)
+8. Gemini adapter (different message format)
+9. Mistral adapter (OpenAI-compatible, trivial)
+
+**Phase 4 — UI (1.5h)**
+10. Settings/ai-keys page
+11. Add + edit + delete key modals
+12. Usage tracking (increment on each call)
+
+**Phase 5 — Polish (30min)**
+13. Pricing table + cost estimation
+14. Companion toolbar dropdown extension (BYOK options)
+
+**Total: ~5h**
+
+## Decisions
+
+| Question | Decision |
+|----------|----------|
+| Browser-direct vs. server-proxy? | Browser-direct primary. No server-proxy fallback in v1 — if CORS blocks, show error with link to docs. |
+| Providers in v1 | OpenAI, Anthropic, Gemini, Mistral |
+| Multiple keys per provider | Yes, one `isDefault`, others by label |
+| Cost estimation | Yes, hardcoded pricing table (update manually) |
+| Ollama BYOK (self-hosted) | Skip for v1 |
+| Sensitive content + BYOK | Blocked by default, explicit per-provider opt-in |
+| Key encryption | AES-GCM-256 via user master key (existing vault) |
+| Key sync across devices | NO — keys stay device-local (user must add on each device) |
diff --git a/packages/shared-llm/src/backends/byok-providers/anthropic.ts b/packages/shared-llm/src/backends/byok-providers/anthropic.ts
new file mode 100644
index 000000000..3e6c18c80
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/anthropic.ts
@@ -0,0 +1,122 @@
+/**
+ * Anthropic adapter.
+ *
+ * Differs from OpenAI:
+ * - Uses x-api-key header (not Bearer)
+ * - Needs anthropic-version header
+ * - Needs anthropic-dangerous-direct-browser-access for CORS
+ * - System prompt goes in its own `system` field, not as a message
+ * - SSE event schema is different (content_block_delta with text)
+ */
+
+import type { ByokProvider, ByokCallOptions } from './types';
+import type { GenerateResult } from '../../types';
+
+export const anthropicProvider: ByokProvider = {
+ id: 'anthropic',
+ displayName: 'Anthropic',
+ defaultModel: 'claude-sonnet-4-5',
+ availableModels: [
+ 'claude-opus-4-6',
+ 'claude-opus-4-5',
+ 'claude-sonnet-4-6',
+ 'claude-sonnet-4-5',
+ 'claude-haiku-4-5',
+ ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+ return callAnthropic(opts);
+ },
+};
+
+async function callAnthropic(opts: ByokCallOptions): Promise<GenerateResult> {
+ const startedAt = Date.now();
+
+ // Anthropic wants system prompt separately, user/assistant inline
+ const systemMessages = opts.messages.filter((m) => m.role === 'system');
+ const chatMessages = opts.messages.filter((m) => m.role !== 'system');
+ const system = systemMessages.map((m) => m.content).join('\n\n') || undefined;
+
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'x-api-key': opts.apiKey,
+ 'anthropic-version': '2023-06-01',
+ 'anthropic-dangerous-direct-browser-access': 'true',
+ },
+ body: JSON.stringify({
+ model: opts.model,
+ system,
+ messages: chatMessages.map((m) => ({ role: m.role, content: m.content })),
+ temperature: opts.temperature ?? 0.7,
+ max_tokens: opts.maxTokens ?? 1024,
+ stream: true,
+ }),
+ });
+
+ if (!response.ok) {
+ const errText = await response.text().catch(() => response.statusText);
+ throw new Error(`Anthropic API ${response.status}: ${errText.slice(0, 300)}`);
+ }
+
+ if (!response.body) {
+ throw new Error('Anthropic API: kein Response-Body');
+ }
+
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder();
+ let buffer = '';
+ let content = '';
+ let promptTokens = 0;
+ let completionTokens = 0;
+
+ while (true) {
+ const { value, done } = await reader.read();
+ if (done) break;
+ buffer += decoder.decode(value, { stream: true });
+
+ let newlineIdx: number;
+ while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+ const line = buffer.slice(0, newlineIdx).trim();
+ buffer = buffer.slice(newlineIdx + 1);
+
+ if (!line.startsWith('data: ')) continue;
+ const payload = line.slice(6).trim();
+ if (!payload) continue;
+
+ try {
+ const parsed = JSON.parse(payload) as {
+ type?: string;
+ delta?: { type?: string; text?: string };
+ message?: { usage?: { input_tokens?: number; output_tokens?: number } };
+ usage?: { input_tokens?: number; output_tokens?: number };
+ };
+
+ if (parsed.type === 'content_block_delta' && parsed.delta?.type === 'text_delta') {
+ const token = parsed.delta.text ?? '';
+ if (token) {
+ content += token;
+ opts.onToken?.(token);
+ }
+ } else if (parsed.type === 'message_start' && parsed.message?.usage) {
+ promptTokens = parsed.message.usage.input_tokens ?? 0;
+ } else if (parsed.type === 'message_delta' && parsed.usage) {
+ completionTokens = parsed.usage.output_tokens ?? completionTokens;
+ }
+ } catch {
+ // Ignore malformed lines
+ }
+ }
+ }
+
+ return {
+ content,
+ usage: {
+ promptTokens,
+ completionTokens,
+ totalTokens: promptTokens + completionTokens,
+ },
+ latencyMs: Date.now() - startedAt,
+ };
+}
diff --git a/packages/shared-llm/src/backends/byok-providers/gemini.ts b/packages/shared-llm/src/backends/byok-providers/gemini.ts
new file mode 100644
index 000000000..94b33af6e
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/gemini.ts
@@ -0,0 +1,129 @@
+/**
+ * Gemini adapter — direct REST API.
+ *
+ * Differs from OpenAI:
+ * - API key goes in query string (?key=...)
+ * - Messages use different schema: { role, parts: [{ text }] }
+ * - Roles are 'user' and 'model' (not 'assistant')
+ * - System prompt goes in `systemInstruction` field
+ * - Streaming via SSE at :streamGenerateContent endpoint
+ */
+
+import type { ByokProvider, ByokCallOptions } from './types';
+import type { GenerateResult, ChatMessage } from '../../types';
+
+export const geminiProvider: ByokProvider = {
+ id: 'gemini',
+ displayName: 'Google Gemini',
+ defaultModel: 'gemini-2.5-flash',
+ availableModels: [
+ 'gemini-2.5-pro',
+ 'gemini-2.5-flash',
+ 'gemini-2.5-flash-lite',
+ 'gemini-2.0-flash',
+ ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+ return callGemini(opts);
+ },
+};
+
+interface GeminiMessage {
+ role: 'user' | 'model';
+ parts: { text: string }[];
+}
+
+function toGeminiMessages(messages: ChatMessage[]): {
+ system?: string;
+ contents: GeminiMessage[];
+} {
+ const systemMessages = messages.filter((m) => m.role === 'system');
+ const chatMessages = messages.filter((m) => m.role !== 'system');
+ return {
+ system: systemMessages.map((m) => m.content).join('\n\n') || undefined,
+ contents: chatMessages.map((m) => ({
+ role: m.role === 'assistant' ? 'model' : 'user',
+ parts: [{ text: m.content }],
+ })),
+ };
+}
+
+async function callGemini(opts: ByokCallOptions): Promise<GenerateResult> {
+ const startedAt = Date.now();
+ const { system, contents } = toGeminiMessages(opts.messages);
+
+ const url = `https://generativelanguage.googleapis.com/v1beta/models/${opts.model}:streamGenerateContent?alt=sse&key=${opts.apiKey}`;
+
+ const response = await fetch(url, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ systemInstruction: system ? { parts: [{ text: system }] } : undefined,
+ contents,
+ generationConfig: {
+ temperature: opts.temperature ?? 0.7,
+ maxOutputTokens: opts.maxTokens ?? 1024,
+ },
+ }),
+ });
+
+ if (!response.ok) {
+ const errText = await response.text().catch(() => response.statusText);
+ throw new Error(`Gemini API ${response.status}: ${errText.slice(0, 300)}`);
+ }
+
+ if (!response.body) {
+ throw new Error('Gemini API: kein Response-Body');
+ }
+
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder();
+ let buffer = '';
+ let content = '';
+ let promptTokens = 0;
+ let completionTokens = 0;
+
+ while (true) {
+ const { value, done } = await reader.read();
+ if (done) break;
+ buffer += decoder.decode(value, { stream: true });
+
+ let newlineIdx: number;
+ while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+ const line = buffer.slice(0, newlineIdx).trim();
+ buffer = buffer.slice(newlineIdx + 1);
+
+ if (!line.startsWith('data: ')) continue;
+ const payload = line.slice(6).trim();
+ if (!payload) continue;
+
+ try {
+ const parsed = JSON.parse(payload) as {
+ candidates?: { content?: { parts?: { text?: string }[] } }[];
+ usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
+ };
+ const token = parsed.candidates?.[0]?.content?.parts?.[0]?.text ?? '';
+ if (token) {
+ content += token;
+ opts.onToken?.(token);
+ }
+ if (parsed.usageMetadata) {
+ promptTokens = parsed.usageMetadata.promptTokenCount ?? promptTokens;
+ completionTokens = parsed.usageMetadata.candidatesTokenCount ?? completionTokens;
+ }
+ } catch {
+ // Ignore malformed lines
+ }
+ }
+ }
+
+ return {
+ content,
+ usage: {
+ promptTokens,
+ completionTokens,
+ totalTokens: promptTokens + completionTokens,
+ },
+ latencyMs: Date.now() - startedAt,
+ };
+}
diff --git a/packages/shared-llm/src/backends/byok-providers/index.ts b/packages/shared-llm/src/backends/byok-providers/index.ts
new file mode 100644
index 000000000..6f67b4e57
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/index.ts
@@ -0,0 +1,19 @@
+export { openaiProvider } from './openai';
+export { anthropicProvider } from './anthropic';
+export { geminiProvider } from './gemini';
+export { mistralProvider } from './mistral';
+export type { ByokProvider, ByokProviderId, ByokCallOptions } from './types';
+
+import { openaiProvider } from './openai';
+import { anthropicProvider } from './anthropic';
+import { geminiProvider } from './gemini';
+import { mistralProvider } from './mistral';
+import type { ByokProvider } from './types';
+
+/** All built-in BYOK providers. Apps can still add custom ones. */
+export const BUILTIN_BYOK_PROVIDERS: readonly ByokProvider[] = [
+ openaiProvider,
+ anthropicProvider,
+ geminiProvider,
+ mistralProvider,
+];
diff --git a/packages/shared-llm/src/backends/byok-providers/mistral.ts b/packages/shared-llm/src/backends/byok-providers/mistral.ts
new file mode 100644
index 000000000..9c9f46fb9
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/mistral.ts
@@ -0,0 +1,23 @@
+import type { ByokProvider, ByokCallOptions } from './types';
+import { callOpenAiCompat } from './openai-compat';
+import type { GenerateResult } from '../../types';
+
+export const mistralProvider: ByokProvider = {
+ id: 'mistral',
+ displayName: 'Mistral AI',
+ defaultModel: 'mistral-small-latest',
+ availableModels: [
+ 'mistral-large-latest',
+ 'mistral-small-latest',
+ 'mistral-medium-latest',
+ 'open-mistral-nemo',
+ 'codestral-latest',
+ ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+ return callOpenAiCompat(
+ { baseUrl: 'https://api.mistral.ai/v1', providerName: 'Mistral' },
+ opts
+ );
+ },
+};
diff --git a/packages/shared-llm/src/backends/byok-providers/openai-compat.ts b/packages/shared-llm/src/backends/byok-providers/openai-compat.ts
new file mode 100644
index 000000000..c07c17ac9
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/openai-compat.ts
@@ -0,0 +1,101 @@
+/**
+ * OpenAI-compatible API adapter (base for OpenAI, Mistral, Groq, etc.)
+ *
+ * Uses the Chat Completions API schema. Streaming via SSE, parsing
+ * the `data: {json}` lines, extracting `choices[0].delta.content`.
+ */
+
+import type { GenerateResult } from '../../types';
+import type { ByokCallOptions } from './types';
+
+export interface OpenAiCompatConfig {
+ baseUrl: string;
+ providerName: string; // For error messages
+  extraHeaders?: Record<string, string>;
+}
+
+export async function callOpenAiCompat(
+ config: OpenAiCompatConfig,
+ opts: ByokCallOptions
+): Promise<GenerateResult> {
+ const startedAt = Date.now();
+ const url = `${config.baseUrl.replace(/\/$/, '')}/chat/completions`;
+
+ const response = await fetch(url, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${opts.apiKey}`,
+ ...(config.extraHeaders ?? {}),
+ },
+ body: JSON.stringify({
+ model: opts.model,
+ messages: opts.messages.map((m) => ({ role: m.role, content: m.content })),
+ temperature: opts.temperature ?? 0.7,
+ max_tokens: opts.maxTokens ?? 1024,
+ stream: true,
+ stream_options: { include_usage: true },
+ }),
+ });
+
+ if (!response.ok) {
+ const errText = await response.text().catch(() => response.statusText);
+ const short = errText.slice(0, 300);
+ throw new Error(`${config.providerName} API ${response.status}: ${short}`);
+ }
+
+ if (!response.body) {
+ throw new Error(`${config.providerName} API: kein Response-Body`);
+ }
+
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder();
+ let buffer = '';
+ let content = '';
+ let promptTokens = 0;
+ let completionTokens = 0;
+
+ while (true) {
+ const { value, done } = await reader.read();
+ if (done) break;
+ buffer += decoder.decode(value, { stream: true });
+
+ let newlineIdx: number;
+ while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+ const line = buffer.slice(0, newlineIdx).trim();
+ buffer = buffer.slice(newlineIdx + 1);
+
+ if (!line.startsWith('data: ')) continue;
+ const payload = line.slice(6).trim();
+ if (payload === '[DONE]') continue;
+
+ try {
+ const parsed = JSON.parse(payload) as {
+ choices?: { delta?: { content?: string } }[];
+ usage?: { prompt_tokens?: number; completion_tokens?: number };
+ };
+ const token = parsed.choices?.[0]?.delta?.content ?? '';
+ if (token) {
+ content += token;
+ opts.onToken?.(token);
+ }
+ if (parsed.usage) {
+ promptTokens = parsed.usage.prompt_tokens ?? 0;
+ completionTokens = parsed.usage.completion_tokens ?? 0;
+ }
+ } catch {
+ // Ignore malformed lines
+ }
+ }
+ }
+
+ return {
+ content,
+ usage: {
+ promptTokens,
+ completionTokens,
+ totalTokens: promptTokens + completionTokens,
+ },
+ latencyMs: Date.now() - startedAt,
+ };
+}
diff --git a/packages/shared-llm/src/backends/byok-providers/openai.ts b/packages/shared-llm/src/backends/byok-providers/openai.ts
new file mode 100644
index 000000000..694c8f1a5
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/openai.ts
@@ -0,0 +1,14 @@
+import type { ByokProvider, ByokCallOptions } from './types';
+import { callOpenAiCompat } from './openai-compat';
+import type { GenerateResult } from '../../types';
+
+export const openaiProvider: ByokProvider = {
+ id: 'openai',
+ displayName: 'OpenAI',
+ defaultModel: 'gpt-4o-mini',
+ availableModels: ['gpt-5', 'gpt-5-mini', 'gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'o1', 'o1-mini'],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+ return callOpenAiCompat({ baseUrl: 'https://api.openai.com/v1', providerName: 'OpenAI' }, opts);
+ },
+};
diff --git a/packages/shared-llm/src/backends/byok-providers/types.ts b/packages/shared-llm/src/backends/byok-providers/types.ts
new file mode 100644
index 000000000..fde51eadf
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/types.ts
@@ -0,0 +1,39 @@
+/**
+ * BYOK Provider abstraction.
+ *
+ * Each supported third-party LLM (OpenAI, Anthropic, Gemini, Mistral, ...)
+ * implements this interface. Adapters do the direct browser-to-provider
+ * fetch using the user's API key.
+ */
+
+import type { ChatMessage, GenerateResult } from '../../types';
+
+export type ByokProviderId = 'openai' | 'anthropic' | 'gemini' | 'mistral';
+
+export interface ByokProvider {
+ readonly id: ByokProviderId;
+ readonly displayName: string;
+ readonly defaultModel: string;
+ readonly availableModels: readonly string[];
+
+ /**
+ * Call the provider with the user's API key.
+ * Throws on network errors, auth errors, or content policy blocks.
+ */
+  call(opts: ByokCallOptions): Promise<GenerateResult>;
+}
+
+export interface ByokCallOptions {
+ apiKey: string;
+ model: string;
+ messages: ChatMessage[];
+ temperature?: number;
+ maxTokens?: number;
+ onToken?: (token: string) => void;
+}
+
+export interface ByokProviderError extends Error {
+ provider: ByokProviderId;
+ status?: number;
+ code?: string;
+}
diff --git a/packages/shared-llm/src/backends/byok.ts b/packages/shared-llm/src/backends/byok.ts
new file mode 100644
index 000000000..394e182db
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok.ts
@@ -0,0 +1,132 @@
+/**
+ * BYOK Backend — routes LLM calls through the user's own API keys.
+ *
+ * The backend itself lives in shared-llm (so the orchestrator can
+ * instantiate it alongside browser/mana-server/cloud), but the
+ * actual keys live in the consuming app's encrypted IndexedDB.
+ *
+ * Apps inject a `ByokKeyResolver` callback at init time. The backend
+ * calls it whenever it needs a key, gets back `{ apiKey, model,
+ * provider }`, and dispatches to the matching provider adapter.
+ *
+ * If no key is configured for any provider, isAvailable() returns
+ * false and the orchestrator skips this tier.
+ */
+
+import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
+import type { ByokProvider, ByokProviderId } from './byok-providers/types';
+
+export interface ResolvedByokKey {
+ provider: ByokProviderId;
+ apiKey: string;
+ model: string;
+}
+
+/** App-side callback — looks up the appropriate key for a call. */
+export type ByokKeyResolver = (opts: {
+ /** Task name from LlmTaskRequest (e.g. "companion.chat") */
+ taskName: string;
+ /** Optional user-forced provider (from settings.byok.defaultProvider
+ * or from task-specific override like 'byok:anthropic') */
+ preferredProvider?: ByokProviderId;
+}) => Promise<ResolvedByokKey | null>;
+
+/** Called after a successful generation so the app can increment usage counters. */
+export type ByokUsageCallback = (opts: {
+ provider: ByokProviderId;
+ model: string;
+ promptTokens: number;
+ completionTokens: number;
+ latencyMs: number;
+}) => void;
+
+export interface ByokBackendOptions {
+ resolver: ByokKeyResolver;
+ providers: readonly ByokProvider[];
+ onUsage?: ByokUsageCallback;
+}
+
+export class ByokBackend implements LlmBackend {
+ readonly tier = 'byok' as const;
+ private readonly resolver: ByokKeyResolver;
+  private readonly providers: Map<ByokProviderId, ByokProvider>;
+ private readonly onUsage?: ByokUsageCallback;
+ /** Whether at least one key has been configured. Set after first
+ * resolver call; the orchestrator uses isAvailable() to skip the
+ * tier when the user hasn't added any keys yet. */
+ private keyConfigured: boolean | null = null;
+
+ constructor(opts: ByokBackendOptions) {
+ this.resolver = opts.resolver;
+    this.providers = new Map(opts.providers.map((p) => [p.id, p] as const));
+ this.onUsage = opts.onUsage;
+ }
+
+  /** Inform the backend that the user has added or removed keys — clears
+   * the cached availability flag so the next isReady() call re-probes
+   * the resolver. */
+ invalidateAvailability(): void {
+ this.keyConfigured = null;
+ }
+
+ isAvailable(): boolean {
+ // If we haven't probed yet, assume available and let resolver
+ // fail gracefully. After the first resolver miss we cache false.
+ return this.keyConfigured !== false;
+ }
+
+  async isReady(): Promise<boolean> {
+    // Probe with a synthetic task name to see if *any* key resolves
+ try {
+ const key = await this.resolver({ taskName: '__probe__' });
+ this.keyConfigured = key !== null;
+ return this.keyConfigured;
+ } catch {
+ this.keyConfigured = false;
+ return false;
+ }
+ }
+
+  async generate(req: LlmTaskRequest): Promise<GenerateResult> {
+    // Provider overrides (e.g. a taskOverride of 'byok:anthropic') are
+    // resolved app-side: the resolver gets the task name and applies any
+    // preferredProvider from settings itself.
+ const resolved = await this.resolver({ taskName: req.taskName });
+ if (!resolved) {
+ this.keyConfigured = false;
+ throw new Error(
+ 'Kein BYOK-Schluessel konfiguriert. Bitte unter Einstellungen → KI-Keys hinterlegen.'
+ );
+ }
+ this.keyConfigured = true;
+
+ const provider = this.providers.get(resolved.provider);
+ if (!provider) {
+ throw new Error(`BYOK-Provider nicht unterstuetzt: ${resolved.provider}`);
+ }
+
+ const startedAt = Date.now();
+ const result = await provider.call({
+ apiKey: resolved.apiKey,
+ model: resolved.model,
+ messages: req.messages,
+ temperature: req.temperature,
+ maxTokens: req.maxTokens,
+ onToken: req.onToken,
+ });
+ const latencyMs = Date.now() - startedAt;
+
+ // Report usage so the app can update per-key counters
+ if (this.onUsage && result.usage) {
+ this.onUsage({
+ provider: resolved.provider,
+ model: resolved.model,
+ promptTokens: result.usage.promptTokens,
+ completionTokens: result.usage.completionTokens,
+ latencyMs,
+ });
+ }
+
+ return { ...result, latencyMs };
+ }
+}
diff --git a/packages/shared-llm/src/index.ts b/packages/shared-llm/src/index.ts
index 0055d8d5e..91c36ba25 100644
--- a/packages/shared-llm/src/index.ts
+++ b/packages/shared-llm/src/index.ts
@@ -44,6 +44,26 @@ export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator';
export { BrowserBackend } from './backends/browser';
export { CloudBackend, type CloudBackendOptions } from './backends/cloud';
export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server';
+export {
+ ByokBackend,
+ type ByokBackendOptions,
+ type ByokKeyResolver,
+ type ResolvedByokKey,
+ type ByokUsageCallback,
+} from './backends/byok';
+export {
+ BUILTIN_BYOK_PROVIDERS,
+ openaiProvider,
+ anthropicProvider,
+ geminiProvider,
+ mistralProvider,
+ type ByokProvider,
+ type ByokProviderId,
+ type ByokCallOptions,
+} from './backends/byok-providers';
+
+// Pricing
+export { MODEL_PRICING, estimateCost, formatCost, type ModelPricing } from './pricing';
// Singleton store + Svelte 5 reactive hooks
export {
diff --git a/packages/shared-llm/src/orchestrator.ts b/packages/shared-llm/src/orchestrator.ts
index aee94ec1a..7176f6a08 100644
--- a/packages/shared-llm/src/orchestrator.ts
+++ b/packages/shared-llm/src/orchestrator.ts
@@ -70,6 +70,18 @@ export class LlmOrchestrator {
this.settings = settings;
}
+ /** Register (or replace) a backend at runtime — used by the app
+ * to wire up the BYOK backend after initial orchestrator construction,
+ * since BYOK needs access to app-side IndexedDB keys. */
+ registerBackend(backend: LlmBackend): void {
+ this.backendsByTier.set(backend.tier, backend);
+ }
+
+ /** Remove a backend (e.g. when the user disables BYOK). */
+ unregisterBackend(tier: LlmTier): void {
+ this.backendsByTier.delete(tier);
+ }
+
/** Public read-only view for UI components that want to react to
* the current settings (e.g. the tier selector). */
getSettings(): Readonly<LlmSettings> {
diff --git a/packages/shared-llm/src/pricing.ts b/packages/shared-llm/src/pricing.ts
new file mode 100644
index 000000000..b1308b416
--- /dev/null
+++ b/packages/shared-llm/src/pricing.ts
@@ -0,0 +1,69 @@
+/**
+ * Per-model token pricing for BYOK cost estimation.
+ *
+ * Values in USD per 1M tokens (as published by providers as of
+ * 2026-04). Update manually when providers change pricing.
+ *
+ * Only includes models Mana exposes in the BYOK provider adapters.
+ */
+
+export interface ModelPricing {
+ /** USD per 1 million input tokens */
+ inputPerMillion: number;
+ /** USD per 1 million output tokens */
+ outputPerMillion: number;
+}
+
+export const MODEL_PRICING: Record<string, ModelPricing> = {
+ // ── OpenAI ──────────────────────────────────────────
+ 'gpt-5': { inputPerMillion: 15, outputPerMillion: 60 },
+ 'gpt-5-mini': { inputPerMillion: 3, outputPerMillion: 12 },
+ 'gpt-4o': { inputPerMillion: 5, outputPerMillion: 20 },
+ 'gpt-4o-mini': { inputPerMillion: 0.3, outputPerMillion: 1.2 },
+ 'gpt-4-turbo': { inputPerMillion: 10, outputPerMillion: 30 },
+ o1: { inputPerMillion: 15, outputPerMillion: 60 },
+ 'o1-mini': { inputPerMillion: 3, outputPerMillion: 12 },
+
+ // ── Anthropic ───────────────────────────────────────
+ 'claude-opus-4-6': { inputPerMillion: 15, outputPerMillion: 75 },
+ 'claude-opus-4-5': { inputPerMillion: 15, outputPerMillion: 75 },
+ 'claude-sonnet-4-6': { inputPerMillion: 3, outputPerMillion: 15 },
+ 'claude-sonnet-4-5': { inputPerMillion: 3, outputPerMillion: 15 },
+ 'claude-haiku-4-5': { inputPerMillion: 0.8, outputPerMillion: 4 },
+
+ // ── Google Gemini ───────────────────────────────────
+ 'gemini-2.5-pro': { inputPerMillion: 1.25, outputPerMillion: 5 },
+ 'gemini-2.5-flash': { inputPerMillion: 0.15, outputPerMillion: 0.6 },
+ 'gemini-2.5-flash-lite': { inputPerMillion: 0.075, outputPerMillion: 0.3 },
+ 'gemini-2.0-flash': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
+
+ // ── Mistral ─────────────────────────────────────────
+ 'mistral-large-latest': { inputPerMillion: 2, outputPerMillion: 6 },
+ 'mistral-medium-latest': { inputPerMillion: 2.7, outputPerMillion: 8.1 },
+ 'mistral-small-latest': { inputPerMillion: 0.2, outputPerMillion: 0.6 },
+ 'open-mistral-nemo': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
+ 'codestral-latest': { inputPerMillion: 0.3, outputPerMillion: 0.9 },
+};
+
+/** USD cost for a given call. Returns 0 if model isn't in the table. */
+export function estimateCost(
+ model: string,
+ promptTokens: number,
+ completionTokens: number
+): number {
+ const p = MODEL_PRICING[model];
+ if (!p) return 0;
+ return (
+ (promptTokens / 1_000_000) * p.inputPerMillion +
+ (completionTokens / 1_000_000) * p.outputPerMillion
+ );
+}
+
+/** Format USD value with at most 4 decimals (for small per-call amounts). */
+export function formatCost(usd: number): string {
+ if (usd === 0) return '—';
+ if (usd < 0.0001) return '< $0.0001';
+ if (usd < 0.01) return `$${usd.toFixed(4)}`;
+ if (usd < 1) return `$${usd.toFixed(3)}`;
+ return `$${usd.toFixed(2)}`;
+}
diff --git a/packages/shared-llm/src/tiers.ts b/packages/shared-llm/src/tiers.ts
index 85294da06..5334099a2 100644
--- a/packages/shared-llm/src/tiers.ts
+++ b/packages/shared-llm/src/tiers.ts
@@ -1,7 +1,7 @@
/**
* Tier definitions for the Mana LLM orchestrator.
*
- * Four tiers, ordered from most-private to least-private:
+ * Five tiers, ordered from most-private to least-private:
*
* none — Deterministic parsers / heuristics. No LLM at all.
* Always available. Zero cost. Quality varies by task.
@@ -14,26 +14,28 @@
* (currently the Mac Mini, gemma3:4b by default).
* Data leaves the device but stays in our control.
*
+ * byok — User-provided API keys (OpenAI, Anthropic, Gemini,
+ * Mistral). Browser-direct fetches. Data goes to a
+ * third-party account the user manages. User controls
+ * the provider's privacy/retention policy directly.
+ *
* cloud — services/mana-llm proxied to a third-party provider
* (Google Gemini, configured via google_api_key in the
- * mana-llm service env). Data goes to the third party.
- *
- * The numeric rank is used by the orchestrator to compare a user's
- * preferred tier against a task's minimum tier ("can the user even
- * run this task?") and is the canonical sort order for the privacy
- * gradient.
+ * mana-llm service env). Mana-managed, charges Mana
+ * credits. Data goes to the third party via Mana.
*/
-export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud';
+export type LlmTier = 'none' | 'browser' | 'mana-server' | 'byok' | 'cloud';
export const TIER_RANK: Record<LlmTier, number> = {
none: 0,
browser: 1,
'mana-server': 2,
- cloud: 3,
+ byok: 3,
+ cloud: 4,
};
-export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud'];
+export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'byok', 'cloud'];
/** Human-readable label, kept here so backends/UI agree on naming. */
export function tierLabel(tier: LlmTier): string {
@@ -44,6 +46,8 @@ export function tierLabel(tier: LlmTier): string {
return 'Auf deinem Gerät';
case 'mana-server':
return 'Mana-Server';
+ case 'byok':
+ return 'Dein API-Key';
case 'cloud':
return 'Google Gemini';
}