diff --git a/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte b/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
index a394cf40a..27aae5124 100644
--- a/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
+++ b/apps/mana/apps/web/src/lib/components/llm/SourceBadge.svelte
@@ -14,7 +14,7 @@
  *
  */
 import { llmSettingsState, tierLabel, type LlmTier } from '@mana/shared-llm';
-import { Lightning, Cpu, HardDrive, Cloud } from '@mana/shared-icons';
+import { Lightning, Cpu, HardDrive, Cloud, Key } from '@mana/shared-icons';
 
 interface Props {
   tier: LlmTier;
@@ -39,6 +39,10 @@
     color: 'border-blue-500/40 bg-blue-500/10 text-blue-600 dark:text-blue-400',
     icon: HardDrive, // our infrastructure
   },
+  byok: {
+    color: 'border-violet-500/40 bg-violet-500/10 text-violet-600 dark:text-violet-400',
+    icon: Key, // user-supplied key
+  },
   cloud: {
     color: 'border-amber-500/40 bg-amber-500/10 text-amber-600 dark:text-amber-400',
     icon: Cloud, // remote
diff --git a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
index 7ace49162..cef405fed 100644
--- a/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
+++ b/apps/mana/apps/web/src/lib/modules/memoro/views/DetailView.svelte
@@ -23,6 +23,7 @@
     none: 'Lokal (regelbasiert)',
     browser: 'Auf deinem Gerät (Gemma 4 E2B)',
     'mana-server': 'Mana-Server (Gemma 4 E4B)',
+    byok: 'Dein API-Key',
     cloud: 'Google Gemini',
   };
 
diff --git a/docs/architecture/BYOK_PLAN.md b/docs/architecture/BYOK_PLAN.md
new file mode 100644
index 000000000..4b5ec89af
--- /dev/null
+++ b/docs/architecture/BYOK_PLAN.md
@@ -0,0 +1,326 @@
+# BYOK — Bring Your Own Key
+
+> Architecture and implementation plan for user-provided API keys.
+> Status: planning (2026-04-14)
+
+## Goals
+
+- Users store their own API keys (OpenAI, Anthropic, Gemini, Mistral)
+- Keys are encrypted in IndexedDB (user master key, AES-GCM)
+- Keys are never sent to Mana's servers (browser-direct provider calls)
+- The orchestrator uses BYOK as a fifth tier alongside browser/mana-server/cloud
+- Per-call cost estimation via a pricing table
+- Multiple keys per provider (label-based, one `isDefault`)
+
+## Architecture
+
+```
+User (Browser)
+  |
+  v
+CompanionChat / any LLM task
+  |
+  v
+LlmOrchestrator.run(task, input)
+  |
+  v
+tier === 'byok' ?
+  |
+  v
+ByokBackend
+  |
+  v
+getByokKey(provider)   [callback provided at app init]
+  |
+  v
+byokKeyVault (IndexedDB, encrypted)
+  |
+  v
+  decrypt via user master key
+  |
+  v
+ByokBackend.callProvider(provider, key, messages)
+  |
+  v
+Provider-specific adapter (openai/anthropic/gemini/mistral)
+  |
+  v
+direct HTTPS to api.openai.com / api.anthropic.com / ...
+```
+
+## Tier placement
+
+New tier order (ranked by "where data goes"):
+
+```
+none         (0) — stays on device
+browser      (1) — stays on device
+mana-server  (2) — Mana's own infrastructure
+byok         (3) — user's third-party accounts (user-controlled)
+cloud        (4) — Mana's cloud (charges user's Mana credits)
+```
+
+Reasoning: `byok` sits between `mana-server` and `cloud` because it
+leaves the user's network but goes to an account the user manages
+personally. `cloud` is last because it costs the user Mana credits.
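To make the ranking concrete: a minimal sketch of how the orchestrator can use the numeric ranks to pick candidate tiers for a task. `pickCandidates`, `minTier`, and `allowedTiers` are illustrative names, not part of the plan; `ALL_TIERS` and `TIER_RANK` match `tiers.ts` at the end of this diff.

```typescript
import { ALL_TIERS, TIER_RANK, type LlmTier } from '@mana/shared-llm';

// Hypothetical helper: candidate tiers for a task, most-private first.
// `minTier` is the weakest tier that can handle the task at all;
// `allowedTiers` comes from user settings.
function pickCandidates(minTier: LlmTier, allowedTiers: LlmTier[]): LlmTier[] {
  return ALL_TIERS.filter(
    (t) => TIER_RANK[t] >= TIER_RANK[minTier] && allowedTiers.includes(t)
  );
}

// A task needing at least mana-server quality, user allows everything
// except cloud: returns ['mana-server', 'byok']
pickCandidates('mana-server', ['none', 'browser', 'mana-server', 'byok']);
```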
+
+## Files to create
+
+```
+packages/shared-llm/src/
+  tiers.ts               → extend with 'byok'
+  types.ts               → ByokKeyResolver callback type
+  backends/
+    byok.ts              → ByokBackend class
+    byok-providers/
+      openai.ts          → OpenAI API adapter
+      anthropic.ts       → Anthropic API adapter
+      gemini.ts          → Gemini REST adapter
+      mistral.ts         → Mistral API adapter (OpenAI-compat)
+      types.ts           → ByokProvider interface
+  pricing.ts             → per-model token pricing
+  store.svelte.ts        → register ByokBackend
+
+apps/mana/apps/web/src/
+  lib/byok/
+    types.ts             → ByokKey interface
+    vault.ts             → encrypted IndexedDB CRUD
+    store.svelte.ts      → reactive Svelte store
+    init.ts              → wire key resolver into ByokBackend
+  routes/(app)/settings/ai-keys/
+    +page.svelte         → management UI
+
+apps/mana/apps/web/src/lib/data/database.ts
+  → add _byokKeys table (v15 schema)
+```
+
+## Data model
+
+```typescript
+// packages/shared-llm/src/backends/byok-providers/types.ts
+export type ByokProviderId = 'openai' | 'anthropic' | 'gemini' | 'mistral';
+
+export interface ByokProvider {
+  id: ByokProviderId;
+  displayName: string;
+  defaultModel: string;
+  availableModels: string[];
+  needsDangerousHeader?: boolean; // Anthropic
+  /** Call the provider with the user's key, return GenerateResult */
+  call(opts: {
+    apiKey: string;
+    model: string;
+    messages: ChatMessage[];
+    temperature?: number;
+    maxTokens?: number;
+    onToken?: (token: string) => void;
+  }): Promise<GenerateResult>;
+}
+
+// apps/mana/apps/web/src/lib/byok/types.ts
+export interface ByokKey {
+  id: string;
+  provider: ByokProviderId;
+  label: string;     // "Work Anthropic"
+  keyCipher: string; // AES-GCM encrypted
+  keyIv: string;     // init vector
+  model?: string;    // override default model
+  isDefault: boolean;
+  createdAt: string;
+  updatedAt: string;
+  lastUsedAt?: string;
+  usageCount: number;
+  totalTokens: number;
+  deletedAt?: string;
+}
+```
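The `keyCipher`/`keyIv` pair above maps directly onto WebCrypto. A minimal sketch of the vault's encrypt/decrypt step, assuming the existing master-key vault hands out an AES-GCM `CryptoKey`; the helper names are illustrative, not part of the plan:

```typescript
const te = new TextEncoder();
const td = new TextDecoder();

// Sketch of how vault.ts could produce ByokKey.keyCipher / ByokKey.keyIv.
async function encryptApiKey(masterKey: CryptoKey, apiKey: string) {
  const iv = crypto.getRandomValues(new Uint8Array(12)); // 96-bit GCM nonce
  const cipher = await crypto.subtle.encrypt(
    { name: 'AES-GCM', iv },
    masterKey,
    te.encode(apiKey)
  );
  return {
    keyCipher: btoa(String.fromCharCode(...new Uint8Array(cipher))),
    keyIv: btoa(String.fromCharCode(...iv)),
  };
}

async function decryptApiKey(masterKey: CryptoKey, keyCipher: string, keyIv: string) {
  const bytes = (s: string) => Uint8Array.from(atob(s), (c) => c.charCodeAt(0));
  const plain = await crypto.subtle.decrypt(
    { name: 'AES-GCM', iv: bytes(keyIv) },
    masterKey,
    bytes(keyCipher)
  );
  return td.decode(plain);
}
```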
+
+## Key resolver callback
+
+The backend lives in `shared-llm` but keys live in the app's IndexedDB.
+We inject a resolver at app init:
+
+```typescript
+// packages/shared-llm/src/backends/byok.ts
+export type ByokKeyResolver = (
+  provider: ByokProviderId,
+  preferredLabel?: string,
+) => Promise<{ apiKey: string; model: string } | null>;
+
+export class ByokBackend implements LlmBackend {
+  readonly tier = 'byok' as const;
+  constructor(
+    private resolver: ByokKeyResolver,
+    private providers: Map<ByokProviderId, ByokProvider>,
+  ) {}
+  // ...
+}
+
+// apps/mana/apps/web/src/lib/byok/init.ts (app init)
+import { llmOrchestrator } from '@mana/shared-llm';
+import { getKeyForProvider } from './store.svelte';
+
+llmOrchestrator.registerByokResolver(getKeyForProvider);
+```
+
+## Provider adapters
+
+### OpenAI (CORS-friendly)
+
+```typescript
+fetch('https://api.openai.com/v1/chat/completions', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    'Authorization': `Bearer ${apiKey}`,
+  },
+  body: JSON.stringify({
+    model, messages, temperature, max_tokens: maxTokens, stream: true,
+  }),
+});
+// SSE streaming response
+```
+
+### Anthropic (needs dangerous header)
+
+```typescript
+fetch('https://api.anthropic.com/v1/messages', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    'x-api-key': apiKey,
+    'anthropic-version': '2023-06-01',
+    'anthropic-dangerous-direct-browser-access': 'true',
+  },
+  body: JSON.stringify({ model, messages, max_tokens: maxTokens, stream: true }),
+});
+// SSE streaming with different event schema than OpenAI
+```
+
+### Gemini (REST with key in URL)
+
+```typescript
+fetch(`https://generativelanguage.googleapis.com/v1beta/models/${model}:streamGenerateContent?key=${apiKey}`, {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  body: JSON.stringify({
+    contents: messagesToGeminiFormat(messages),
+    generationConfig: { temperature, maxOutputTokens: maxTokens },
+  }),
+});
+// Different message format!
+```
+
+### Mistral (OpenAI-compatible)
+
+```typescript
+fetch('https://api.mistral.ai/v1/chat/completions', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    'Authorization': `Bearer ${apiKey}`,
+  },
+  body: JSON.stringify({
+    model, messages, temperature, max_tokens: maxTokens, stream: true,
+  }),
+});
+// Same as OpenAI, can reuse adapter
+```
+
+## Pricing (for cost estimation)
+
+```typescript
+// packages/shared-llm/src/pricing.ts
+export const PRICING: Record<string, { inputPer1k: number; outputPer1k: number }> = {
+  // OpenAI (USD per 1K tokens)
+  'gpt-5':        { inputPer1k: 0.015, outputPer1k: 0.060 },
+  'gpt-4o':       { inputPer1k: 0.005, outputPer1k: 0.020 },
+  'gpt-4o-mini':  { inputPer1k: 0.0003, outputPer1k: 0.0012 },
+  // Anthropic
+  'claude-opus-4.6':   { inputPer1k: 0.015, outputPer1k: 0.075 },
+  'claude-sonnet-4.6': { inputPer1k: 0.003, outputPer1k: 0.015 },
+  // Gemini
+  'gemini-2.5-pro':   { inputPer1k: 0.00125, outputPer1k: 0.005 },
+  'gemini-2.5-flash': { inputPer1k: 0.00015, outputPer1k: 0.0006 },
+  // Mistral
+  'mistral-large-latest': { inputPer1k: 0.002, outputPer1k: 0.006 },
+  'mistral-small-latest': { inputPer1k: 0.0002, outputPer1k: 0.0006 },
+};
+
+export function estimateCost(model: string, promptTokens: number, completionTokens: number): number {
+  const p = PRICING[model];
+  if (!p) return 0;
+  return (promptTokens / 1000) * p.inputPer1k + (completionTokens / 1000) * p.outputPer1k;
+}
+```
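A quick sanity check of the estimator against the table above (pure arithmetic from the listed rates):

```typescript
// 2,000 prompt + 500 completion tokens on gemini-2.5-flash:
// (2000 / 1000) * 0.00015 + (500 / 1000) * 0.0006 = 0.0003 + 0.0003
estimateCost('gemini-2.5-flash', 2000, 500); // → 0.0006 (USD)
```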
+
+## Privacy rules
+
+```typescript
+// In orchestrator routing
+if (task.contentClass === 'sensitive') {
+  // BYOK blocked by default — leaves device to third-party
+  candidates = candidates.filter(t => t !== 'byok');
+}
+// User can opt-in per provider via
+// settings.byok.sensitiveOptIn = ['anthropic']
+```
+
+## Settings schema extensions
+
+```typescript
+// LlmSettings (in shared-llm/src/types.ts)
+export interface LlmSettings {
+  allowedTiers: LlmTier[];
+  taskOverrides: Record<string, LlmTier>; // + 'byok' now valid
+  fallbackToRulesOnError: boolean;
+  showSourceInUi: boolean;
+  cloudConsentGiven: boolean;
+  // NEW:
+  byok?: {
+    defaultProvider?: ByokProviderId;
+    sensitiveOptIn: ByokProviderId[]; // explicit consent for sensitive content
+    preferredModel?: Record<ByokProviderId, string>; // per-provider model override
+  };
+}
+```
+
+## Implementation order
+
+**Phase 1 — Foundation (1.5h)**
+1. Extend LlmTier with 'byok' in shared-llm
+2. Create ByokKey vault (IndexedDB + encrypt/decrypt)
+3. ByokBackend skeleton with provider registry
+4. Wire into orchestrator
+
+**Phase 2 — First provider (30min)**
+5. OpenAI adapter (simplest — CORS ok)
+6. Test via companion chat
+
+**Phase 3 — More providers (1.5h)**
+7. Anthropic adapter (with dangerous-header)
+8. Gemini adapter (different message format)
+9. Mistral adapter (OpenAI-compatible, trivial)
+
+**Phase 4 — UI (1.5h)**
+10. Settings/ai-keys page
+11. Add + edit + delete key modals
+12. Usage tracking (increment on each call)
+
+**Phase 5 — Polish (30min)**
+13. Pricing table + cost estimation
+14. Companion toolbar dropdown extension (BYOK options)
+
+**Total: ~5.5h**
+
+## Decisions
+
+| Question | Decision |
+|----------|----------|
+| Browser-direct vs. server-proxy? | Browser-direct primary. No server-proxy fallback in v1 — if CORS blocks, show error with link to docs. |
+| Providers in v1 | OpenAI, Anthropic, Gemini, Mistral |
+| Multiple keys per provider | Yes, one `isDefault`, others by label |
+| Cost estimation | Yes, hardcoded pricing table (update manually) |
+| Ollama BYOK (self-hosted) | Skip for v1 |
+| Sensitive content + BYOK | Blocked by default, explicit per-provider opt-in |
+| Key encryption | AES-GCM-256 via user master key (existing vault) |
+| Key sync across devices | NO — keys stay device-local (user must add on each device) |
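Between the plan and the implementation below, a sketch of what the app-side resolver (the plan's `lib/byok/store.svelte.ts`) could look like against the `ByokKeyResolver` shape defined in `backends/byok.ts` later in this diff. The vault helpers are assumptions, declared as such:

```typescript
import { BUILTIN_BYOK_PROVIDERS, type ByokKeyResolver } from '@mana/shared-llm';
import type { ByokKey } from './types';

// Assumed vault helpers (see vault.ts in the file plan above):
declare function listKeys(): Promise<ByokKey[]>;
declare function getMasterKey(): Promise<CryptoKey>;
declare function decryptApiKey(master: CryptoKey, cipher: string, iv: string): Promise<string>;

export const resolveByokKey: ByokKeyResolver = async ({ preferredProvider }) => {
  const keys = (await listKeys()).filter((k) => !k.deletedAt);
  const pool = preferredProvider
    ? keys.filter((k) => k.provider === preferredProvider)
    : keys;
  const key = pool.find((k) => k.isDefault) ?? pool[0];
  if (!key) return null; // nothing configured: orchestrator skips the byok tier

  const provider = BUILTIN_BYOK_PROVIDERS.find((p) => p.id === key.provider)!;
  return {
    provider: key.provider,
    apiKey: await decryptApiKey(await getMasterKey(), key.keyCipher, key.keyIv),
    model: key.model ?? provider.defaultModel,
  };
};
```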
diff --git a/packages/shared-llm/src/backends/byok-providers/anthropic.ts b/packages/shared-llm/src/backends/byok-providers/anthropic.ts
new file mode 100644
index 000000000..3e6c18c80
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/anthropic.ts
@@ -0,0 +1,122 @@
+/**
+ * Anthropic adapter.
+ *
+ * Differs from OpenAI:
+ * - Uses x-api-key header (not Bearer)
+ * - Needs anthropic-version header
+ * - Needs anthropic-dangerous-direct-browser-access for CORS
+ * - System prompt goes in its own `system` field, not as a message
+ * - SSE event schema is different (content_block_delta with text)
+ */
+
+import type { ByokProvider, ByokCallOptions } from './types';
+import type { GenerateResult } from '../../types';
+
+export const anthropicProvider: ByokProvider = {
+  id: 'anthropic',
+  displayName: 'Anthropic',
+  defaultModel: 'claude-sonnet-4-5',
+  availableModels: [
+    'claude-opus-4-6',
+    'claude-opus-4-5',
+    'claude-sonnet-4-6',
+    'claude-sonnet-4-5',
+    'claude-haiku-4-5',
+  ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+    return callAnthropic(opts);
+  },
+};
+
+async function callAnthropic(opts: ByokCallOptions): Promise<GenerateResult> {
+  const startedAt = Date.now();
+
+  // Anthropic wants system prompt separately, user/assistant inline
+  const systemMessages = opts.messages.filter((m) => m.role === 'system');
+  const chatMessages = opts.messages.filter((m) => m.role !== 'system');
+  const system = systemMessages.map((m) => m.content).join('\n\n') || undefined;
+
+  const response = await fetch('https://api.anthropic.com/v1/messages', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'x-api-key': opts.apiKey,
+      'anthropic-version': '2023-06-01',
+      'anthropic-dangerous-direct-browser-access': 'true',
+    },
+    body: JSON.stringify({
+      model: opts.model,
+      system,
+      messages: chatMessages.map((m) => ({ role: m.role, content: m.content })),
+      temperature: opts.temperature ?? 0.7,
+      max_tokens: opts.maxTokens ?? 1024,
+      stream: true,
+    }),
+  });
+
+  if (!response.ok) {
+    const errText = await response.text().catch(() => response.statusText);
+    throw new Error(`Anthropic API ${response.status}: ${errText.slice(0, 300)}`);
+  }
+
+  if (!response.body) {
+    throw new Error('Anthropic API: kein Response-Body');
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+  let content = '';
+  let promptTokens = 0;
+  let completionTokens = 0;
+
+  while (true) {
+    const { value, done } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+
+    let newlineIdx: number;
+    while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+      const line = buffer.slice(0, newlineIdx).trim();
+      buffer = buffer.slice(newlineIdx + 1);
+
+      if (!line.startsWith('data: ')) continue;
+      const payload = line.slice(6).trim();
+      if (!payload) continue;
+
+      try {
+        const parsed = JSON.parse(payload) as {
+          type?: string;
+          delta?: { type?: string; text?: string };
+          message?: { usage?: { input_tokens?: number; output_tokens?: number } };
+          usage?: { input_tokens?: number; output_tokens?: number };
+        };
+
+        if (parsed.type === 'content_block_delta' && parsed.delta?.type === 'text_delta') {
+          const token = parsed.delta.text ?? '';
+          if (token) {
+            content += token;
+            opts.onToken?.(token);
+          }
+        } else if (parsed.type === 'message_start' && parsed.message?.usage) {
+          promptTokens = parsed.message.usage.input_tokens ?? 0;
+        } else if (parsed.type === 'message_delta' && parsed.usage) {
+          completionTokens = parsed.usage.output_tokens ?? completionTokens;
+        }
+      } catch {
+        // Ignore malformed lines
+      }
+    }
+  }
+
+  return {
+    content,
+    usage: {
+      promptTokens,
+      completionTokens,
+      totalTokens: promptTokens + completionTokens,
+    },
+    latencyMs: Date.now() - startedAt,
+  };
+}
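For reference, a hypothetical direct call of the adapter with token streaming; in the real flow the `ByokBackend` below drives this. `apiKey` and `outputEl` are assumed to exist:

```typescript
declare const apiKey: string;      // decrypted from the vault
declare const outputEl: HTMLElement;

const result = await anthropicProvider.call({
  apiKey,
  model: anthropicProvider.defaultModel,
  messages: [{ role: 'user', content: 'Summarize BYOK in one sentence.' }],
  maxTokens: 256,
  onToken: (t) => { outputEl.textContent += t; }, // appended as SSE deltas arrive
});
console.log(result.usage?.totalTokens, 'tokens in', result.latencyMs, 'ms');
```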
diff --git a/packages/shared-llm/src/backends/byok-providers/gemini.ts b/packages/shared-llm/src/backends/byok-providers/gemini.ts
new file mode 100644
index 000000000..94b33af6e
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/gemini.ts
@@ -0,0 +1,129 @@
+/**
+ * Gemini adapter — direct REST API.
+ *
+ * Differs from OpenAI:
+ * - API key goes in query string (?key=...)
+ * - Messages use different schema: { role, parts: [{ text }] }
+ * - Roles are 'user' and 'model' (not 'assistant')
+ * - System prompt goes in `systemInstruction` field
+ * - Streaming via SSE at :streamGenerateContent endpoint
+ */
+
+import type { ByokProvider, ByokCallOptions } from './types';
+import type { GenerateResult, ChatMessage } from '../../types';
+
+export const geminiProvider: ByokProvider = {
+  id: 'gemini',
+  displayName: 'Google Gemini',
+  defaultModel: 'gemini-2.5-flash',
+  availableModels: [
+    'gemini-2.5-pro',
+    'gemini-2.5-flash',
+    'gemini-2.5-flash-lite',
+    'gemini-2.0-flash',
+  ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+    return callGemini(opts);
+  },
+};
+
+interface GeminiMessage {
+  role: 'user' | 'model';
+  parts: { text: string }[];
+}
+
+function toGeminiMessages(messages: ChatMessage[]): {
+  system?: string;
+  contents: GeminiMessage[];
+} {
+  const systemMessages = messages.filter((m) => m.role === 'system');
+  const chatMessages = messages.filter((m) => m.role !== 'system');
+  return {
+    system: systemMessages.map((m) => m.content).join('\n\n') || undefined,
+    contents: chatMessages.map((m) => ({
+      role: m.role === 'assistant' ? 'model' : 'user',
+      parts: [{ text: m.content }],
+    })),
+  };
+}
+
+async function callGemini(opts: ByokCallOptions): Promise<GenerateResult> {
+  const startedAt = Date.now();
+  const { system, contents } = toGeminiMessages(opts.messages);
+
+  const url = `https://generativelanguage.googleapis.com/v1beta/models/${opts.model}:streamGenerateContent?alt=sse&key=${opts.apiKey}`;
+
+  const response = await fetch(url, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      systemInstruction: system ? { parts: [{ text: system }] } : undefined,
+      contents,
+      generationConfig: {
+        temperature: opts.temperature ?? 0.7,
+        maxOutputTokens: opts.maxTokens ?? 1024,
+      },
+    }),
+  });
+
+  if (!response.ok) {
+    const errText = await response.text().catch(() => response.statusText);
+    throw new Error(`Gemini API ${response.status}: ${errText.slice(0, 300)}`);
+  }
+
+  if (!response.body) {
+    throw new Error('Gemini API: kein Response-Body');
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+  let content = '';
+  let promptTokens = 0;
+  let completionTokens = 0;
+
+  while (true) {
+    const { value, done } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+
+    let newlineIdx: number;
+    while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+      const line = buffer.slice(0, newlineIdx).trim();
+      buffer = buffer.slice(newlineIdx + 1);
+
+      if (!line.startsWith('data: ')) continue;
+      const payload = line.slice(6).trim();
+      if (!payload) continue;
+
+      try {
+        const parsed = JSON.parse(payload) as {
+          candidates?: { content?: { parts?: { text?: string }[] } }[];
+          usageMetadata?: { promptTokenCount?: number; candidatesTokenCount?: number };
+        };
+        const token = parsed.candidates?.[0]?.content?.parts?.[0]?.text ?? '';
+        if (token) {
+          content += token;
+          opts.onToken?.(token);
+        }
+        if (parsed.usageMetadata) {
+          promptTokens = parsed.usageMetadata.promptTokenCount ?? promptTokens;
+          completionTokens = parsed.usageMetadata.candidatesTokenCount ?? completionTokens;
+        }
+      } catch {
+        // Ignore malformed lines
+      }
+    }
+  }
+
+  return {
+    content,
+    usage: {
+      promptTokens,
+      completionTokens,
+      totalTokens: promptTokens + completionTokens,
+    },
+    latencyMs: Date.now() - startedAt,
+  };
+}
diff --git a/packages/shared-llm/src/backends/byok-providers/index.ts b/packages/shared-llm/src/backends/byok-providers/index.ts
new file mode 100644
index 000000000..6f67b4e57
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/index.ts
@@ -0,0 +1,19 @@
+export { openaiProvider } from './openai';
+export { anthropicProvider } from './anthropic';
+export { geminiProvider } from './gemini';
+export { mistralProvider } from './mistral';
+export type { ByokProvider, ByokProviderId, ByokCallOptions } from './types';
+
+import { openaiProvider } from './openai';
+import { anthropicProvider } from './anthropic';
+import { geminiProvider } from './gemini';
+import { mistralProvider } from './mistral';
+import type { ByokProvider } from './types';
+
+/** All built-in BYOK providers. Apps can still add custom ones. */
+export const BUILTIN_BYOK_PROVIDERS: readonly ByokProvider[] = [
+  openaiProvider,
+  anthropicProvider,
+  geminiProvider,
+  mistralProvider,
+];
diff --git a/packages/shared-llm/src/backends/byok-providers/mistral.ts b/packages/shared-llm/src/backends/byok-providers/mistral.ts
new file mode 100644
index 000000000..9c9f46fb9
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/mistral.ts
@@ -0,0 +1,23 @@
+import type { ByokProvider, ByokCallOptions } from './types';
+import { callOpenAiCompat } from './openai-compat';
+import type { GenerateResult } from '../../types';
+
+export const mistralProvider: ByokProvider = {
+  id: 'mistral',
+  displayName: 'Mistral AI',
+  defaultModel: 'mistral-small-latest',
+  availableModels: [
+    'mistral-large-latest',
+    'mistral-small-latest',
+    'mistral-medium-latest',
+    'open-mistral-nemo',
+    'codestral-latest',
+  ],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+    return callOpenAiCompat(
+      { baseUrl: 'https://api.mistral.ai/v1', providerName: 'Mistral' },
+      opts
+    );
+  },
+};
diff --git a/packages/shared-llm/src/backends/byok-providers/openai-compat.ts b/packages/shared-llm/src/backends/byok-providers/openai-compat.ts
new file mode 100644
index 000000000..c07c17ac9
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/openai-compat.ts
@@ -0,0 +1,101 @@
+/**
+ * OpenAI-compatible API adapter (base for OpenAI, Mistral, Groq, etc.)
+ *
+ * Uses the ChatCompletions API schema. Streaming via SSE, parsing
+ * the `data: {json}` lines, extracting `choices[0].delta.content`.
+ */
+
+import type { GenerateResult } from '../../types';
+import type { ByokCallOptions } from './types';
+
+export interface OpenAiCompatConfig {
+  baseUrl: string;
+  providerName: string; // For error messages
+  extraHeaders?: Record<string, string>;
+}
+
+export async function callOpenAiCompat(
+  config: OpenAiCompatConfig,
+  opts: ByokCallOptions
+): Promise<GenerateResult> {
+  const startedAt = Date.now();
+  const url = `${config.baseUrl.replace(/\/$/, '')}/chat/completions`;
+
+  const response = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      Authorization: `Bearer ${opts.apiKey}`,
+      ...(config.extraHeaders ?? {}),
+    },
+    body: JSON.stringify({
+      model: opts.model,
+      messages: opts.messages.map((m) => ({ role: m.role, content: m.content })),
+      temperature: opts.temperature ?? 0.7,
+      max_tokens: opts.maxTokens ?? 1024,
+      stream: true,
+      stream_options: { include_usage: true },
+    }),
+  });
+
+  if (!response.ok) {
+    const errText = await response.text().catch(() => response.statusText);
+    const short = errText.slice(0, 300);
+    throw new Error(`${config.providerName} API ${response.status}: ${short}`);
+  }
+
+  if (!response.body) {
+    throw new Error(`${config.providerName} API: kein Response-Body`);
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = '';
+  let content = '';
+  let promptTokens = 0;
+  let completionTokens = 0;
+
+  while (true) {
+    const { value, done } = await reader.read();
+    if (done) break;
+    buffer += decoder.decode(value, { stream: true });
+
+    let newlineIdx: number;
+    while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
+      const line = buffer.slice(0, newlineIdx).trim();
+      buffer = buffer.slice(newlineIdx + 1);
+
+      if (!line.startsWith('data: ')) continue;
+      const payload = line.slice(6).trim();
+      if (payload === '[DONE]') continue;
+
+      try {
+        const parsed = JSON.parse(payload) as {
+          choices?: { delta?: { content?: string } }[];
+          usage?: { prompt_tokens?: number; completion_tokens?: number };
+        };
+        const token = parsed.choices?.[0]?.delta?.content ?? '';
+        if (token) {
+          content += token;
+          opts.onToken?.(token);
+        }
+        if (parsed.usage) {
+          promptTokens = parsed.usage.prompt_tokens ?? 0;
+          completionTokens = parsed.usage.completion_tokens ?? 0;
+        }
+      } catch {
+        // Ignore malformed lines
+      }
+    }
+  }
+
+  return {
+    content,
+    usage: {
+      promptTokens,
+      completionTokens,
+      totalTokens: promptTokens + completionTokens,
+    },
+    latencyMs: Date.now() - startedAt,
+  };
+}
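The header comment names Groq as another OpenAI-compatible target; a sketch of what such an extra adapter could look like. The Groq base URL and model name are assumptions, and `ByokProviderId` (plus the vault's `provider` field) would need widening before this could actually be registered:

```typescript
import { callOpenAiCompat } from './openai-compat';
import type { ByokCallOptions } from './types';
import type { GenerateResult } from '../../types';

// Hypothetical extra adapter, not part of this diff.
export const groqProvider = {
  id: 'groq',
  displayName: 'Groq',
  defaultModel: 'llama-3.3-70b-versatile',
  availableModels: ['llama-3.3-70b-versatile'],

  async call(opts: ByokCallOptions): Promise<GenerateResult> {
    return callOpenAiCompat(
      { baseUrl: 'https://api.groq.com/openai/v1', providerName: 'Groq' },
      opts
    );
  },
};
```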
diff --git a/packages/shared-llm/src/backends/byok-providers/openai.ts b/packages/shared-llm/src/backends/byok-providers/openai.ts
new file mode 100644
index 000000000..694c8f1a5
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/openai.ts
@@ -0,0 +1,14 @@
+import type { ByokProvider, ByokCallOptions } from './types';
+import { callOpenAiCompat } from './openai-compat';
+import type { GenerateResult } from '../../types';
+
+export const openaiProvider: ByokProvider = {
+  id: 'openai',
+  displayName: 'OpenAI',
+  defaultModel: 'gpt-4o-mini',
+  availableModels: ['gpt-5', 'gpt-5-mini', 'gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'o1', 'o1-mini'],
+
+  async call(opts: ByokCallOptions): Promise<GenerateResult> {
+    return callOpenAiCompat({ baseUrl: 'https://api.openai.com/v1', providerName: 'OpenAI' }, opts);
+  },
+};
diff --git a/packages/shared-llm/src/backends/byok-providers/types.ts b/packages/shared-llm/src/backends/byok-providers/types.ts
new file mode 100644
index 000000000..fde51eadf
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok-providers/types.ts
@@ -0,0 +1,39 @@
+/**
+ * BYOK Provider abstraction.
+ *
+ * Each supported third-party LLM (OpenAI, Anthropic, Gemini, Mistral, ...)
+ * implements this interface. Adapters do the direct browser-to-provider
+ * fetch using the user's API key.
+ */
+
+import type { ChatMessage, GenerateResult } from '../../types';
+
+export type ByokProviderId = 'openai' | 'anthropic' | 'gemini' | 'mistral';
+
+export interface ByokProvider {
+  readonly id: ByokProviderId;
+  readonly displayName: string;
+  readonly defaultModel: string;
+  readonly availableModels: readonly string[];
+
+  /**
+   * Call the provider with the user's API key.
+   * Throws on network errors, auth errors, or content policy blocks.
+   */
+  call(opts: ByokCallOptions): Promise<GenerateResult>;
+}
+
+export interface ByokCallOptions {
+  apiKey: string;
+  model: string;
+  messages: ChatMessage[];
+  temperature?: number;
+  maxTokens?: number;
+  onToken?: (token: string) => void;
+}
+
+export interface ByokProviderError extends Error {
+  provider: ByokProviderId;
+  status?: number;
+  code?: string;
+}
diff --git a/packages/shared-llm/src/backends/byok.ts b/packages/shared-llm/src/backends/byok.ts
new file mode 100644
index 000000000..394e182db
--- /dev/null
+++ b/packages/shared-llm/src/backends/byok.ts
@@ -0,0 +1,132 @@
+/**
+ * BYOK Backend — routes LLM calls through the user's own API keys.
+ *
+ * The backend itself lives in shared-llm (so the orchestrator can
+ * instantiate it alongside browser/mana-server/cloud), but the
+ * actual keys live in the consuming app's encrypted IndexedDB.
+ *
+ * Apps inject a `ByokKeyResolver` callback at init time. The backend
+ * calls it whenever it needs a key, gets back `{ apiKey, model,
+ * provider }`, and dispatches to the matching provider adapter.
+ *
+ * If no key is configured for any provider, isAvailable() returns
+ * false and the orchestrator skips this tier.
+ */
+
+import type { GenerateResult, LlmBackend, LlmTaskRequest } from '../types';
+import type { ByokProvider, ByokProviderId } from './byok-providers/types';
+
+export interface ResolvedByokKey {
+  provider: ByokProviderId;
+  apiKey: string;
+  model: string;
+}
+
+/** App-side callback — looks up the appropriate key for a call. */
+export type ByokKeyResolver = (opts: {
+  /** Task name from LlmTaskRequest (e.g. "companion.chat") */
+  taskName: string;
+  /** Optional user-forced provider (from settings.byok.defaultProvider
+   * or from a task-specific override like 'byok:anthropic') */
+  preferredProvider?: ByokProviderId;
+}) => Promise<ResolvedByokKey | null>;
+
+/** Called after a successful generation so the app can increment usage counters. */
+export type ByokUsageCallback = (opts: {
+  provider: ByokProviderId;
+  model: string;
+  promptTokens: number;
+  completionTokens: number;
+  latencyMs: number;
+}) => void;
+
+export interface ByokBackendOptions {
+  resolver: ByokKeyResolver;
+  providers: readonly ByokProvider[];
+  onUsage?: ByokUsageCallback;
+}
+
+export class ByokBackend implements LlmBackend {
+  readonly tier = 'byok' as const;
+  private readonly resolver: ByokKeyResolver;
+  private readonly providers: Map<ByokProviderId, ByokProvider>;
+  private readonly onUsage?: ByokUsageCallback;
+  /** Whether at least one key has been configured. Set after first
+   * resolver call; the orchestrator uses isAvailable() to skip the
+   * tier when the user hasn't added any keys yet. */
+  private keyConfigured: boolean | null = null;
+
+  constructor(opts: ByokBackendOptions) {
+    this.resolver = opts.resolver;
+    this.providers = new Map(opts.providers.map((p) => [p.id, p]));
+    this.onUsage = opts.onUsage;
+  }
+
+  /** Inform the backend that the user has added/removed keys — flips
+   * the cached availability flag so isAvailable() re-probes on the
+   * next call. */
+  invalidateAvailability(): void {
+    this.keyConfigured = null;
+  }
+
+  isAvailable(): boolean {
+    // If we haven't probed yet, assume available and let the resolver
+    // fail gracefully. After the first resolver miss we cache false.
+    return this.keyConfigured !== false;
+  }
+
+  async isReady(): Promise<boolean> {
+    // Probe with a null task to see if *any* key resolves
+    try {
+      const key = await this.resolver({ taskName: '__probe__' });
+      this.keyConfigured = key !== null;
+      return this.keyConfigured;
+    } catch {
+      this.keyConfigured = false;
+      return false;
+    }
+  }
+
+  async generate(req: LlmTaskRequest): Promise<GenerateResult> {
+    // Provider overrides (e.g. a taskOverride of 'byok:anthropic') are
+    // resolved app-side: the resolver receives the task name and applies
+    // any preferredProvider preference itself.
+    const resolved = await this.resolver({ taskName: req.taskName });
+    if (!resolved) {
+      this.keyConfigured = false;
+      throw new Error(
+        'Kein BYOK-Schluessel konfiguriert. Bitte unter Einstellungen → KI-Keys hinterlegen.'
+      );
+    }
+    this.keyConfigured = true;
+
+    const provider = this.providers.get(resolved.provider);
+    if (!provider) {
+      throw new Error(`BYOK-Provider nicht unterstuetzt: ${resolved.provider}`);
+    }
+
+    const startedAt = Date.now();
+    const result = await provider.call({
+      apiKey: resolved.apiKey,
+      model: resolved.model,
+      messages: req.messages,
+      temperature: req.temperature,
+      maxTokens: req.maxTokens,
+      onToken: req.onToken,
+    });
+    const latencyMs = Date.now() - startedAt;
+
+    // Report usage so the app can update per-key counters
+    if (this.onUsage && result.usage) {
+      this.onUsage({
+        provider: resolved.provider,
+        model: resolved.model,
+        promptTokens: result.usage.promptTokens,
+        completionTokens: result.usage.completionTokens,
+        latencyMs,
+      });
+    }
+
+    return { ...result, latencyMs };
+  }
+}
diff --git a/packages/shared-llm/src/index.ts b/packages/shared-llm/src/index.ts
index 0055d8d5e..91c36ba25 100644
--- a/packages/shared-llm/src/index.ts
+++ b/packages/shared-llm/src/index.ts
@@ -44,6 +44,26 @@ export { LlmOrchestrator, type LlmOrchestratorOptions } from './orchestrator';
 export { BrowserBackend } from './backends/browser';
 export { CloudBackend, type CloudBackendOptions } from './backends/cloud';
 export { ManaServerBackend, type ManaServerBackendOptions } from './backends/mana-server';
+export {
+  ByokBackend,
+  type ByokBackendOptions,
+  type ByokKeyResolver,
+  type ResolvedByokKey,
+  type ByokUsageCallback,
+} from './backends/byok';
+export {
+  BUILTIN_BYOK_PROVIDERS,
+  openaiProvider,
+  anthropicProvider,
+  geminiProvider,
+  mistralProvider,
+  type ByokProvider,
+  type ByokProviderId,
+  type ByokCallOptions,
+} from './backends/byok-providers';
+
+// Pricing
+export { MODEL_PRICING, estimateCost, formatCost, type ModelPricing } from './pricing';
 
 // Singleton store + Svelte 5 reactive hooks
 export {
diff --git a/packages/shared-llm/src/orchestrator.ts b/packages/shared-llm/src/orchestrator.ts
index aee94ec1a..7176f6a08 100644
--- a/packages/shared-llm/src/orchestrator.ts
+++ b/packages/shared-llm/src/orchestrator.ts
@@ -70,6 +70,18 @@ export class LlmOrchestrator {
     this.settings = settings;
   }
 
+  /** Register (or replace) a backend at runtime — used by the app
+   * to wire up the BYOK backend after initial orchestrator construction,
+   * since BYOK needs access to app-side IndexedDB keys. */
+  registerBackend(backend: LlmBackend): void {
+    this.backendsByTier.set(backend.tier, backend);
+  }
+
+  /** Remove a backend (e.g. when the user disables BYOK). */
+  unregisterBackend(tier: LlmTier): void {
+    this.backendsByTier.delete(tier);
+  }
+
   /** Public read-only view for UI components that want to react to
    * the current settings (e.g. the tier selector). */
   getSettings(): Readonly<LlmSettings> {
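Putting the exports together, roughly what the app's `init.ts` from the plan could do. `resolveByokKey` is the resolver sketched earlier, and `llmOrchestrator` is assumed to be the singleton the plan's init snippet imports:

```typescript
import {
  BUILTIN_BYOK_PROVIDERS,
  ByokBackend,
  llmOrchestrator, // assumed singleton export, as in the plan's init.ts snippet
} from '@mana/shared-llm';
import { resolveByokKey } from './store.svelte';

const byokBackend = new ByokBackend({
  resolver: resolveByokKey,
  providers: BUILTIN_BYOK_PROVIDERS,
  onUsage: ({ provider, model, promptTokens, completionTokens }) => {
    // e.g. bump usageCount/totalTokens/lastUsedAt on the matching ByokKey row
  },
});

llmOrchestrator.registerBackend(byokBackend);

// Whenever the user adds or removes a key in settings:
byokBackend.invalidateAvailability();
```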
diff --git a/packages/shared-llm/src/pricing.ts b/packages/shared-llm/src/pricing.ts
new file mode 100644
index 000000000..b1308b416
--- /dev/null
+++ b/packages/shared-llm/src/pricing.ts
@@ -0,0 +1,69 @@
+/**
+ * Per-model token pricing for BYOK cost estimation.
+ *
+ * Values in USD per 1M tokens (as published by providers as of
+ * 2026-04). Update manually when providers change pricing.
+ *
+ * Only includes models Mana exposes in the BYOK provider adapters.
+ */
+
+export interface ModelPricing {
+  /** USD per 1 million input tokens */
+  inputPerMillion: number;
+  /** USD per 1 million output tokens */
+  outputPerMillion: number;
+}
+
+export const MODEL_PRICING: Record<string, ModelPricing> = {
+  // ── OpenAI ──────────────────────────────────────────
+  'gpt-5': { inputPerMillion: 15, outputPerMillion: 60 },
+  'gpt-5-mini': { inputPerMillion: 3, outputPerMillion: 12 },
+  'gpt-4o': { inputPerMillion: 5, outputPerMillion: 20 },
+  'gpt-4o-mini': { inputPerMillion: 0.3, outputPerMillion: 1.2 },
+  'gpt-4-turbo': { inputPerMillion: 10, outputPerMillion: 30 },
+  o1: { inputPerMillion: 15, outputPerMillion: 60 },
+  'o1-mini': { inputPerMillion: 3, outputPerMillion: 12 },
+
+  // ── Anthropic ───────────────────────────────────────
+  'claude-opus-4-6': { inputPerMillion: 15, outputPerMillion: 75 },
+  'claude-opus-4-5': { inputPerMillion: 15, outputPerMillion: 75 },
+  'claude-sonnet-4-6': { inputPerMillion: 3, outputPerMillion: 15 },
+  'claude-sonnet-4-5': { inputPerMillion: 3, outputPerMillion: 15 },
+  'claude-haiku-4-5': { inputPerMillion: 0.8, outputPerMillion: 4 },
+
+  // ── Google Gemini ───────────────────────────────────
+  'gemini-2.5-pro': { inputPerMillion: 1.25, outputPerMillion: 5 },
+  'gemini-2.5-flash': { inputPerMillion: 0.15, outputPerMillion: 0.6 },
+  'gemini-2.5-flash-lite': { inputPerMillion: 0.075, outputPerMillion: 0.3 },
+  'gemini-2.0-flash': { inputPerMillion: 0.1, outputPerMillion: 0.4 },
+
+  // ── Mistral ─────────────────────────────────────────
+  'mistral-large-latest': { inputPerMillion: 2, outputPerMillion: 6 },
+  'mistral-medium-latest': { inputPerMillion: 2.7, outputPerMillion: 8.1 },
+  'mistral-small-latest': { inputPerMillion: 0.2, outputPerMillion: 0.6 },
+  'open-mistral-nemo': { inputPerMillion: 0.15, outputPerMillion: 0.15 },
+  'codestral-latest': { inputPerMillion: 0.3, outputPerMillion: 0.9 },
+};
+
+/** USD cost for a given call. Returns 0 if model isn't in the table. */
+export function estimateCost(
+  model: string,
+  promptTokens: number,
+  completionTokens: number
+): number {
+  const p = MODEL_PRICING[model];
+  if (!p) return 0;
+  return (
+    (promptTokens / 1_000_000) * p.inputPerMillion +
+    (completionTokens / 1_000_000) * p.outputPerMillion
+  );
+}
+
+/** Format USD value with at most 4 decimals (for small per-call amounts). */
+export function formatCost(usd: number): string {
+  if (usd === 0) return '—';
+  if (usd < 0.0001) return '< $0.0001';
+  if (usd < 0.01) return `$${usd.toFixed(4)}`;
+  if (usd < 1) return `$${usd.toFixed(3)}`;
+  return `$${usd.toFixed(2)}`;
+}
diff --git a/packages/shared-llm/src/tiers.ts b/packages/shared-llm/src/tiers.ts
index 85294da06..5334099a2 100644
--- a/packages/shared-llm/src/tiers.ts
+++ b/packages/shared-llm/src/tiers.ts
@@ -1,7 +1,7 @@
 /**
  * Tier definitions for the Mana LLM orchestrator.
  *
- * Four tiers, ordered from most-private to least-private:
+ * Five tiers, ordered from most-private to least-private:
  *
  * none        — Deterministic parsers / heuristics. No LLM at all.
  *               Always available. Zero cost. Quality varies by task.
@@ -14,26 +14,28 @@
  *               (currently the Mac Mini, gemma3:4b by default).
  *               Data leaves the device but stays in our control.
  *
+ * byok        — User-provided API keys (OpenAI, Anthropic, Gemini,
+ *               Mistral). Browser-direct fetches. Data goes to a
+ *               third-party account the user manages. User controls
+ *               the provider's privacy/retention policy directly.
+ *
  * cloud       — services/mana-llm proxied to a third-party provider
  *               (Google Gemini, configured via google_api_key in the
- *               mana-llm service env). Data goes to the third party.
- *
- * The numeric rank is used by the orchestrator to compare a user's
- * preferred tier against a task's minimum tier ("can the user even
- * run this task?") and is the canonical sort order for the privacy
- * gradient.
+ *               mana-llm service env). Mana-managed, charges Mana
+ *               credits. Data goes to the third party via Mana.
  */
 
-export type LlmTier = 'none' | 'browser' | 'mana-server' | 'cloud';
+export type LlmTier = 'none' | 'browser' | 'mana-server' | 'byok' | 'cloud';
 
 export const TIER_RANK: Record<LlmTier, number> = {
   none: 0,
   browser: 1,
   'mana-server': 2,
-  cloud: 3,
+  byok: 3,
+  cloud: 4,
 };
 
-export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'cloud'];
+export const ALL_TIERS: readonly LlmTier[] = ['none', 'browser', 'mana-server', 'byok', 'cloud'];
 
 /** Human-readable label, kept here so backends/UI agree on naming. */
 export function tierLabel(tier: LlmTier): string {
@@ -44,6 +46,8 @@ export function tierLabel(tier: LlmTier): string {
       return 'Auf deinem Gerät';
     case 'mana-server':
       return 'Mana-Server';
+    case 'byok':
+      return 'Dein API-Key';
     case 'cloud':
       return 'Google Gemini';
   }