From f46d1328d82debf2768b1a8e563b2416222f0add Mon Sep 17 00:00:00 2001 From: Till JS Date: Tue, 7 Apr 2026 22:05:49 +0200 Subject: [PATCH] =?UTF-8?q?feat(mana-auth):=20phase=209=20milestone=202=20?= =?UTF-8?q?=E2=80=94=20vault=20recovery=20wrap=20+=20zero-knowledge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Server-side support for the Phase 9 zero-knowledge opt-in. Adds the recovery-wrap columns + four new vault operations + the routes that expose them. Schema (sql/003_recovery_wrap.sql) ---------------------------------- Adds to auth.encryption_vaults: - recovery_wrapped_mk text (NULL until set) - recovery_iv text (NULL until set) - recovery_format_version smallint NOT NULL DEFAULT 1 - recovery_set_at timestamptz - zero_knowledge boolean NOT NULL DEFAULT false Drops NOT NULL from wrapped_mk + wrap_iv (a vault in zero-knowledge mode has no server-side wrap at all). Three CHECK constraints enforce the invariant at the DB level so no service bug can leave a vault in an inconsistent state: - encryption_vaults_has_wrap — at least one of (wrapped_mk, recovery_wrapped_mk) is set - encryption_vaults_wrap_iv_pair — ciphertext + IV are paired (both NULL or both set) on each wrap form - encryption_vaults_zk_consistency — zero_knowledge=true implies wrapped_mk IS NULL AND recovery_wrapped_mk IS NOT NULL If a code-level bug ever tried to enable ZK without a recovery wrap, or to leave both wraps empty, Postgres would reject the UPDATE. Drizzle schema (db/schema/encryption-vaults.ts) ----------------------------------------------- Mirrors the migration: wrappedMk + wrapIv become nullable, the five new columns added with the right defaults. Inline doc comment explains the zero-knowledge fork. Service (services/encryption-vault/index.ts) -------------------------------------------- VaultFetchResult gains optional `requiresRecoveryCode` / `recoveryWrappedMk` / `recoveryIv` so the route handler can serialize the right shape. 
masterKey becomes Uint8Array | null (null in ZK mode). Existing methods updated: - init: branches on row.zeroKnowledge — returns the recovery blob instead of an unwrapped MK if the user is already in ZK mode - getMasterKey: same fork, with audit context "zk-recovery-blob" - rotate: throws ZeroKnowledgeRotateForbidden in ZK mode (the server can't re-wrap a key it can't read). Also wipes any stale recovery wrap on rotation — the new MK has nothing to do with the old one, so the old recovery code would unwrap into garbage. New methods: - setRecoveryWrap(userId, { recoveryWrappedMk, recoveryIv }, ctx) Stores (or replaces) the user's recovery wrap. Idempotent. - clearRecoveryWrap(userId, ctx) Removes the recovery wrap. Forbidden if ZK is active (would lock the user out) — throws ZeroKnowledgeActiveError → 409. - enableZeroKnowledge(userId, ctx) NULLs out wrapped_mk + wrap_iv, sets zero_knowledge=true. Requires a recovery wrap to already be present — throws RecoveryWrapMissingError → 400 otherwise. Idempotent on already-on. - disableZeroKnowledge(userId, mkBytes, ctx) Inverse: takes a freshly-unwrapped MK from the client, KEK-wraps it, stores as wrapped_mk, flips zero_knowledge=false. The client is the only entity that can supply the MK at this point, since the server can't decrypt the recovery wrap. Three new error classes: - RecoveryWrapMissingError → 400 RECOVERY_WRAP_MISSING - ZeroKnowledgeActiveError → 409 ZK_ACTIVE - ZeroKnowledgeRotateForbidden → 409 ZK_ROTATE_FORBIDDEN Audit action union extended with: - 'recovery_set' | 'recovery_clear' | 'zk_enable' | 'zk_disable' Routes (routes/encryption-vault.ts) ----------------------------------- GET /key + POST /init now share a serializeFetchResult helper that returns either: - { masterKey, formatVersion, kekId } (standard) - { requiresRecoveryCode: true, recoveryWrappedMk, (ZK mode) recoveryIv, formatVersion } Three new routes: - POST /recovery-wrap — body: { recoveryWrappedMk, recoveryIv } Stores the wrap. 
Validates both fields are non-empty strings. - DELETE /recovery-wrap — Removes the wrap. 409 if ZK active. - POST /zero-knowledge — body: { enable: boolean, masterKey?: base64 } enable=true: flip on (no body MK needed) enable=false: flip off (MK required) Validates the MK decodes to exactly 32 bytes. Wipes the bytes after handing them to the service. POST /rotate now catches ZeroKnowledgeRotateForbidden → 409 ZK_ROTATE_FORBIDDEN so the client can show "disable zero-knowledge first". Co-Authored-By: Claude Opus 4.6 (1M context) --- services/mana-auth/sql/003_recovery_wrap.sql | 86 +++++ .../src/db/schema/encryption-vaults.ts | 62 +++- .../mana-auth/src/routes/encryption-vault.ts | 222 ++++++++++-- .../src/services/encryption-vault/index.ts | 316 +++++++++++++++++- 4 files changed, 647 insertions(+), 39 deletions(-) create mode 100644 services/mana-auth/sql/003_recovery_wrap.sql diff --git a/services/mana-auth/sql/003_recovery_wrap.sql b/services/mana-auth/sql/003_recovery_wrap.sql new file mode 100644 index 000000000..ad2ec205d --- /dev/null +++ b/services/mana-auth/sql/003_recovery_wrap.sql @@ -0,0 +1,86 @@ +-- Migration: encryption_vaults recovery wrap + zero-knowledge mode +-- +-- Phase 9 of the encryption rollout. Adds five new columns + makes +-- wrapped_mk + wrap_iv nullable so a user can opt into "true zero-knowledge" +-- mode where the server can no longer decrypt their data. +-- +-- The opt-in flow is: +-- 1. Client generates a 32-byte recovery secret (client-only) +-- 2. Client wraps the existing master key with a recovery-derived key +-- 3. Client posts the wrapped MK + IV to /me/encryption-vault/recovery-wrap +-- 4. The server stores recovery_wrapped_mk + recovery_iv (both NULLABLE +-- until the user enables the recovery wrap; both NOT NULL once set) +-- 5. Client posts /me/encryption-vault/zero-knowledge with `enable: true` +-- The server NULLs out wrapped_mk + wrap_iv, sets zero_knowledge=true. +-- The server can no longer decrypt the user's data. +-- 6. 
On the next unlock, GET /key returns the recovery_wrapped_mk blob +-- with `requiresRecoveryCode: true`. The client prompts the user for +-- the recovery code, derives the wrap key, unwraps locally. +-- +-- The "disable" flow is the inverse: the client unwraps the MK locally and +-- posts it back; the server re-wraps it with the KEK into wrapped_mk. +-- +-- Idempotent: re-running on a partially-migrated DB is safe. + +-- ─── Add new columns ────────────────────────────────────────── +ALTER TABLE auth.encryption_vaults + ADD COLUMN IF NOT EXISTS recovery_wrapped_mk TEXT, + ADD COLUMN IF NOT EXISTS recovery_iv TEXT, + ADD COLUMN IF NOT EXISTS recovery_format_version SMALLINT NOT NULL DEFAULT 1, + ADD COLUMN IF NOT EXISTS recovery_set_at TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS zero_knowledge BOOLEAN NOT NULL DEFAULT false; + +-- ─── Make wrapped_mk + wrap_iv nullable ─────────────────────── +-- These were NOT NULL in the Phase 2 migration. After Phase 9, a vault +-- in zero-knowledge mode has no server-side wrap at all, so both columns +-- have to allow NULL. Existing rows are unaffected (they have non-NULL +-- values; the constraint just relaxes). + +ALTER TABLE auth.encryption_vaults + ALTER COLUMN wrapped_mk DROP NOT NULL, + ALTER COLUMN wrap_iv DROP NOT NULL; + +-- ─── Sanity constraint ──────────────────────────────────────── +-- A vault row must have AT LEAST one usable wrap form, otherwise the +-- user has lost access to their data and we should have rejected the +-- mutation that left the row in this state. The check enforces that +-- at least one of (wrapped_mk, recovery_wrapped_mk) is populated. 
+ +ALTER TABLE auth.encryption_vaults + DROP CONSTRAINT IF EXISTS encryption_vaults_has_wrap; + +ALTER TABLE auth.encryption_vaults + ADD CONSTRAINT encryption_vaults_has_wrap + CHECK (wrapped_mk IS NOT NULL OR recovery_wrapped_mk IS NOT NULL); + +-- ─── Cross-field consistency ────────────────────────────────── +-- If recovery_wrapped_mk is set, recovery_iv must also be set. +-- If wrapped_mk is set, wrap_iv must also be set. + +ALTER TABLE auth.encryption_vaults + DROP CONSTRAINT IF EXISTS encryption_vaults_wrap_iv_pair; + +ALTER TABLE auth.encryption_vaults + ADD CONSTRAINT encryption_vaults_wrap_iv_pair + CHECK ( + (wrapped_mk IS NULL) = (wrap_iv IS NULL) + AND + (recovery_wrapped_mk IS NULL) = (recovery_iv IS NULL) + ); + +-- ─── Zero-knowledge implies the server wrap is gone ─────────── +-- If a vault is in zero-knowledge mode, the KEK-wrapped MK MUST be +-- absent — otherwise the "server can no longer decrypt" promise is +-- a lie. The recovery wrap MUST be present, otherwise the user is +-- locked out. 
+ +ALTER TABLE auth.encryption_vaults + DROP CONSTRAINT IF EXISTS encryption_vaults_zk_consistency; + +ALTER TABLE auth.encryption_vaults + ADD CONSTRAINT encryption_vaults_zk_consistency + CHECK ( + (zero_knowledge = false) + OR + (zero_knowledge = true AND wrapped_mk IS NULL AND recovery_wrapped_mk IS NOT NULL) + ); diff --git a/services/mana-auth/src/db/schema/encryption-vaults.ts b/services/mana-auth/src/db/schema/encryption-vaults.ts index b5bcfd722..1603e0d05 100644 --- a/services/mana-auth/src/db/schema/encryption-vaults.ts +++ b/services/mana-auth/src/db/schema/encryption-vaults.ts @@ -1,4 +1,4 @@ -import { text, timestamp, smallint, integer, index } from 'drizzle-orm/pg-core'; +import { text, timestamp, smallint, integer, boolean, index } from 'drizzle-orm/pg-core'; import { authSchema, users } from './auth'; /** @@ -35,15 +35,24 @@ export const encryptionVaults = authSchema.table( .primaryKey() .references(() => users.id, { onDelete: 'cascade' }), - /** AES-GCM ciphertext of the raw 32-byte master key. Includes the - * 16-byte authentication tag at the tail (Web Crypto convention). */ - wrappedMk: text('wrapped_mk').notNull(), + /** AES-GCM ciphertext of the raw 32-byte master key, wrapped with + * the server-side KEK. Includes the 16-byte authentication tag at + * the tail (Web Crypto convention). + * + * NULLABLE since Phase 9: a vault in zero-knowledge mode has no + * server-side wrap. The CHECK constraint + * `encryption_vaults_has_wrap` ensures at least one of + * (wrapped_mk, recovery_wrapped_mk) is always populated so the + * user can never be locked out. */ + wrappedMk: text('wrapped_mk'), - /** 12-byte IV used for the wrap operation. Stored base64. */ - wrapIv: text('wrap_iv').notNull(), + /** 12-byte IV used for the wrap operation. Stored base64. NULLABLE + * in lockstep with wrappedMk. */ + wrapIv: text('wrap_iv'), - /** Wire format version. Lets us migrate to a different KDF or AEAD - * later without rewriting every existing row at once. 
*/ + /** Wire format version of the KEK wrap. Lets us migrate to a + * different KDF or AEAD later without rewriting every existing + * row at once. */ formatVersion: smallint('format_version').notNull().default(1), /** KEK identifier — currently always 'env-v1' (the env-loaded KEK). @@ -52,6 +61,43 @@ export const encryptionVaults = authSchema.table( * KEK to unwrap with. */ kekId: text('kek_id').notNull().default('env-v1'), + // ─── Phase 9: Recovery wrap (zero-knowledge opt-in) ─── + // + // recovery_wrapped_mk holds the same master key, wrapped with a + // key derived from the user's 32-byte recovery secret via HKDF. + // The server NEVER sees the recovery secret itself — it only + // accepts the already-sealed blob from the client. The client + // generates + displays the recovery code at setup time and the + // user is responsible for backing it up. + // + // When zero_knowledge=true: + // - wrapped_mk + wrap_iv are NULL (the KEK wrap is gone) + // - recovery_wrapped_mk + recovery_iv are NOT NULL + // - GET /key returns the recovery blob, NOT a plaintext MK + // - The server is computationally incapable of decrypting the + // user's data even with full DB + KEK access + + /** AES-GCM ciphertext of the raw 32-byte master key, wrapped with + * the user's recovery-derived key. NULL until the user opts into + * recovery wrap via POST /recovery-wrap. */ + recoveryWrappedMk: text('recovery_wrapped_mk'), + + /** 12-byte IV for the recovery wrap. Stored base64. Paired with + * recoveryWrappedMk via the encryption_vaults_wrap_iv_pair + * constraint. */ + recoveryIv: text('recovery_iv'), + + /** Wire format version of the recovery wrap. */ + recoveryFormatVersion: smallint('recovery_format_version').notNull().default(1), + + /** Timestamp of when the user first set their recovery wrap. */ + recoverySetAt: timestamp('recovery_set_at', { withTimezone: true }), + + /** True iff the user has opted into zero-knowledge mode. 
When set, + * the server-side wrapped_mk is gone and the user MUST provide + * their recovery code to unlock the vault. */ + zeroKnowledge: boolean('zero_knowledge').notNull().default(false), + createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(), rotatedAt: timestamp('rotated_at', { withTimezone: true }), }, diff --git a/services/mana-auth/src/routes/encryption-vault.ts b/services/mana-auth/src/routes/encryption-vault.ts index 1ed1de4c8..9e67da780 100644 --- a/services/mana-auth/src/routes/encryption-vault.ts +++ b/services/mana-auth/src/routes/encryption-vault.ts @@ -29,6 +29,9 @@ import type { AuthUser } from '../middleware/jwt-auth'; import { EncryptionVaultService, VaultNotFoundError, + RecoveryWrapMissingError, + ZeroKnowledgeActiveError, + ZeroKnowledgeRotateForbidden, type AuditContext, } from '../services/encryption-vault'; @@ -47,30 +50,22 @@ export function createEncryptionVaultRoutes(vaultService: EncryptionVaultService const ctx = readAuditContext(c); const result = await vaultService.init(user.userId, ctx); - - return c.json({ - masterKey: bytesToBase64(result.masterKey), - formatVersion: result.formatVersion, - kekId: result.kekId, - }); + return c.json(serializeFetchResult(result)); }); // ─── GET /key ──────────────────────────────────────────── // The hot path: every Phase 3 client calls this immediately after - // login. Returns the unwrapped MK as base64 over HTTPS. The vault - // service writes a `fetch` audit row on success, `failed_fetch` on - // any error path. + // login. Returns either the unwrapped MK as base64 (standard mode) + // OR the recovery-wrapped blob with `requiresRecoveryCode: true` + // (zero-knowledge mode — Phase 9). The vault service writes a + // `fetch` audit row on success, `failed_fetch` on any error path. 
app.get('/key', async (c) => { const user = c.get('user'); const ctx = readAuditContext(c); try { const result = await vaultService.getMasterKey(user.userId, ctx); - return c.json({ - masterKey: bytesToBase64(result.masterKey), - formatVersion: result.formatVersion, - kekId: result.kekId, - }); + return c.json(serializeFetchResult(result)); } catch (err) { if (err instanceof VaultNotFoundError) { return c.json({ error: 'vault not initialised', code: 'VAULT_NOT_INITIALISED' }, 404); @@ -83,23 +78,199 @@ export function createEncryptionVaultRoutes(vaultService: EncryptionVaultService // Destructive. Mints a fresh MK and overwrites the wrap. The old MK // is gone forever. Routes do NOT enforce a 2FA challenge here — // that's a UX decision the front-end has to enforce before calling. - // (Future: add a `requires2fa: true` flag and short-circuit here if - // the JWT lacks a recent step-up claim.) + // Forbidden in zero-knowledge mode (returns 409); the client has to + // disable ZK first. app.post('/rotate', async (c) => { const user = c.get('user'); const ctx = readAuditContext(c); - const result = await vaultService.rotate(user.userId, ctx); - return c.json({ - masterKey: bytesToBase64(result.masterKey), - formatVersion: result.formatVersion, - kekId: result.kekId, - }); + try { + const result = await vaultService.rotate(user.userId, ctx); + return c.json(serializeFetchResult(result)); + } catch (err) { + if (err instanceof ZeroKnowledgeRotateForbidden) { + return c.json( + { + error: 'cannot rotate in zero-knowledge mode', + code: 'ZK_ROTATE_FORBIDDEN', + }, + 409 + ); + } + throw err; + } + }); + + // ─── POST /recovery-wrap ───────────────────────────────── + // Phase 9. Stores (or replaces) the user's recovery wrap. The + // client wraps the master key with a recovery-derived key locally + // and posts only the resulting ciphertext + IV. The recovery secret + // itself NEVER touches the wire — that's the entire point of the + // zero-knowledge design. 
+ // + // This endpoint by itself does NOT enable zero-knowledge mode. The + // client has to follow up with POST /zero-knowledge after the user + // confirms they have backed up the recovery code. + app.post('/recovery-wrap', async (c) => { + const user = c.get('user'); + const ctx = readAuditContext(c); + + const body = await c.req.json().catch(() => null); + if ( + !body || + typeof body.recoveryWrappedMk !== 'string' || + typeof body.recoveryIv !== 'string' || + !body.recoveryWrappedMk || + !body.recoveryIv + ) { + return c.json( + { + error: 'recoveryWrappedMk and recoveryIv are required (base64 strings)', + code: 'BAD_REQUEST', + }, + 400 + ); + } + + try { + await vaultService.setRecoveryWrap( + user.userId, + { recoveryWrappedMk: body.recoveryWrappedMk, recoveryIv: body.recoveryIv }, + ctx + ); + return c.json({ ok: true }); + } catch (err) { + if (err instanceof VaultNotFoundError) { + return c.json({ error: 'vault not initialised', code: 'VAULT_NOT_INITIALISED' }, 404); + } + throw err; + } + }); + + // ─── DELETE /recovery-wrap ─────────────────────────────── + // Removes the recovery wrap. Forbidden in zero-knowledge mode + // (would lock the user out). Returns 409 with code ZK_ACTIVE in + // that case. + app.delete('/recovery-wrap', async (c) => { + const user = c.get('user'); + const ctx = readAuditContext(c); + + try { + await vaultService.clearRecoveryWrap(user.userId, ctx); + return c.json({ ok: true }); + } catch (err) { + if (err instanceof VaultNotFoundError) { + return c.json({ error: 'vault not initialised', code: 'VAULT_NOT_INITIALISED' }, 404); + } + if (err instanceof ZeroKnowledgeActiveError) { + return c.json( + { + error: 'cannot clear recovery wrap while zero-knowledge is active', + code: 'ZK_ACTIVE', + }, + 409 + ); + } + throw err; + } + }); + + // ─── POST /zero-knowledge ──────────────────────────────── + // Toggles zero-knowledge mode. 
Body shape: + // { enable: true } → flip on (requires recovery wrap) + // { enable: false, masterKey: base64 } → flip off (re-wrap with KEK) + // + // Enabling is destructive: the server-side wrapped_mk is NULLed out + // and the server can no longer decrypt the user's data. The client + // MUST have already called POST /recovery-wrap before calling this + // — otherwise the server returns 400 RECOVERY_WRAP_MISSING. + // + // Disabling requires the client to supply the freshly-unwrapped MK + // (from the recovery code unwrap) so the server can re-wrap it + // with the KEK. The user has to be unlocked at the moment of + // disable. + app.post('/zero-knowledge', async (c) => { + const user = c.get('user'); + const ctx = readAuditContext(c); + + const body = (await c.req.json().catch(() => null)) as { + enable?: boolean; + masterKey?: string; + } | null; + + if (!body || typeof body.enable !== 'boolean') { + return c.json({ error: '`enable: boolean` is required', code: 'BAD_REQUEST' }, 400); + } + + try { + if (body.enable) { + await vaultService.enableZeroKnowledge(user.userId, ctx); + return c.json({ ok: true, zeroKnowledge: true }); + } else { + if (typeof body.masterKey !== 'string' || !body.masterKey) { + return c.json( + { + error: '`masterKey: base64` is required when disabling zero-knowledge', + code: 'BAD_REQUEST', + }, + 400 + ); + } + const mkBytes = base64ToBytes(body.masterKey); + if (mkBytes.length !== 32) { + return c.json({ error: 'masterKey must decode to 32 bytes', code: 'BAD_REQUEST' }, 400); + } + await vaultService.disableZeroKnowledge(user.userId, mkBytes, ctx); + // Best-effort wipe of the bytes once we've handed them off. 
+ mkBytes.fill(0); + return c.json({ ok: true, zeroKnowledge: false }); + } + } catch (err) { + if (err instanceof VaultNotFoundError) { + return c.json({ error: 'vault not initialised', code: 'VAULT_NOT_INITIALISED' }, 404); + } + if (err instanceof RecoveryWrapMissingError) { + return c.json( + { + error: 'set a recovery wrap before enabling zero-knowledge', + code: 'RECOVERY_WRAP_MISSING', + }, + 400 + ); + } + throw err; + } }); return app; } +/** Maps the service's VaultFetchResult into the JSON response shape. + * Branches on `requiresRecoveryCode` so the route handler doesn't + * duplicate the field-juggling. */ +function serializeFetchResult(result: { + masterKey: Uint8Array | null; + formatVersion: number; + kekId: string; + requiresRecoveryCode?: boolean; + recoveryWrappedMk?: string; + recoveryIv?: string; +}): Record { + if (result.requiresRecoveryCode) { + return { + requiresRecoveryCode: true, + recoveryWrappedMk: result.recoveryWrappedMk, + recoveryIv: result.recoveryIv, + formatVersion: result.formatVersion, + }; + } + return { + masterKey: bytesToBase64(result.masterKey!), + formatVersion: result.formatVersion, + kekId: result.kekId, + }; +} + // ─── Helpers ───────────────────────────────────────────────── function readAuditContext(c: AppContext): AuditContext { @@ -117,3 +288,10 @@ function bytesToBase64(bytes: Uint8Array): string { for (let i = 0; i < bytes.length; i++) bin += String.fromCharCode(bytes[i]); return btoa(bin); } + +function base64ToBytes(b64: string): Uint8Array { + const bin = atob(b64); + const out = new Uint8Array(bin.length); + for (let i = 0; i < bin.length; i++) out[i] = bin.charCodeAt(i); + return out; +} diff --git a/services/mana-auth/src/services/encryption-vault/index.ts b/services/mana-auth/src/services/encryption-vault/index.ts index 4b55ab63b..ca0f6912d 100644 --- a/services/mana-auth/src/services/encryption-vault/index.ts +++ b/services/mana-auth/src/services/encryption-vault/index.ts @@ -41,14 +41,37 @@ export 
interface AuditContext { export interface VaultFetchResult { /** Raw 32 bytes of the unwrapped master key. Caller must base64-encode - * before placing in the JSON response body. */ - masterKey: Uint8Array; + * before placing in the JSON response body. + * + * null in zero-knowledge mode — the server cannot unwrap the MK + * itself and must return the recovery-wrapped blob instead. The + * route handler reads `requiresRecoveryCode` to know which branch + * to send to the client. */ + masterKey: Uint8Array | null; /** Format version of the wrap currently in storage — bumps if we ever * migrate the wire format. The client doesn't usually care, but the * rotate flow uses it to know whether a re-wrap is needed. */ formatVersion: number; - /** Which KEK produced the wrapped value. */ + /** Which KEK produced the wrapped value. Empty string in zero-knowledge + * mode (no KEK wrap exists). */ kekId: string; + /** True if the vault is in zero-knowledge mode and the client must + * provide a recovery code to unwrap. When set, masterKey is null + * and the recovery* fields are populated instead. */ + requiresRecoveryCode?: boolean; + /** Recovery wrap ciphertext (only set when requiresRecoveryCode). */ + recoveryWrappedMk?: string; + /** Recovery wrap IV (only set when requiresRecoveryCode). */ + recoveryIv?: string; +} + +/** Input for setting (or replacing) the recovery wrap. The client wraps + * the master key locally with a key derived from the recovery secret + * and sends only the resulting ciphertext + IV. The recovery secret + * itself NEVER touches the wire. */ +export interface RecoveryWrapInput { + recoveryWrappedMk: string; + recoveryIv: string; } export class EncryptionVaultService { @@ -73,13 +96,28 @@ export class EncryptionVaultService { .limit(1); if (existing.length > 0) { - // Already initialised — fall through to a regular fetch. - const masterKey = await unwrapMasterKey(existing[0].wrappedMk, existing[0].wrapIv); + // Already initialised. 
If the user is in zero-knowledge mode, + // the server can no longer hand out the plaintext master key + // — the route handler will return the recovery blob instead. + const row = existing[0]; + if (row.zeroKnowledge) { + await this.writeAudit(tx, userId, 'init', ctx, 200, 'already-exists-zk'); + return { + masterKey: null, + formatVersion: row.recoveryFormatVersion, + kekId: '', + requiresRecoveryCode: true, + recoveryWrappedMk: row.recoveryWrappedMk!, + recoveryIv: row.recoveryIv!, + }; + } + + const masterKey = await unwrapMasterKey(row.wrappedMk!, row.wrapIv!); await this.writeAudit(tx, userId, 'init', ctx, 200, 'already-exists'); return { masterKey, - formatVersion: existing[0].formatVersion, - kekId: existing[0].kekId, + formatVersion: row.formatVersion, + kekId: row.kekId, }; } @@ -119,9 +157,26 @@ export class EncryptionVaultService { } const row = rows[0]; + + // Zero-knowledge fork: the server CANNOT decrypt the MK and + // must return the recovery blob for the client to unwrap. + // `requiresRecoveryCode` flips the route handler's response + // shape — it sends the recovery wrap instead of a base64 MK. + if (row.zeroKnowledge) { + await this.writeAudit(tx, userId, 'fetch', ctx, 200, 'zk-recovery-blob'); + return { + masterKey: null, + formatVersion: row.recoveryFormatVersion, + kekId: '', + requiresRecoveryCode: true, + recoveryWrappedMk: row.recoveryWrappedMk!, + recoveryIv: row.recoveryIv!, + }; + } + let masterKey: Uint8Array; try { - masterKey = await unwrapMasterKey(row.wrappedMk, row.wrapIv); + masterKey = await unwrapMasterKey(row.wrappedMk!, row.wrapIv!); } catch (err) { // Auth-tag mismatch, wrong KEK, malformed row — all the same // to the caller (500), but we want a clear audit trail. 
@@ -154,6 +209,20 @@ export class EncryptionVaultService { */ async rotate(userId: string, ctx: AuditContext = {}): Promise { return this.withUserScope(userId, async (tx) => { + // Rotate is forbidden in zero-knowledge mode — the server can't + // re-wrap a key it can't read. The client has to disable + // zero-knowledge first (which restores a server-side wrap), + // then call rotate, then re-enable if desired. + const existing = await tx + .select() + .from(encryptionVaults) + .where(eq(encryptionVaults.userId, userId)) + .limit(1); + if (existing.length > 0 && existing[0].zeroKnowledge) { + await this.writeAudit(tx, userId, 'rotate', ctx, 409, 'zk-rotate-forbidden'); + throw new ZeroKnowledgeRotateForbidden(userId); + } + const mkBytes = generateMasterKey(); const { wrappedMk, wrapIv } = await wrapMasterKey(mkBytes); @@ -164,6 +233,13 @@ export class EncryptionVaultService { wrapIv, kekId: activeKekId(), rotatedAt: new Date(), + // Rotation also wipes any existing recovery wrap — the + // new MK has nothing to do with the old one, so the old + // recovery code would unwrap into garbage. The user has + // to set up a fresh recovery code after rotating. + recoveryWrappedMk: null, + recoveryIv: null, + recoverySetAt: null, }) .where(eq(encryptionVaults.userId, userId)) .returning(); @@ -186,6 +262,186 @@ export class EncryptionVaultService { }); } + // ─── Phase 9: Recovery Wrap + Zero-Knowledge ───────────── + + /** + * Stores (or replaces) the user's recovery wrap. The client builds + * the wrap locally — derives a key from the recovery secret, AES-GCM + * encrypts the master key, sends only the resulting ciphertext + IV. + * The recovery secret itself NEVER touches the wire. + * + * Storing a recovery wrap does NOT enable zero-knowledge mode by + * itself — the user has to follow up with `enableZeroKnowledge` to + * actually delete the server-side wrap. 
This two-step setup gives + * the UI room to confirm the recovery code is backed up before + * making the rotation irreversible. + * + * Idempotent: calling twice replaces the previous recovery wrap. + * Use case: user re-prints the recovery code with a fresh secret. + */ + async setRecoveryWrap( + userId: string, + input: RecoveryWrapInput, + ctx: AuditContext = {} + ): Promise { + return this.withUserScope(userId, async (tx) => { + const updated = await tx + .update(encryptionVaults) + .set({ + recoveryWrappedMk: input.recoveryWrappedMk, + recoveryIv: input.recoveryIv, + recoveryFormatVersion: 1, + recoverySetAt: new Date(), + }) + .where(eq(encryptionVaults.userId, userId)) + .returning(); + + if (updated.length === 0) { + await this.writeAudit(tx, userId, 'recovery_set', ctx, 404, 'no-vault'); + throw new VaultNotFoundError(userId); + } + + await this.writeAudit(tx, userId, 'recovery_set', ctx, 200, null); + }); + } + + /** + * Removes the recovery wrap. Forbidden in zero-knowledge mode (would + * leave the user with no usable wrap and no way to unlock). + */ + async clearRecoveryWrap(userId: string, ctx: AuditContext = {}): Promise { + return this.withUserScope(userId, async (tx) => { + const existing = await tx + .select() + .from(encryptionVaults) + .where(eq(encryptionVaults.userId, userId)) + .limit(1); + + if (existing.length === 0) { + await this.writeAudit(tx, userId, 'recovery_clear', ctx, 404, 'no-vault'); + throw new VaultNotFoundError(userId); + } + if (existing[0].zeroKnowledge) { + await this.writeAudit(tx, userId, 'recovery_clear', ctx, 409, 'zk-active'); + throw new ZeroKnowledgeActiveError(userId); + } + + await tx + .update(encryptionVaults) + .set({ + recoveryWrappedMk: null, + recoveryIv: null, + recoverySetAt: null, + }) + .where(eq(encryptionVaults.userId, userId)); + + await this.writeAudit(tx, userId, 'recovery_clear', ctx, 200, null); + }); + } + + /** + * Enables zero-knowledge mode. 
NULLs out wrapped_mk + wrap_iv, + * sets zero_knowledge=true. After this, the server is computationally + * incapable of decrypting the user's data — even with full DB + + * KEK access — until the user provides the recovery code on the + * next unlock. + * + * Precondition: a recovery wrap MUST already be stored. Without it, + * enabling zero-knowledge would lock the user out forever (the CHECK + * constraint enforces this at the DB level too). + * + * This is the destructive step. The UI should require an explicit + * confirmation modal — there is no undo without first calling + * `disableZeroKnowledge`, which itself requires a freshly-unwrapped + * MK from the client side. + */ + async enableZeroKnowledge(userId: string, ctx: AuditContext = {}): Promise { + return this.withUserScope(userId, async (tx) => { + const rows = await tx + .select() + .from(encryptionVaults) + .where(eq(encryptionVaults.userId, userId)) + .limit(1); + + if (rows.length === 0) { + await this.writeAudit(tx, userId, 'zk_enable', ctx, 404, 'no-vault'); + throw new VaultNotFoundError(userId); + } + if (rows[0].zeroKnowledge) { + // Already enabled — idempotent no-op so retried calls don't + // look like errors. + await this.writeAudit(tx, userId, 'zk_enable', ctx, 200, 'already-enabled'); + return; + } + if (!rows[0].recoveryWrappedMk || !rows[0].recoveryIv) { + await this.writeAudit(tx, userId, 'zk_enable', ctx, 400, 'no-recovery-wrap'); + throw new RecoveryWrapMissingError(userId); + } + + await tx + .update(encryptionVaults) + .set({ + zeroKnowledge: true, + wrappedMk: null, + wrapIv: null, + }) + .where(eq(encryptionVaults.userId, userId)); + + await this.writeAudit(tx, userId, 'zk_enable', ctx, 200, null); + }); + } + + /** + * Disables zero-knowledge mode. The client must hand back a fresh + * KEK-friendly master key (i.e. the same MK it just unwrapped with + * the recovery code, re-supplied so the server can KEK-wrap it). + * + * Why doesn't the server generate a new MK? 
Because that would + * orphan all existing encrypted data. The user-side workflow is: + * 1. Unlock with recovery code (client now has the plaintext MK) + * 2. POST /zero-knowledge with `{ enable: false, masterKey: base64(MK) }` + * 3. Server KEK-wraps the supplied MK and stores it as wrapped_mk + * 4. zero_knowledge flips back to false + * + * The client SHOULD memzero its copy of the MK bytes after the call. + */ + async disableZeroKnowledge( + userId: string, + mkBytes: Uint8Array, + ctx: AuditContext = {} + ): Promise { + return this.withUserScope(userId, async (tx) => { + const rows = await tx + .select() + .from(encryptionVaults) + .where(eq(encryptionVaults.userId, userId)) + .limit(1); + + if (rows.length === 0) { + await this.writeAudit(tx, userId, 'zk_disable', ctx, 404, 'no-vault'); + throw new VaultNotFoundError(userId); + } + if (!rows[0].zeroKnowledge) { + await this.writeAudit(tx, userId, 'zk_disable', ctx, 200, 'already-disabled'); + return; + } + + const { wrappedMk, wrapIv } = await wrapMasterKey(mkBytes); + + await tx + .update(encryptionVaults) + .set({ + zeroKnowledge: false, + wrappedMk, + wrapIv, + kekId: activeKekId(), + }) + .where(eq(encryptionVaults.userId, userId)); + + await this.writeAudit(tx, userId, 'zk_disable', ctx, 200, null); + }); + } + // ─── Internals ─────────────────────────────────────────── /** @@ -215,7 +471,15 @@ export class EncryptionVaultService { private async writeAudit( tx: Parameters[0]>[0], userId: string, - action: 'init' | 'fetch' | 'rotate' | 'failed_fetch', + action: + | 'init' + | 'fetch' + | 'rotate' + | 'failed_fetch' + | 'recovery_set' + | 'recovery_clear' + | 'zk_enable' + | 'zk_disable', ctx: AuditContext, status: number, context: string | null @@ -245,5 +509,39 @@ export class VaultNotFoundError extends Error { } } +/** + * Thrown when the client tries to enable zero-knowledge mode without + * first storing a recovery wrap. Routes convert to 400. 
+ */ +export class RecoveryWrapMissingError extends Error { + constructor(public userId: string) { + super(`cannot enable zero-knowledge mode: no recovery wrap stored for user ${userId}`); + this.name = 'RecoveryWrapMissingError'; + } +} + +/** + * Thrown when the client tries to clear the recovery wrap while + * zero-knowledge mode is active (would lock the user out). Routes + * convert to 409. + */ +export class ZeroKnowledgeActiveError extends Error { + constructor(public userId: string) { + super(`cannot clear recovery wrap while zero-knowledge mode is active for user ${userId}`); + this.name = 'ZeroKnowledgeActiveError'; + } +} + +/** + * Thrown when rotate() is called on a vault in zero-knowledge mode. + * Routes convert to 409. + */ +export class ZeroKnowledgeRotateForbidden extends Error { + constructor(public userId: string) { + super(`cannot rotate master key in zero-knowledge mode for user ${userId}`); + this.name = 'ZeroKnowledgeRotateForbidden'; + } +} + /** Re-export the type for route handlers. */ export type { EncryptionVault };