From c7af693c6dd7386ff8a18d632384206c14b7ef7a Mon Sep 17 00:00:00 2001 From: Till JS Date: Mon, 20 Apr 2026 14:36:32 +0200 Subject: [PATCH] =?UTF-8?q?feat(crypto):=20Phase=20C=20=E2=80=94=20build-t?= =?UTF-8?q?ime=20registry=20=E2=86=94=20Dexie=20audit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: adding a new Dexie table left the encryption decision implicit. If you forgot to register it, the table silently shipped in plaintext forever — no error, no warning, no footprint anywhere. The architecture audit flagged this as the root of Concern 1. - `scripts/audit-crypto-registry.mjs` parses database.ts's `.stores()` blocks and registry.ts's entries, then enforces three invariants: 1. Every Dexie table is either in the encryption registry OR in the new `plaintext-allowlist.ts` — one conscious classification per table. 2. No dead registry entries (referring to tables that no longer exist in Dexie). 3. No table appears in both — single authoritative source. - `plaintext-allowlist.ts` auto-seeded from current state. 105 entries, each tagged `// TODO: audit` as an invitation to review whether the table truly holds nothing sensitive. The allowlist is intentionally a separate file so additions are reviewable on their own (not buried inside database.ts schema bumps). - Wired into `pnpm run check:crypto` + CI validate job — a new table now fails the PR check instead of slipping past review. - `check:crypto:seed` regenerates the allowlist if ever needed. Verified: drift simulation (removing aiMissions from the allowlist) fails the audit with a clear message pointing at the missing classification. Current state passes: 187 Dexie tables, 82 encrypted, 105 explicit plaintext. Concern 1 is now fully closed (A: typed registry entries, B: dev-mode runtime drift check, C: build-time audit enforcing coverage). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 3 + .../lib/data/crypto/plaintext-allowlist.ts | 125 ++++++++ .../apps/web/src/lib/data/crypto/registry.ts | 8 +- package.json | 2 + scripts/audit-crypto-registry.mjs | 271 ++++++++++++++++++ 5 files changed, 406 insertions(+), 3 deletions(-) create mode 100644 apps/mana/apps/web/src/lib/data/crypto/plaintext-allowlist.ts create mode 100755 scripts/audit-crypto-registry.mjs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eb057949a..097031b40 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -440,6 +440,9 @@ jobs: - name: Validate monorepo best practices run: pnpm run validate:monorepo + - name: Audit crypto registry (Dexie ↔ registry ↔ allowlist) + run: pnpm run check:crypto + - name: Type check run: pnpm run type-check diff --git a/apps/mana/apps/web/src/lib/data/crypto/plaintext-allowlist.ts b/apps/mana/apps/web/src/lib/data/crypto/plaintext-allowlist.ts new file mode 100644 index 000000000..9b9272e16 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/crypto/plaintext-allowlist.ts @@ -0,0 +1,125 @@ +/** + * Plaintext allowlist — Dexie tables that are intentionally NOT encrypted. + * + * Counterpart to ENCRYPTION_REGISTRY in crypto/registry.ts. The audit script + * (`scripts/audit-crypto-registry.mjs`, wired as `pnpm run check:crypto`) + * fails if a Dexie table is in neither list. + * + * Why a separate file: adding a table here is a conscious security decision + * ("this genuinely holds no user-sensitive data") and should be reviewable + * as its own diff, not buried inside database.ts. + * + * Auto-seeded from current state on 2026-04-20 — every entry below was + * introduced before the audit script existed. The `// TODO: audit` markers + * are an invitation to review each one: does this table really hold nothing + * that would embarrass the user if it leaked? If not, move it to the + * encryption registry. 
+ */ + +export const PLAINTEXT_ALLOWLIST: readonly string[] = [ + 'achievements', // TODO: audit + 'activities', // TODO: audit + 'aiMissions', // TODO: audit + 'albumItems', // TODO: audit + 'albums', // TODO: audit + 'automations', // TODO: audit + 'boardViews', // TODO: audit + 'budgets', // TODO: audit + 'calculations', // TODO: audit + 'calendars', // TODO: audit + 'ccFavorites', // TODO: audit + 'ccLocationTags', // TODO: audit + 'ccLocations', // TODO: audit + 'cities', // TODO: audit + 'companionConversations', // TODO: audit + 'companionGoals', // TODO: audit + 'companionMessages', // TODO: audit + 'contactTags', // TODO: audit + 'contextSpaces', // TODO: audit + 'conversationTags', // TODO: audit + 'customQuotes', // TODO: audit + 'dashboardConfigs', // TODO: audit + 'deckTags', // TODO: audit + 'documentTags', // TODO: audit + 'dreamTags', // TODO: audit + 'entryTags', // TODO: audit + 'eventInvitations', // TODO: audit + 'eventItems', // TODO: audit + 'eventTags', // TODO: audit + 'fileTags', // TODO: audit + 'financeCategories', // TODO: audit + 'foodFavorites', // TODO: audit + 'globalTags', // TODO: audit + 'goals', // TODO: audit + 'guideCollections', // TODO: audit + 'guideTags', // TODO: audit + 'habitLogs', // TODO: audit + 'habits', // TODO: audit + 'imageTags', // TODO: audit + 'invCategories', // TODO: audit + 'invCollections', // TODO: audit + 'invItemTags', // TODO: audit + 'invLocations', // TODO: audit + 'linkTags', // TODO: audit + 'manaLinks', // TODO: audit + 'markers', // TODO: audit + 'mealTags', // TODO: audit + 'memoSpaces', // TODO: audit + 'memoTags', // TODO: audit + 'memoroSpaces', // TODO: audit + 'moodTags', // TODO: audit + 'moods', // TODO: audit + 'mukkeProjects', // TODO: audit + 'newsCachedFeed', // TODO: audit + 'noteTags', // TODO: audit + 'pendingProposals', // TODO: audit + 'periodSymptoms', // TODO: audit + 'photoFavorites', // TODO: audit + 'photoMediaTags', // TODO: audit + 'placeTags', // TODO: audit + 
'plantPhotos', // TODO: audit + 'plantTags', // TODO: audit + 'playlistSongs', // TODO: audit + 'presiDeckTags', // TODO: audit + 'qCollections', // TODO: audit + 'questionTags', // TODO: audit + 'quizAttempts', // TODO: audit + 'quotesFavorites', // TODO: audit + 'quotesListTags', // TODO: audit + 'quotesLists', // TODO: audit + 'reminders', // TODO: audit + 'ritualLogs', // TODO: audit + 'ritualSteps', // TODO: audit + 'rituals', // TODO: audit + 'runs', // TODO: audit + 'savedFormulas', // TODO: audit + 'sequences', // TODO: audit + 'skillTags', // TODO: audit + 'skills', // TODO: audit + 'songTags', // TODO: audit + 'spaceMembers', // TODO: audit + 'storageFolders', // TODO: audit + 'tagGroups', // TODO: audit + 'taskLabels', // TODO: audit + 'timeAlarms', // TODO: audit + 'timeBlockTags', // TODO: audit + 'timeClients', // TODO: audit + 'timeCountdownTimers', // TODO: audit + 'timeEntries', // TODO: audit + 'timeProjects', // TODO: audit + 'timeSettings', // TODO: audit + 'timeTemplates', // TODO: audit + 'timeWorldClocks', // TODO: audit + 'todoProjects', // TODO: audit + 'uloadFolders', // TODO: audit + 'uloadTags', // TODO: audit + 'userSettings', // TODO: audit + 'wateringLogs', // TODO: audit + 'wateringSchedules', // TODO: audit + 'wetterLocations', // TODO: audit + 'wetterSettings', // TODO: audit + 'wishesItems', // TODO: audit + 'wishesLists', // TODO: audit + 'wishesPriceChecks', // TODO: audit + 'workbenchScenes', // TODO: audit +]; diff --git a/apps/mana/apps/web/src/lib/data/crypto/registry.ts b/apps/mana/apps/web/src/lib/data/crypto/registry.ts index 5ec8d5bdb..7d26da2f8 100644 --- a/apps/mana/apps/web/src/lib/data/crypto/registry.ts +++ b/apps/mana/apps/web/src/lib/data/crypto/registry.ts @@ -3,9 +3,11 @@ * tables get encrypted. * * Strict allowlist semantics: anything not listed here stays plaintext. - * Adding a new module = adding an entry here. 
Forgetting to add a field - * means it ships in plaintext, which is the safer failure mode than the - * inverse (a typo'd field name silently failing to decrypt). + * Adding a new module = adding an entry here OR an entry in + * `plaintext-allowlist.ts` (explicit "this table genuinely holds no + * sensitive data"). The `pnpm run check:crypto` audit script enforces + * that every Dexie table appears in exactly one of the two — forgetting + * a new table now fails CI instead of silently shipping plaintext. * * Why a central registry instead of per-module config? * - One pull request to audit ahead of a release: "what is encrypted?" diff --git a/package.json b/package.json index a49ac2f02..300fe83d5 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,8 @@ "format:check": "prettier --config .prettierrc.json --check \"**/*.{ts,tsx,js,jsx,json,md,svelte,astro}\"", "check:status": "bash scripts/check-status.sh", "validate:dockerfiles": "node scripts/validate-dockerfiles.mjs", + "check:crypto": "node scripts/audit-crypto-registry.mjs", + "check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed", "audit:deps": "node scripts/audit-workspace-deps.mjs", "audit:modules": "node scripts/audit-modules.mjs", "audit:coupling": "node scripts/audit-module-coupling.mjs", diff --git a/scripts/audit-crypto-registry.mjs b/scripts/audit-crypto-registry.mjs new file mode 100755 index 000000000..ed26ad776 --- /dev/null +++ b/scripts/audit-crypto-registry.mjs @@ -0,0 +1,271 @@ +#!/usr/bin/env node +/** + * Audit the encryption-registry ↔ Dexie-schema contract. + * + * Why: without this script, adding a new sensitive table silently ships + * in plaintext — the registry is the only thing that knows which fields + * to encrypt, and forgetting to register a table leaves no footprint + * anywhere. See docs/plans/crypto-audit-phase-c.md in the architecture + * audit. + * + * Invariants enforced: + * 1. 
/* Every table declared in Dexie (database.ts) is accounted for as
 *      either (a) an encryption-registry entry — fields will be encrypted —
 *      or (b) an explicit plaintext-allowlist entry — someone made a
 *      conscious call that nothing here needs encryption.
 *   2. Every encryption-registry entry AND every plaintext-allowlist entry
 *      refers to a table that still exists in Dexie — no dead entries
 *      drifting out of sync with the schema.
 *   3. A table never appears in both the registry AND the allowlist —
 *      one authoritative classification.
 *
 * Zero deps — runs as plain Node ESM.
 *
 * Usage:
 *   node scripts/audit-crypto-registry.mjs          # audit, exit 1 on violation
 *   node scripts/audit-crypto-registry.mjs --seed   # print an allowlist seeded from current state
 */

import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';

const __dirname = dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = join(__dirname, '..');

const DATABASE_TS = join(REPO_ROOT, 'apps/mana/apps/web/src/lib/data/database.ts');
const REGISTRY_TS = join(REPO_ROOT, 'apps/mana/apps/web/src/lib/data/crypto/registry.ts');
const ALLOWLIST_PATH = join(
	REPO_ROOT,
	'apps/mana/apps/web/src/lib/data/crypto/plaintext-allowlist.ts'
);

/**
 * Removes block comments and line comments from a source fragment.
 *
 * This is a coarse textual strip, not a tokenizer: a `//` or `/*` inside a
 * string literal is removed as well. The fragments this script feeds in
 * (schema specs, registry entries, allowlist names) are assumed not to
 * contain such sequences.
 *
 * @param {string} source
 * @returns {string}
 */
function stripComments(source) {
	return source.replace(/\/\*[\s\S]*?\*\//g, '').replace(/\/\/[^\n]*/g, '');
}

/**
 * Extracts all live Dexie table names declared across every
 * `db.version(N).stores({...})` block.
 *
 * Dexie uses `tableName: null` to drop a table in a later version, so the
 * LAST mention of a table (in source order) is authoritative: a table whose
 * last mention is `null` is treated as dropped, any other as live.
 *
 * Names starting with `_` are skipped — those are the internal
 * sync-infrastructure tables, not user data managed by the module stores.
 *
 * @param {string} source  Contents of database.ts.
 * @returns {string[]} Sorted, de-duplicated live table names.
 */
function extractDexieTables(source) {
	// [\s\S] because bodies span multiple lines; lazy so consecutive blocks
	// don't merge into one match.
	const storesRegex = /db\.version\(\d+\)\.stores\(\{([\s\S]*?)\}\)/g;
	// Each entry is `tableName: 'spec'` or `tableName: null`.
	const entryRegex = /(\w+)\s*:\s*(null\b|['"])/g;

	/** @type {Map<string, boolean>} tableName → latest mention is `null` */
	const droppedByName = new Map();
	for (const block of source.matchAll(storesRegex)) {
		// Strip BOTH comment styles so a commented-out entry (`// x: 'id'` or
		// `/* x: 'id' */`) is never counted as a live table.
		const body = stripComments(block[1]);
		for (const entry of body.matchAll(entryRegex)) {
			droppedByName.set(entry[1], entry[2] === 'null');
		}
	}

	return [...droppedByName]
		.filter(([name, dropped]) => !dropped && !name.startsWith('_'))
		.map(([name]) => name)
		.sort();
}

/**
 * Locates the `{` that opens the ENCRYPTION_REGISTRY object literal.
 *
 * Prefers the actual declaration (`const ENCRYPTION_REGISTRY ... = {`) so
 * that an earlier mention of the name in a doc comment, or a `{` inside a
 * type annotation, cannot be mistaken for the literal. Falls back to
 * "first mention, then next `{`" when the declaration has another shape.
 *
 * @param {string} source  Contents of registry.ts.
 * @returns {number} Index of the opening brace in `source`.
 * @throws {Error} If the registry or its opening brace cannot be found.
 */
function findRegistryOpeningBrace(source) {
	const declaration = /\b(?:const|let|var)\s+ENCRYPTION_REGISTRY\b[^=]*=\s*\{/.exec(source);
	if (declaration) return declaration.index + declaration[0].length - 1;

	const mention = source.indexOf('ENCRYPTION_REGISTRY');
	if (mention < 0) throw new Error('ENCRYPTION_REGISTRY not found in registry.ts');
	const braceOpen = source.indexOf('{', mention);
	if (braceOpen < 0) throw new Error('Registry opening brace not found');
	return braceOpen;
}

/**
 * Extracts the registered table names from ENCRYPTION_REGISTRY. Accepts both
 * entry shapes that can legally appear:
 *   messages: entry(['messageText']),
 *   conversations: { enabled: true, fields: ['title'] },
 * Comments inside the literal are ignored.
 *
 * @param {string} source  Contents of registry.ts.
 * @returns {string[]} Sorted, de-duplicated top-level keys of the registry.
 * @throws {Error} If the registry literal cannot be located or is unbalanced.
 */
function extractRegistryKeys(source) {
	const braceOpen = findRegistryOpeningBrace(source);

	// Walk to the matching closing brace. Braces inside strings/comments are
	// not special-cased — the registry content is assumed to be well-behaved.
	let depth = 0;
	let braceClose = -1;
	for (let i = braceOpen; i < source.length; i++) {
		const ch = source[i];
		if (ch === '{') {
			depth++;
		} else if (ch === '}' && --depth === 0) {
			braceClose = i;
			break;
		}
	}
	if (braceClose < 0) throw new Error('Registry closing brace not found');

	const body = stripComments(source.slice(braceOpen + 1, braceClose));

	// Keep only the text that is a direct child of the registry object: the
	// contents of nested {...}, [...] and (...) are dropped (their outermost
	// brackets are kept), so commas and `key:` pairs inside an entry cannot
	// be mistaken for top-level ones.
	let nesting = 0;
	let topLevel = '';
	for (const ch of body) {
		if (ch === '{' || ch === '[' || ch === '(') {
			if (nesting === 0) topLevel += ch;
			nesting++;
		} else if (ch === '}' || ch === ']' || ch === ')') {
			nesting--;
			if (nesting === 0) topLevel += ch;
		} else if (nesting === 0) {
			topLevel += ch;
		}
	}

	// Each comma-separated segment that starts with `identifier:` is a key.
	const keys = new Set();
	for (const segment of topLevel.split(',')) {
		const m = segment.match(/^\s*([A-Za-z_$][\w$]*)\s*:/);
		if (m) keys.add(m[1]);
	}
	return [...keys].sort();
}

/**
 * Loads the plaintext allowlist.
 *
 * The file is a plain TS module exporting `PLAINTEXT_ALLOWLIST` as a
 * string-array literal — it is parsed with a coarse regex rather than
 * imported, because this script is plain Node, not TypeScript-aware.
 *
 * @returns {string[]} Sorted, de-duplicated allowlisted table names; `[]` if
 *   the allowlist file does not exist yet.
 * @throws {Error} If the file exists but contains no `PLAINTEXT_ALLOWLIST`
 *   array literal, or if it cannot be read for a reason other than ENOENT.
 */
function loadAllowlist() {
	let source;
	try {
		source = readFileSync(ALLOWLIST_PATH, 'utf8');
	} catch (err) {
		if (err.code === 'ENOENT') return [];
		throw err;
	}
	// Strip comments so the match doesn't pick up commented-out entries.
	const stripped = stripComments(source);
	// Anchor on `= [` to avoid matching the `string[]` type annotation's `[]`.
	const literal = stripped.match(/PLAINTEXT_ALLOWLIST[\s\S]*?=\s*\[([\s\S]*?)\]/);
	if (!literal) {
		// An existing file without the literal means this parser no longer
		// understands it; say so instead of pretending the allowlist is empty.
		throw new Error('PLAINTEXT_ALLOWLIST array literal not found in plaintext-allowlist.ts');
	}
	const names = new Set();
	for (const entry of literal[1].matchAll(/['"]([^'"]+)['"]/g)) names.add(entry[1]);
	return [...names].sort();
}

/**
 * Reads database.ts and registry.ts and returns the parsed schema state
 * shared by `audit()` and `seed()`.
 *
 * @returns {{ dexieTables: string[], registryKeys: Set<string> }}
 * @throws {Error} If either file cannot be read, if the registry cannot be
 *   parsed, or if no Dexie table at all was recognised (parser drift).
 */
function readSchemaState() {
	const dexieTables = extractDexieTables(readFileSync(DATABASE_TS, 'utf8'));
	if (dexieTables.length === 0) {
		throw new Error(
			'No Dexie tables found in database.ts — the `db.version(N).stores({...})` ' +
				'pattern this script parses no longer matches. Update scripts/audit-crypto-registry.mjs.'
		);
	}
	const registryKeys = new Set(extractRegistryKeys(readFileSync(REGISTRY_TS, 'utf8')));
	return { dexieTables, registryKeys };
}

/**
 * Runs the audit: checks every invariant, prints either a success line to
 * stdout or the list of violations to stderr, and sets the process exit code
 * to 1 when there is at least one violation.
 *
 * `process.exitCode` is used instead of `process.exit()` so pending writes to
 * a piped stderr (CI) are flushed before the process ends.
 */
function audit() {
	const { dexieTables, registryKeys } = readSchemaState();
	const allowlist = new Set(loadAllowlist());
	const dexieSet = new Set(dexieTables);

	const violations = [];

	// Invariants 1 + 3: every Dexie table is classified exactly once.
	for (const table of dexieTables) {
		const inRegistry = registryKeys.has(table);
		const inAllowlist = allowlist.has(table);
		if (!inRegistry && !inAllowlist) {
			violations.push(
				`UNCLASSIFIED: Dexie table '${table}' is neither encrypted (registry.ts) ` +
					`nor explicitly plaintext (plaintext-allowlist.ts). ` +
					`Pick one — if unsure, default to encrypting it.`
			);
		}
		if (inRegistry && inAllowlist) {
			violations.push(
				`DOUBLE-CLASSIFIED: table '${table}' is in both registry.ts AND ` +
					`plaintext-allowlist.ts. Remove it from one.`
			);
		}
	}

	// Invariant 2: every registry entry corresponds to a real Dexie table.
	for (const key of registryKeys) {
		if (!dexieSet.has(key)) {
			violations.push(
				`DEAD REGISTRY ENTRY: '${key}' is registered for encryption but no longer ` +
					`exists in database.ts. Remove the registry entry or re-add the Dexie table.`
			);
		}
	}
	// Invariant 2b: same for the allowlist.
	for (const name of allowlist) {
		if (!dexieSet.has(name)) {
			violations.push(
				`DEAD ALLOWLIST ENTRY: '${name}' is in plaintext-allowlist.ts but no longer ` +
					`exists in database.ts. Remove it.`
			);
		}
	}

	if (violations.length > 0) {
		console.error(`\n✗ Crypto registry audit FAILED (${violations.length} violation(s)):\n`);
		for (const v of violations) console.error(` • ${v}`);
		console.error(
			`\nSummary: ${dexieTables.length} Dexie tables, ${registryKeys.size} registry entries, ` +
				`${allowlist.size} allowlist entries.\n`
		);
		process.exitCode = 1;
		return;
	}

	console.log(
		`✓ Crypto registry audit passed: ${dexieTables.length} Dexie tables all classified ` +
			`(${registryKeys.size} encrypted, ${allowlist.size} allowlisted plaintext).`
	);
}

/**
 * Emits a starter allowlist containing every live Dexie table that is not in
 * the encryption registry. Writes to stdout — redirect into
 * plaintext-allowlist.ts and commit. Every entry gets a TODO marker so
 * subsequent contributors know these were auto-seeded, not audited.
 *
 * The header is stamped with the date of the run (UTC) so a regenerated
 * file does not claim a stale seeding date.
 */
function seed() {
	const { dexieTables, registryKeys } = readSchemaState();
	const unclassified = dexieTables.filter((t) => !registryKeys.has(t));
	const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD

	const header = [
		'/**',
		' * Plaintext allowlist — Dexie tables that are intentionally NOT encrypted.',
		' *',
		' * Counterpart to ENCRYPTION_REGISTRY in crypto/registry.ts. The audit script',
		' * (`scripts/audit-crypto-registry.mjs`, wired as `pnpm run check:crypto`)',
		' * fails if a Dexie table is in neither list.',
		' *',
		' * Why a separate file: adding a table here is a conscious security decision',
		' * ("this genuinely holds no user-sensitive data") and should be reviewable',
		' * as its own diff, not buried inside database.ts.',
		' *',
		` * Auto-seeded from current state on ${today} — every entry below was`,
		' * introduced before the audit script existed. The `// TODO: audit` markers',
		' * are an invitation to review each one: does this table really hold nothing',
		' * that would embarrass the user if it leaked? If not, move it to the',
		' * encryption registry.',
		' */',
		'',
		'export const PLAINTEXT_ALLOWLIST: readonly string[] = [',
	].join('\n');
	const body = unclassified.map((t) => `\t'${t}', // TODO: audit`).join('\n');
	const footer = '\n];\n';
	process.stdout.write(header + '\n' + body + footer);
}

// CLI dispatch: no argument → audit, `--seed` → seed. Anything else is
// rejected so a mistyped flag cannot silently run the wrong mode.
const arg = process.argv[2];
if (arg === undefined) {
	audit();
} else if (arg === '--seed') {
	seed();
} else {
	console.error(
		`Unknown argument '${arg}'.\nUsage: node scripts/audit-crypto-registry.mjs [--seed]`
	);
	process.exitCode = 2;
}