From eb8fac23ecf3d1aa6141ff4658f77bea2f248716 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:34:52 +0200 Subject: [PATCH] fix(personas): exact tool_use_id pairing + CI drift audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two loose ends from M3/M4: 1. Tool_use_id-based error attribution in the persona-runner ----------------------------------------------------------- The previous collectActionsFromMessage() flipped the *most recent* ActionRow to 'error' when a tool_result carried is_error:true. That was fine as long as Claude invoked tools strictly in sequence, but when the planner pipelines multiple tools in one turn, a later tool_result carries an earlier tool_use_id — the last-action fallback mis- attributes the error. runMainTurn() now keeps a tool_use_id → action-index Map for the duration of the tick. On tool_use we stash block.id, on tool_result we look up the exact ActionRow via tool_use_id and flip that one. The "flip last" path survives as a pure fallback if a future SDK ever ships a block without an id. 2. New audit:encrypted-tools script ----------------------------------- scripts/audit-encrypted-tools.ts — loads registerAllModules() and apps/mana/…/crypto/registry.ts, diffs every ToolSpec.encryptedFields against the authoritative web-app ENCRYPTION_REGISTRY. Catches three classes of drift: - missing-table : tool declares a table the web-app doesn't encrypt - field-drift : both agree a table is encrypted but the field lists differ (half-encryption in the wire is silent death) - disabled : web-app has enabled:false while the tool still encrypts — advisory warning, not a fail Negative-tested by injecting a deliberate drift on todo.create + todo.list (shortened ENCRYPTED_FIELDS to ['title']); the auditor flagged both tools with full field diffs, restore returned to green. Wired into `pnpm run validate:all` so the contract survives future edits on either side. Fills the M4 audit gap noted in project_mana_mcp_personas.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 3 +- scripts/audit-encrypted-tools.ts | 124 ++++++++++++++++++ .../src/runner/claude-session.ts | 35 +++-- 3 files changed, 150 insertions(+), 12 deletions(-) create mode 100644 scripts/audit-encrypted-tools.ts diff --git a/package.json b/package.json index d533e60c3..5a7fd9631 100644 --- a/package.json +++ b/package.json @@ -24,9 +24,10 @@ "validate:theme-variables": "node scripts/validate-theme-variables.mjs", "validate:theme-utilities": "node scripts/validate-theme-utilities.mjs", "validate:theme-parity": "node scripts/validate-theme-parity.mjs", - "validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto", + "validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto && pnpm run audit:encrypted-tools", "check:crypto": "node scripts/audit-crypto-registry.mjs", "check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed", + "audit:encrypted-tools": "bun run scripts/audit-encrypted-tools.ts", "audit:deps": "node scripts/audit-workspace-deps.mjs", "audit:modules": "node scripts/audit-modules.mjs", "audit:coupling": "node scripts/audit-module-coupling.mjs", diff --git a/scripts/audit-encrypted-tools.ts b/scripts/audit-encrypted-tools.ts new file mode 100644 index 000000000..d0e119385 --- /dev/null +++ b/scripts/audit-encrypted-tools.ts @@ -0,0 +1,124 @@ +#!/usr/bin/env bun +/** + * Audit the encryption-fields contract between the web-app + * `ENCRYPTION_REGISTRY` (Dexie hook) and each `ToolSpec.encryptedFields` + * declaration in `@mana/tool-registry`. + * + * Why: when a server-side tool encrypts fewer fields than the web app, + * records written by the persona-runner look "encrypted enough" on the + * wire but arrive plaintext in the columns the web app expected to + * encrypt — silent data leak into sync rows. When the tool encrypts + * MORE than the web app, the web app can't decrypt its own reads. + * + * Invariants: + * 1. For every ToolSpec with `encryptedFields: {table, fields}`, the + * web-app registry has an entry for `table`. + * 2. The set of fields declared on the spec matches exactly — same + * names, same cardinality. Extra / missing fields on either side + * fail the audit. + * 3. (Advisory) If the web-app registry marks a table as + * `enabled: false`, encrypted tool handlers are pointless — flag + * but don't fail (opt-in rollout may intentionally stage these). + * + * Usage: + * bun run scripts/audit-encrypted-tools.ts # exit 1 on drift + * pnpm run audit:encrypted-tools # same + * + * Wired into `pnpm run validate:all`. + */ + +import { + registerAllModules, + getRegistry, + __resetRegistryForTests, +} from '../packages/mana-tool-registry/src/index.ts'; +import { ENCRYPTION_REGISTRY } from '../apps/mana/apps/web/src/lib/data/crypto/registry.ts'; + +interface Violation { + tool: string; + kind: 'missing-table' | 'field-drift' | 'disabled'; + detail: string; +} + +function auditEncryptedTools(): Violation[] { + __resetRegistryForTests(); + registerAllModules(); + + const violations: Violation[] = []; + + for (const tool of getRegistry()) { + if (!tool.encryptedFields) continue; + + const { table, fields } = tool.encryptedFields; + const webAppEntry = ENCRYPTION_REGISTRY[table]; + + if (!webAppEntry) { + violations.push({ + tool: tool.name, + kind: 'missing-table', + detail: `table "${table}" has no entry in apps/mana/.../crypto/registry.ts`, + }); + continue; + } + + const actual = [...fields].sort().join(','); + const expected = [...webAppEntry.fields].sort().join(','); + + if (actual !== expected) { + violations.push({ + tool: tool.name, + kind: 'field-drift', + detail: + `tool encrypts [${actual}], web-app encrypts [${expected}] — ` + + `both sides must agree or the record is half-encrypted`, + }); + continue; + } + + if (webAppEntry.enabled === false) { + violations.push({ + tool: tool.name, + kind: 'disabled', + detail: + `web-app registry has enabled:false for "${table}" — ` + + `tool will encrypt but app will write/read plaintext, wire records will mix`, + }); + } + } + + return violations; +} + +const violations = auditEncryptedTools(); + +if (violations.length === 0) { + const total = getRegistry().filter((t) => t.encryptedFields).length; + console.log(`✓ encrypted-tools audit: ${total} tool(s) match the web-app registry.`); + process.exit(0); +} + +// Split by kind for readability +const fatals = violations.filter((v) => v.kind !== 'disabled'); +const warnings = violations.filter((v) => v.kind === 'disabled'); + +if (warnings.length > 0) { + console.warn(`⚠ ${warnings.length} advisory warning(s):`); + for (const w of warnings) console.warn(` · ${w.tool}: ${w.detail}`); + console.warn(''); +} + +if (fatals.length === 0) { + console.log('✓ no hard violations.'); + process.exit(0); +} + +console.error(`✗ encrypted-tools audit found ${fatals.length} violation(s):`); +for (const v of fatals) { + console.error(` · ${v.tool} [${v.kind}]`); + console.error(` ${v.detail}`); +} +console.error(''); +console.error( + 'Fix: either update the ToolSpec.encryptedFields or the web-app registry — both must agree.' +); +process.exit(1); diff --git a/services/mana-persona-runner/src/runner/claude-session.ts b/services/mana-persona-runner/src/runner/claude-session.ts index a7d9f8a70..27547e383 100644 --- a/services/mana-persona-runner/src/runner/claude-session.ts +++ b/services/mana-persona-runner/src/runner/claude-session.ts @@ -96,8 +96,16 @@ export async function runMainTurn(input: SessionInput): Promise { }, }); + // Per-turn Map so tool_result blocks can flip the *right* ActionRow + // (the one whose tool_use_id they reference), not "the most recent". + // Anthropic's stream interleaves tool_use and tool_result blocks + // deterministically, but when Claude pipelines multiple tools in one + // assistant turn, a later tool_result carries an earlier tool_use_id + // — the last-action fallback gets that wrong. + const toolUseIndex = new Map(); + for await (const msg of q as AsyncIterable) { - collectActionsFromMessage(msg, input.tickId, actions, modulesUsed); + collectActionsFromMessage(msg, input.tickId, actions, modulesUsed, toolUseIndex); } return { actions, feedback: [], modulesUsed }; @@ -143,7 +151,8 @@ function collectActionsFromMessage( msg: SDKMessage, tickId: string, actions: ActionRow[], - modulesUsed: Set + modulesUsed: Set, + toolUseIndex: Map ): void { // SDKMessage is a big union; we only care about assistant messages // that contain tool_use blocks, and user messages that contain @@ -168,19 +177,23 @@ function collectActionsFromMessage( inputHash: hashInput(block.input), result: 'ok', // provisional; rewritten on matching tool_result if it was an error }); + // Record the Anthropic-assigned id so a later tool_result can + // find its row even if other tools finished in between. + const toolUseId = typeof block.id === 'string' ? block.id : null; + if (toolUseId) toolUseIndex.set(toolUseId, actions.length - 1); } else if (blockType === 'tool_result') { const isError = block.is_error === true; if (!isError) continue; - // Flip the most recent action that matches this tool_use_id. const toolUseId = typeof block.tool_use_id === 'string' ? block.tool_use_id : null; - if (!toolUseId) continue; - // We didn't store tool_use_id (would require pairing state); cheap - // fallback: mark the last action as error. Good enough for the - // audit dashboard; precise attribution lands in a later iteration. - const last = actions[actions.length - 1]; - if (last) { - last.result = 'error'; - last.errorMessage = stringifyBlock(block); + // Exact pairing via tool_use_id → fall back to last action only + // if the id is missing (shouldn't happen with the current SDK, + // but the fallback keeps the pipeline honest if Anthropic ever + // ships a block without an id). + const idx = toolUseId !== null ? toolUseIndex.get(toolUseId) : undefined; + const target = idx !== undefined ? actions[idx] : actions[actions.length - 1]; + if (target) { + target.result = 'error'; + target.errorMessage = stringifyBlock(block); } } }