fix(personas): exact tool_use_id pairing + CI drift audit

Two loose ends from M3/M4: 1. Tool_use_id-based error attribution in the persona-runner ----------------------------------------------------------- The previous collectActionsFromMessage() flipped the *most recent* ActionRow to 'error' when a tool_result carried is_error:true. That was fine as long as Claude invoked tools strictly in sequence, but when the planner pipelines multiple tools in one turn, a later tool_result carries an earlier tool_use_id — the last-action fallback mis- attributes the error. runMainTurn() now keeps a tool_use_id → action-index Map for the duration of the tick. On tool_use we stash block.id, on tool_result we look up the exact ActionRow via tool_use_id and flip that one. The "flip last" path survives as a pure fallback if a future SDK ever ships a block without an id. 2. New audit:encrypted-tools script ----------------------------------- scripts/audit-encrypted-tools.ts — loads registerAllModules() and apps/mana/…/crypto/registry.ts, diffs every ToolSpec.encryptedFields against the authoritative web-app ENCRYPTION_REGISTRY. Catches three classes of drift: - missing-table : tool declares a table the web-app doesn't encrypt - field-drift : both agree a table is encrypted but the field lists differ (half-encryption in the wire is silent death) - disabled : web-app has enabled:false while the tool still encrypts — advisory warning, not a fail Negative-tested by injecting a deliberate drift on todo.create + todo.list (shortened ENCRYPTED_FIELDS to ['title']); the auditor flagged both tools with full field diffs, restore returned to green. Wired into `pnpm run validate:all` so the contract survives future edits on either side. Fills the M4 audit gap noted in project_mana_mcp_personas.md. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 19:01:08 +02:00 · 2026-04-23 15:34:52 +02:00 · 2026-04-23 15:34:52 +02:00 · eb8fac23ec
commit eb8fac23ec
parent 703ef69ca9
3 changed files with 150 additions and 12 deletions
--- a/package.json
+++ b/package.json
@ -24,9 +24,10 @@
 		"validate:theme-variables": "node scripts/validate-theme-variables.mjs",
 		"validate:theme-utilities": "node scripts/validate-theme-utilities.mjs",
 		"validate:theme-parity": "node scripts/validate-theme-parity.mjs",
-		"validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto",
+		"validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto && pnpm run audit:encrypted-tools",
 		"check:crypto": "node scripts/audit-crypto-registry.mjs",
 		"check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed",
+		"audit:encrypted-tools": "bun run scripts/audit-encrypted-tools.ts",
 		"audit:deps": "node scripts/audit-workspace-deps.mjs",
 		"audit:modules": "node scripts/audit-modules.mjs",
 		"audit:coupling": "node scripts/audit-module-coupling.mjs",
--- a/scripts/audit-encrypted-tools.ts
+++ b/scripts/audit-encrypted-tools.ts
@ -0,0 +1,124 @@
+#!/usr/bin/env bun
+/**
+ * Audit the encryption-fields contract between the web-app
+ * `ENCRYPTION_REGISTRY` (Dexie hook) and each `ToolSpec.encryptedFields`
+ * declaration in `@mana/tool-registry`.
+ *
+ * Why: when a server-side tool encrypts fewer fields than the web app,
+ * records written by the persona-runner look "encrypted enough" on the
+ * wire but arrive plaintext in the columns the web app expected to
+ * encrypt — silent data leak into sync rows. When the tool encrypts
+ * MORE than the web app, the web app can't decrypt its own reads.
+ *
+ * Invariants:
+ *   1. For every ToolSpec with `encryptedFields: {table, fields}`, the
+ *      web-app registry has an entry for `table`.
+ *   2. The set of fields declared on the spec matches exactly — same
+ *      names, same cardinality. Extra / missing fields on either side
+ *      fail the audit.
+ *   3. (Advisory) If the web-app registry marks a table as
+ *      `enabled: false`, encrypted tool handlers are pointless — flag
+ *      but don't fail (opt-in rollout may intentionally stage these).
+ *
+ * Usage:
+ *   bun run scripts/audit-encrypted-tools.ts            # exit 1 on drift
+ *   pnpm run audit:encrypted-tools                      # same
+ *
+ * Wired into `pnpm run validate:all`.
+ */
+
+import {
+	registerAllModules,
+	getRegistry,
+	__resetRegistryForTests,
+} from '../packages/mana-tool-registry/src/index.ts';
+import { ENCRYPTION_REGISTRY } from '../apps/mana/apps/web/src/lib/data/crypto/registry.ts';
+
+interface Violation {
+	tool: string;
+	kind: 'missing-table' | 'field-drift' | 'disabled';
+	detail: string;
+}
+
+function auditEncryptedTools(): Violation[] {
+	__resetRegistryForTests();
+	registerAllModules();
+
+	const violations: Violation[] = [];
+
+	for (const tool of getRegistry()) {
+		if (!tool.encryptedFields) continue;
+
+		const { table, fields } = tool.encryptedFields;
+		const webAppEntry = ENCRYPTION_REGISTRY[table];
+
+		if (!webAppEntry) {
+			violations.push({
+				tool: tool.name,
+				kind: 'missing-table',
+				detail: `table "${table}" has no entry in apps/mana/.../crypto/registry.ts`,
+			});
+			continue;
+		}
+
+		const actual = [...fields].sort().join(',');
+		const expected = [...webAppEntry.fields].sort().join(',');
+
+		if (actual !== expected) {
+			violations.push({
+				tool: tool.name,
+				kind: 'field-drift',
+				detail:
+					`tool encrypts [${actual}], web-app encrypts [${expected}] — ` +
+					`both sides must agree or the record is half-encrypted`,
+			});
+			continue;
+		}
+
+		if (webAppEntry.enabled === false) {
+			violations.push({
+				tool: tool.name,
+				kind: 'disabled',
+				detail:
+					`web-app registry has enabled:false for "${table}" — ` +
+					`tool will encrypt but app will write/read plaintext, wire records will mix`,
+			});
+		}
+	}
+
+	return violations;
+}
+
+const violations = auditEncryptedTools();
+
+if (violations.length === 0) {
+	const total = getRegistry().filter((t) => t.encryptedFields).length;
+	console.log(`✓ encrypted-tools audit: ${total} tool(s) match the web-app registry.`);
+	process.exit(0);
+}
+
+// Split by kind for readability
+const fatals = violations.filter((v) => v.kind !== 'disabled');
+const warnings = violations.filter((v) => v.kind === 'disabled');
+
+if (warnings.length > 0) {
+	console.warn(`⚠ ${warnings.length} advisory warning(s):`);
+	for (const w of warnings) console.warn(`   · ${w.tool}: ${w.detail}`);
+	console.warn('');
+}
+
+if (fatals.length === 0) {
+	console.log('✓ no hard violations.');
+	process.exit(0);
+}
+
+console.error(`✗ encrypted-tools audit found ${fatals.length} violation(s):`);
+for (const v of fatals) {
+	console.error(`   · ${v.tool}  [${v.kind}]`);
+	console.error(`       ${v.detail}`);
+}
+console.error('');
+console.error(
+	'Fix: either update the ToolSpec.encryptedFields or the web-app registry — both must agree.'
+);
+process.exit(1);
--- a/services/mana-persona-runner/src/runner/claude-session.ts
+++ b/services/mana-persona-runner/src/runner/claude-session.ts
@ -96,8 +96,16 @@ export async function runMainTurn(input: SessionInput): Promise<SessionResult> {
 		},
 	});

+	// Per-turn Map so tool_result blocks can flip the *right* ActionRow
+	// (the one whose tool_use_id they reference), not "the most recent".
+	// Anthropic's stream interleaves tool_use and tool_result blocks
+	// deterministically, but when Claude pipelines multiple tools in one
+	// assistant turn, a later tool_result carries an earlier tool_use_id
+	// — the last-action fallback gets that wrong.
+	const toolUseIndex = new Map<string, number>();
+
 	for await (const msg of q as AsyncIterable<SDKMessage>) {
-		collectActionsFromMessage(msg, input.tickId, actions, modulesUsed);
+		collectActionsFromMessage(msg, input.tickId, actions, modulesUsed, toolUseIndex);
 	}

 	return { actions, feedback: [], modulesUsed };
@ -143,7 +151,8 @@ function collectActionsFromMessage(
 	msg: SDKMessage,
 	tickId: string,
 	actions: ActionRow[],
-	modulesUsed: Set<string>
+	modulesUsed: Set<string>,
+	toolUseIndex: Map<string, number>
 ): void {
 	// SDKMessage is a big union; we only care about assistant messages
 	// that contain tool_use blocks, and user messages that contain
@ -168,19 +177,23 @@ function collectActionsFromMessage(
 				inputHash: hashInput(block.input),
 				result: 'ok', // provisional; rewritten on matching tool_result if it was an error
 			});
+			// Record the Anthropic-assigned id so a later tool_result can
+			// find its row even if other tools finished in between.
+			const toolUseId = typeof block.id === 'string' ? block.id : null;
+			if (toolUseId) toolUseIndex.set(toolUseId, actions.length - 1);
 		} else if (blockType === 'tool_result') {
 			const isError = block.is_error === true;
 			if (!isError) continue;
-			// Flip the most recent action that matches this tool_use_id.
 			const toolUseId = typeof block.tool_use_id === 'string' ? block.tool_use_id : null;
-			if (!toolUseId) continue;
-			// We didn't store tool_use_id (would require pairing state); cheap
-			// fallback: mark the last action as error. Good enough for the
-			// audit dashboard; precise attribution lands in a later iteration.
-			const last = actions[actions.length - 1];
-			if (last) {
-				last.result = 'error';
-				last.errorMessage = stringifyBlock(block);
+			// Exact pairing via tool_use_id → fall back to last action only
+			// if the id is missing (shouldn't happen with the current SDK,
+			// but the fallback keeps the pipeline honest if Anthropic ever
+			// ships a block without an id).
+			const idx = toolUseId !== null ? toolUseIndex.get(toolUseId) : undefined;
+			const target = idx !== undefined ? actions[idx] : actions[actions.length - 1];
+			if (target) {
+				target.result = 'error';
+				target.errorMessage = stringifyBlock(block);
 			}
 		}
 	}