mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
fix(personas): exact tool_use_id pairing + CI drift audit
Two loose ends from M3/M4:
1. Tool_use_id-based error attribution in the persona-runner
-----------------------------------------------------------
The previous collectActionsFromMessage() flipped the *most recent*
ActionRow to 'error' when a tool_result carried is_error:true. That was
fine as long as Claude invoked tools strictly in sequence, but when
the planner pipelines multiple tools in one turn, a later tool_result
carries an earlier tool_use_id — the last-action fallback mis-
attributes the error.
runMainTurn() now keeps a tool_use_id → action-index Map for the
duration of the tick. On tool_use we stash block.id, on tool_result we
look up the exact ActionRow via tool_use_id and flip that one. The
"flip last" path survives as a pure fallback if a future SDK ever
ships a block without an id.
2. New audit:encrypted-tools script
-----------------------------------
scripts/audit-encrypted-tools.ts — loads registerAllModules() and
apps/mana/…/crypto/registry.ts, diffs every ToolSpec.encryptedFields
against the authoritative web-app ENCRYPTION_REGISTRY.
Catches three classes of drift:
- missing-table : tool declares a table the web-app doesn't encrypt
- field-drift : both agree a table is encrypted but the field lists
differ (half-encryption in the wire is silent death)
- disabled : web-app has enabled:false while the tool still
encrypts — advisory warning, not a fail
Negative-tested by injecting a deliberate drift on todo.create +
todo.list (shortened ENCRYPTED_FIELDS to ['title']); the auditor
flagged both tools with full field diffs, restore returned to green.
Wired into `pnpm run validate:all` so the contract survives future
edits on either side. Fills the M4 audit gap noted in
project_mana_mcp_personas.md.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
703ef69ca9
commit
eb8fac23ec
3 changed files with 150 additions and 12 deletions
|
|
@ -24,9 +24,10 @@
|
||||||
"validate:theme-variables": "node scripts/validate-theme-variables.mjs",
|
"validate:theme-variables": "node scripts/validate-theme-variables.mjs",
|
||||||
"validate:theme-utilities": "node scripts/validate-theme-utilities.mjs",
|
"validate:theme-utilities": "node scripts/validate-theme-utilities.mjs",
|
||||||
"validate:theme-parity": "node scripts/validate-theme-parity.mjs",
|
"validate:theme-parity": "node scripts/validate-theme-parity.mjs",
|
||||||
"validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto",
|
"validate:all": "pnpm run validate:turbo && pnpm run validate:pg-schema && pnpm run validate:theme-variables && pnpm run validate:theme-utilities && pnpm run validate:theme-parity && pnpm run check:crypto && pnpm run audit:encrypted-tools",
|
||||||
"check:crypto": "node scripts/audit-crypto-registry.mjs",
|
"check:crypto": "node scripts/audit-crypto-registry.mjs",
|
||||||
"check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed",
|
"check:crypto:seed": "node scripts/audit-crypto-registry.mjs --seed",
|
||||||
|
"audit:encrypted-tools": "bun run scripts/audit-encrypted-tools.ts",
|
||||||
"audit:deps": "node scripts/audit-workspace-deps.mjs",
|
"audit:deps": "node scripts/audit-workspace-deps.mjs",
|
||||||
"audit:modules": "node scripts/audit-modules.mjs",
|
"audit:modules": "node scripts/audit-modules.mjs",
|
||||||
"audit:coupling": "node scripts/audit-module-coupling.mjs",
|
"audit:coupling": "node scripts/audit-module-coupling.mjs",
|
||||||
|
|
|
||||||
124
scripts/audit-encrypted-tools.ts
Normal file
124
scripts/audit-encrypted-tools.ts
Normal file
|
|
@ -0,0 +1,124 @@
|
||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* Audit the encryption-fields contract between the web-app
|
||||||
|
* `ENCRYPTION_REGISTRY` (Dexie hook) and each `ToolSpec.encryptedFields`
|
||||||
|
* declaration in `@mana/tool-registry`.
|
||||||
|
*
|
||||||
|
* Why: when a server-side tool encrypts fewer fields than the web app,
|
||||||
|
* records written by the persona-runner look "encrypted enough" on the
|
||||||
|
* wire but arrive plaintext in the columns the web app expected to
|
||||||
|
* encrypt — silent data leak into sync rows. When the tool encrypts
|
||||||
|
* MORE than the web app, the web app can't decrypt its own reads.
|
||||||
|
*
|
||||||
|
* Invariants:
|
||||||
|
* 1. For every ToolSpec with `encryptedFields: {table, fields}`, the
|
||||||
|
* web-app registry has an entry for `table`.
|
||||||
|
* 2. The set of fields declared on the spec matches exactly — same
|
||||||
|
* names, same cardinality. Extra / missing fields on either side
|
||||||
|
* fail the audit.
|
||||||
|
* 3. (Advisory) If the web-app registry marks a table as
|
||||||
|
* `enabled: false`, encrypted tool handlers are pointless — flag
|
||||||
|
* but don't fail (opt-in rollout may intentionally stage these).
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* bun run scripts/audit-encrypted-tools.ts # exit 1 on drift
|
||||||
|
* pnpm run audit:encrypted-tools # same
|
||||||
|
*
|
||||||
|
* Wired into `pnpm run validate:all`.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
registerAllModules,
|
||||||
|
getRegistry,
|
||||||
|
__resetRegistryForTests,
|
||||||
|
} from '../packages/mana-tool-registry/src/index.ts';
|
||||||
|
import { ENCRYPTION_REGISTRY } from '../apps/mana/apps/web/src/lib/data/crypto/registry.ts';
|
||||||
|
|
||||||
|
interface Violation {
|
||||||
|
tool: string;
|
||||||
|
kind: 'missing-table' | 'field-drift' | 'disabled';
|
||||||
|
detail: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function auditEncryptedTools(): Violation[] {
|
||||||
|
__resetRegistryForTests();
|
||||||
|
registerAllModules();
|
||||||
|
|
||||||
|
const violations: Violation[] = [];
|
||||||
|
|
||||||
|
for (const tool of getRegistry()) {
|
||||||
|
if (!tool.encryptedFields) continue;
|
||||||
|
|
||||||
|
const { table, fields } = tool.encryptedFields;
|
||||||
|
const webAppEntry = ENCRYPTION_REGISTRY[table];
|
||||||
|
|
||||||
|
if (!webAppEntry) {
|
||||||
|
violations.push({
|
||||||
|
tool: tool.name,
|
||||||
|
kind: 'missing-table',
|
||||||
|
detail: `table "${table}" has no entry in apps/mana/.../crypto/registry.ts`,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const actual = [...fields].sort().join(',');
|
||||||
|
const expected = [...webAppEntry.fields].sort().join(',');
|
||||||
|
|
||||||
|
if (actual !== expected) {
|
||||||
|
violations.push({
|
||||||
|
tool: tool.name,
|
||||||
|
kind: 'field-drift',
|
||||||
|
detail:
|
||||||
|
`tool encrypts [${actual}], web-app encrypts [${expected}] — ` +
|
||||||
|
`both sides must agree or the record is half-encrypted`,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (webAppEntry.enabled === false) {
|
||||||
|
violations.push({
|
||||||
|
tool: tool.name,
|
||||||
|
kind: 'disabled',
|
||||||
|
detail:
|
||||||
|
`web-app registry has enabled:false for "${table}" — ` +
|
||||||
|
`tool will encrypt but app will write/read plaintext, wire records will mix`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return violations;
|
||||||
|
}
|
||||||
|
|
||||||
|
const violations = auditEncryptedTools();
|
||||||
|
|
||||||
|
if (violations.length === 0) {
|
||||||
|
const total = getRegistry().filter((t) => t.encryptedFields).length;
|
||||||
|
console.log(`✓ encrypted-tools audit: ${total} tool(s) match the web-app registry.`);
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split by kind for readability
|
||||||
|
const fatals = violations.filter((v) => v.kind !== 'disabled');
|
||||||
|
const warnings = violations.filter((v) => v.kind === 'disabled');
|
||||||
|
|
||||||
|
if (warnings.length > 0) {
|
||||||
|
console.warn(`⚠ ${warnings.length} advisory warning(s):`);
|
||||||
|
for (const w of warnings) console.warn(` · ${w.tool}: ${w.detail}`);
|
||||||
|
console.warn('');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fatals.length === 0) {
|
||||||
|
console.log('✓ no hard violations.');
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error(`✗ encrypted-tools audit found ${fatals.length} violation(s):`);
|
||||||
|
for (const v of fatals) {
|
||||||
|
console.error(` · ${v.tool} [${v.kind}]`);
|
||||||
|
console.error(` ${v.detail}`);
|
||||||
|
}
|
||||||
|
console.error('');
|
||||||
|
console.error(
|
||||||
|
'Fix: either update the ToolSpec.encryptedFields or the web-app registry — both must agree.'
|
||||||
|
);
|
||||||
|
process.exit(1);
|
||||||
|
|
@ -96,8 +96,16 @@ export async function runMainTurn(input: SessionInput): Promise<SessionResult> {
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Per-turn Map so tool_result blocks can flip the *right* ActionRow
|
||||||
|
// (the one whose tool_use_id they reference), not "the most recent".
|
||||||
|
// Anthropic's stream interleaves tool_use and tool_result blocks
|
||||||
|
// deterministically, but when Claude pipelines multiple tools in one
|
||||||
|
// assistant turn, a later tool_result carries an earlier tool_use_id
|
||||||
|
// — the last-action fallback gets that wrong.
|
||||||
|
const toolUseIndex = new Map<string, number>();
|
||||||
|
|
||||||
for await (const msg of q as AsyncIterable<SDKMessage>) {
|
for await (const msg of q as AsyncIterable<SDKMessage>) {
|
||||||
collectActionsFromMessage(msg, input.tickId, actions, modulesUsed);
|
collectActionsFromMessage(msg, input.tickId, actions, modulesUsed, toolUseIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
return { actions, feedback: [], modulesUsed };
|
return { actions, feedback: [], modulesUsed };
|
||||||
|
|
@ -143,7 +151,8 @@ function collectActionsFromMessage(
|
||||||
msg: SDKMessage,
|
msg: SDKMessage,
|
||||||
tickId: string,
|
tickId: string,
|
||||||
actions: ActionRow[],
|
actions: ActionRow[],
|
||||||
modulesUsed: Set<string>
|
modulesUsed: Set<string>,
|
||||||
|
toolUseIndex: Map<string, number>
|
||||||
): void {
|
): void {
|
||||||
// SDKMessage is a big union; we only care about assistant messages
|
// SDKMessage is a big union; we only care about assistant messages
|
||||||
// that contain tool_use blocks, and user messages that contain
|
// that contain tool_use blocks, and user messages that contain
|
||||||
|
|
@ -168,19 +177,23 @@ function collectActionsFromMessage(
|
||||||
inputHash: hashInput(block.input),
|
inputHash: hashInput(block.input),
|
||||||
result: 'ok', // provisional; rewritten on matching tool_result if it was an error
|
result: 'ok', // provisional; rewritten on matching tool_result if it was an error
|
||||||
});
|
});
|
||||||
|
// Record the Anthropic-assigned id so a later tool_result can
|
||||||
|
// find its row even if other tools finished in between.
|
||||||
|
const toolUseId = typeof block.id === 'string' ? block.id : null;
|
||||||
|
if (toolUseId) toolUseIndex.set(toolUseId, actions.length - 1);
|
||||||
} else if (blockType === 'tool_result') {
|
} else if (blockType === 'tool_result') {
|
||||||
const isError = block.is_error === true;
|
const isError = block.is_error === true;
|
||||||
if (!isError) continue;
|
if (!isError) continue;
|
||||||
// Flip the most recent action that matches this tool_use_id.
|
|
||||||
const toolUseId = typeof block.tool_use_id === 'string' ? block.tool_use_id : null;
|
const toolUseId = typeof block.tool_use_id === 'string' ? block.tool_use_id : null;
|
||||||
if (!toolUseId) continue;
|
// Exact pairing via tool_use_id → fall back to last action only
|
||||||
// We didn't store tool_use_id (would require pairing state); cheap
|
// if the id is missing (shouldn't happen with the current SDK,
|
||||||
// fallback: mark the last action as error. Good enough for the
|
// but the fallback keeps the pipeline honest if Anthropic ever
|
||||||
// audit dashboard; precise attribution lands in a later iteration.
|
// ships a block without an id).
|
||||||
const last = actions[actions.length - 1];
|
const idx = toolUseId !== null ? toolUseIndex.get(toolUseId) : undefined;
|
||||||
if (last) {
|
const target = idx !== undefined ? actions[idx] : actions[actions.length - 1];
|
||||||
last.result = 'error';
|
if (target) {
|
||||||
last.errorMessage = stringifyBlock(block);
|
target.result = 'error';
|
||||||
|
target.errorMessage = stringifyBlock(block);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue