From e38257b93d1ebd320bbd8e0d821ced7fc36d4cb9 Mon Sep 17 00:00:00 2001 From: Till JS Date: Tue, 14 Apr 2026 20:48:53 +0200 Subject: [PATCH] feat(ai): policy-gated tool executor with pendingProposals lifecycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the staging layer that turns AI-attributed tool calls into user-reviewed proposals instead of direct writes. - `data/ai/policy.ts` — per-tool AiPolicy (`auto` | `propose` | `deny`) with module-level defaults and a global fallback. `user` and `system` actors always bypass (they ARE the decision / are trusted subsystems). - `data/ai/proposals/` — Proposal + Intent types, store with create/list/approve/reject/expire. Proposals are local-only (do NOT sync); the approved write syncs through the normal module path. - `tools/executor.ts` routes by actor+policy: `auto` runs directly under `runAsAsync(actor, ...)`, `propose` stages a Proposal carrying rationale + mission metadata, `deny` refuses. `executeToolRaw` bypasses the policy gate — used only on the approval path where consent already exists. Default policy is conservative: read-only and append-only self-state (log_drink, log_meal) auto-execute; everything that mutates user-visible records defaults to propose. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../apps/web/src/lib/data/ai/policy.test.ts | 60 ++++++++ apps/mana/apps/web/src/lib/data/ai/policy.ts | 105 +++++++++++++ .../src/lib/data/ai/proposals/store.test.ts | 138 +++++++++++++++++ .../web/src/lib/data/ai/proposals/store.ts | 145 ++++++++++++++++++ .../web/src/lib/data/ai/proposals/types.ts | 62 ++++++++ .../web/src/lib/data/tools/executor.test.ts | 100 +++++++++++- .../apps/web/src/lib/data/tools/executor.ts | 126 ++++++++++++--- 7 files changed, 716 insertions(+), 20 deletions(-) create mode 100644 apps/mana/apps/web/src/lib/data/ai/policy.test.ts create mode 100644 apps/mana/apps/web/src/lib/data/ai/policy.ts create mode 100644 apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts create mode 100644 apps/mana/apps/web/src/lib/data/ai/proposals/store.ts create mode 100644 apps/mana/apps/web/src/lib/data/ai/proposals/types.ts diff --git a/apps/mana/apps/web/src/lib/data/ai/policy.test.ts b/apps/mana/apps/web/src/lib/data/ai/policy.test.ts new file mode 100644 index 000000000..e39baab6c --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/policy.test.ts @@ -0,0 +1,60 @@ +import { describe, it, expect } from 'vitest'; +import { resolvePolicy, setAiPolicy, DEFAULT_AI_POLICY } from './policy'; +import { registerTools } from '../tools/registry'; +import type { Actor } from '../events/actor'; + +const AI: Actor = { kind: 'ai', missionId: 'm', iterationId: 'i', rationale: 'r' }; +const USER: Actor = { kind: 'user' }; +const SYSTEM: Actor = { kind: 'system', source: 'projection' }; + +describe('resolvePolicy', () => { + it('always returns auto for user actors', () => { + expect(resolvePolicy('create_task', USER)).toBe('auto'); + expect(resolvePolicy('anything', USER)).toBe('auto'); + }); + + it('always returns auto for system actors', () => { + expect(resolvePolicy('create_task', SYSTEM)).toBe('auto'); + }); + + it('uses per-tool entries for ai actors', () => { + expect(resolvePolicy('create_task', AI)).toBe('propose'); + expect(resolvePolicy('get_task_stats', AI)).toBe('auto'); + expect(resolvePolicy('log_drink', AI)).toBe('auto'); + }); + + it('falls back to the global ai default when no entry matches', () => { + expect(resolvePolicy('unregistered_tool', AI)).toBe(DEFAULT_AI_POLICY.defaultForAi); + }); + + it('uses module defaults when configured and no per-tool entry exists', () => { + registerTools([ + { + name: 'policy_test_custom', + module: 'policyTest', + description: 'd', + parameters: [], + async execute() { + return { success: true, message: 'ok' }; + }, + }, + ]); + const restore = setAiPolicy({ + tools: {}, + defaultsByModule: { policyTest: 'deny' }, + defaultForAi: 'auto', + }); + try { + expect(resolvePolicy('policy_test_custom', AI)).toBe('deny'); + } finally { + restore(); + } + }); + + it('setAiPolicy returns a restorer', () => { + const restore = setAiPolicy({ tools: {}, defaultForAi: 'deny' }); + expect(resolvePolicy('create_task', AI)).toBe('deny'); + restore(); + expect(resolvePolicy('create_task', AI)).toBe('propose'); + }); +}); diff --git a/apps/mana/apps/web/src/lib/data/ai/policy.ts b/apps/mana/apps/web/src/lib/data/ai/policy.ts new file mode 100644 index 000000000..224f67913 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/policy.ts @@ -0,0 +1,105 @@ +/** + * AI policy — per-tool decision of what an AI-attributed call should do. + * + * `auto` — execute immediately with the `ai` actor propagated into the + * write path. Use for read-only queries and append-only + * self-state logs where the AI can't do damage. + * `propose` — record a {@link Proposal} in the `pendingProposals` table + * instead of executing. User reviews + approves in the module + * UI, which then runs the stored intent. + * `deny` — refuse the call outright. Use for destructive operations + * the AI must never touch (e.g. account deletion). + * + * Policy applies only when the caller is an `ai` actor. `user` writes always + * bypass this layer (they ARE the approval). `system` writes (projections, + * rules, migrations) also bypass — they are trusted-by-subsystem. + * + * The default config below is deliberately conservative: anything that + * mutates a user-visible record defaults to `propose`. Loosen per-tool as + * the UX matures and trust is earned. + */ + +import { getTool } from '../tools/registry'; +import type { Actor } from '../events/actor'; + +export type PolicyDecision = 'auto' | 'propose' | 'deny'; + +export interface AiPolicy { + /** Tool-name → decision. Checked first. */ + readonly tools: Readonly>; + /** Module-name → decision. Checked when no per-tool entry exists. */ + readonly defaultsByModule?: Readonly>; + /** Global fallback when neither tool nor module has an entry. */ + readonly defaultForAi: PolicyDecision; +} + +export const DEFAULT_AI_POLICY: AiPolicy = { + tools: { + // ── Read-only / harmless → auto ─────────────────────── + get_task_stats: 'auto', + list_tasks: 'auto', + get_todays_events: 'auto', + get_drink_progress: 'auto', + nutrition_summary: 'auto', + get_places: 'auto', + location_log: 'auto', + + // ── Append-only self-state logs → auto ──────────────── + // These are fast-feedback user-logged values (drink, meal). The AI + // proposing "did you drink water?" then the user confirming + AI + // logging it should not require a second approval step. + log_drink: 'auto', + log_meal: 'auto', + + // ── Mutating user-visible records → propose ─────────── + create_task: 'propose', + complete_task: 'propose', + complete_tasks_by_title: 'propose', + create_event: 'propose', + create_place: 'propose', + visit_place: 'propose', + undo_drink: 'propose', + }, + defaultForAi: 'propose', +}; + +let activePolicy: AiPolicy = DEFAULT_AI_POLICY; + +/** Test / settings hook — replace the active policy. Returns a restorer. */ +export function setAiPolicy(policy: AiPolicy): () => void { + const previous = activePolicy; + activePolicy = policy; + return () => { + activePolicy = previous; + }; +} + +export function getAiPolicy(): AiPolicy { + return activePolicy; +} + +/** + * Resolve the policy decision for a tool invocation by a given actor. + * + * user → always `auto` (user IS the decision) + * system → always `auto` (trusted subsystem) + * ai → tools[name] ?? defaultsByModule[tool.module] ?? defaultForAi + */ +export function resolvePolicy( + toolName: string, + actor: Actor, + policy: AiPolicy = activePolicy +): PolicyDecision { + if (actor.kind !== 'ai') return 'auto'; + + const byTool = policy.tools[toolName]; + if (byTool) return byTool; + + const moduleDefaults = policy.defaultsByModule; + if (moduleDefaults) { + const tool = getTool(toolName); + if (tool && moduleDefaults[tool.module]) return moduleDefaults[tool.module]; + } + + return policy.defaultForAi; +} diff --git a/apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts b/apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts new file mode 100644 index 000000000..575964c28 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts @@ -0,0 +1,138 @@ +import 'fake-indexeddb/auto'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() })); +vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() })); +vi.mock('$lib/triggers/inline-suggest', () => ({ + checkInlineSuggestion: vi.fn().mockResolvedValue(null), +})); + +import { db } from '../../database'; +import { registerTools } from '../../tools/registry'; +import { + createProposal, + listProposals, + approveProposal, + rejectProposal, + expireOldProposals, + getProposal, +} from './store'; +import { PROPOSALS_TABLE } from './types'; +import type { Actor } from '../../events/actor'; + +const AI: Extract = { + kind: 'ai', + missionId: 'mission-1', + iterationId: 'iter-1', + rationale: 'test run', +}; + +let executed: { name: string; params: Record }[] = []; + +registerTools([ + { + name: 'proposal_test_echo', + module: 'proposalTest', + description: 'Records invocation for assertions', + parameters: [{ name: 'value', type: 'string', description: 'v', required: true }], + async execute(params) { + executed.push({ name: 'proposal_test_echo', params: { ...params } }); + return { success: true, message: `echo ${params.value}` }; + }, + }, +]); + +beforeEach(async () => { + executed = []; + await db.table(PROPOSALS_TABLE).clear(); +}); + +describe('proposal lifecycle', () => { + it('creates a pending proposal', async () => { + const p = await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'a' } }, + rationale: 'because', + }); + expect(p.status).toBe('pending'); + expect(p.missionId).toBe('mission-1'); + expect(p.rationale).toBe('because'); + expect(await getProposal(p.id)).toBeTruthy(); + }); + + it('lists pending proposals by filter', async () => { + await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'a' } }, + rationale: 'r', + }); + await createProposal({ + actor: { ...AI, missionId: 'mission-2' }, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'b' } }, + rationale: 'r', + }); + const all = await listProposals({ status: 'pending' }); + expect(all).toHaveLength(2); + const m2 = await listProposals({ missionId: 'mission-2' }); + expect(m2).toHaveLength(1); + }); + + it('approving runs the intent and marks the proposal approved', async () => { + const p = await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'go' } }, + rationale: 'r', + }); + const { proposal, result } = await approveProposal(p.id, 'looks good'); + expect(result.success).toBe(true); + expect(executed).toEqual([{ name: 'proposal_test_echo', params: { value: 'go' } }]); + expect(proposal.status).toBe('approved'); + expect(proposal.userFeedback).toBe('looks good'); + + const persisted = await getProposal(p.id); + expect(persisted?.status).toBe('approved'); + expect(persisted?.decidedBy).toBe('user'); + }); + + it('rejecting stores feedback and does not execute the intent', async () => { + const p = await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'x' } }, + rationale: 'r', + }); + await rejectProposal(p.id, 'not now'); + const persisted = await getProposal(p.id); + expect(persisted?.status).toBe('rejected'); + expect(persisted?.userFeedback).toBe('not now'); + expect(executed).toHaveLength(0); + }); + + it('refuses to approve a non-pending proposal', async () => { + const p = await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'x' } }, + rationale: 'r', + }); + await rejectProposal(p.id); + await expect(approveProposal(p.id)).rejects.toThrow(/rejected/); + }); + + it('expires proposals past their expiresAt', async () => { + await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'x' } }, + rationale: 'r', + expiresAt: '2020-01-01T00:00:00.000Z', + }); + await createProposal({ + actor: AI, + intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value: 'y' } }, + rationale: 'r', + expiresAt: '2099-01-01T00:00:00.000Z', + }); + const count = await expireOldProposals(new Date('2026-04-14T00:00:00.000Z')); + expect(count).toBe(1); + const pending = await listProposals({ status: 'pending' }); + expect(pending).toHaveLength(1); + }); +}); diff --git a/apps/mana/apps/web/src/lib/data/ai/proposals/store.ts b/apps/mana/apps/web/src/lib/data/ai/proposals/store.ts new file mode 100644 index 000000000..021e91c3b --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/proposals/store.ts @@ -0,0 +1,145 @@ +/** + * Proposal store — create, list, approve, reject, expire. + * + * Approval re-runs the tool the AI originally called, but this time forces + * the executor into `auto` mode so the stored intent can't bounce back into + * another proposal. Rejection just marks the row — the AI sees the feedback + * on the next planner pass via `listProposals({ status: 'rejected' })`. + */ + +import { db } from '../../database'; +import { runAsAsync } from '../../events/actor'; +import type { Actor } from '../../events/actor'; +import type { ToolResult } from '../../tools/types'; +import type { Intent, Proposal, ProposalStatus } from './types'; +import { PROPOSALS_TABLE } from './types'; + +const table = () => db.table(PROPOSALS_TABLE); + +export interface CreateProposalInput { + actor: Extract; + intent: Intent; + rationale: string; + /** ISO timestamp. Falsy → no auto-expiry. */ + expiresAt?: string; +} + +export async function createProposal(input: CreateProposalInput): Promise { + const proposal: Proposal = { + id: crypto.randomUUID(), + createdAt: new Date().toISOString(), + expiresAt: input.expiresAt, + status: 'pending', + actor: input.actor, + missionId: input.actor.missionId, + iterationId: input.actor.iterationId, + rationale: input.rationale, + intent: input.intent, + }; + await table().add(proposal); + return proposal; +} + +export async function getProposal(id: string): Promise { + return table().get(id); +} + +export interface ListProposalsFilter { + status?: ProposalStatus; + missionId?: string; +} + +export async function listProposals(filter: ListProposalsFilter = {}): Promise { + let coll = table().orderBy('createdAt').reverse(); + if (filter.status) coll = coll.filter((p) => p.status === filter.status); + if (filter.missionId) coll = coll.filter((p) => p.missionId === filter.missionId); + return coll.toArray(); +} + +/** + * Approve a pending proposal. Runs the stored intent with the AI actor + * re-installed so downstream events and records carry the original + * mission/iteration attribution — critical for the Workbench timeline. + * + * The executor is forced into `auto` by construction: the approved + * `executeTool` call re-reads policy, which would again say `propose` — + * so we bypass policy by calling the tool implementation directly under + * the `ai` actor instead of routing through the policy-gated executor. + */ +export async function approveProposal( + id: string, + userFeedback?: string +): Promise<{ proposal: Proposal; result: ToolResult }> { + const proposal = await getProposal(id); + if (!proposal) throw new Error(`Proposal not found: ${id}`); + if (proposal.status !== 'pending') { + throw new Error(`Proposal ${id} is ${proposal.status}, cannot approve`); + } + + const result = await runApprovedIntent(proposal); + + const updated: Partial = { + status: 'approved', + decidedAt: new Date().toISOString(), + decidedBy: 'user', + userFeedback, + }; + await table().update(id, updated); + return { proposal: { ...proposal, ...updated }, result }; +} + +export async function rejectProposal(id: string, userFeedback?: string): Promise { + const proposal = await getProposal(id); + if (!proposal) throw new Error(`Proposal not found: ${id}`); + if (proposal.status !== 'pending') { + throw new Error(`Proposal ${id} is ${proposal.status}, cannot reject`); + } + const updated: Partial = { + status: 'rejected', + decidedAt: new Date().toISOString(), + decidedBy: 'user', + userFeedback, + }; + await table().update(id, updated); + return { ...proposal, ...updated }; +} + +/** + * Mark any pending proposal whose `expiresAt` has passed as expired. Fire + * this from a low-frequency tick (e.g. on app focus); cheap, indexed scan. + */ +export async function expireOldProposals(now: Date = new Date()): Promise { + const cutoff = now.toISOString(); + const stale = await table() + .where('status') + .equals('pending') + .filter((p) => typeof p.expiresAt === 'string' && p.expiresAt < cutoff) + .toArray(); + + for (const p of stale) { + await table().update(p.id, { + status: 'expired', + decidedAt: cutoff, + decidedBy: 'auto-expire', + }); + } + return stale.length; +} + +/** + * Run the intent under the original AI actor, bypassing policy. The user + * has consented via approval; re-entering the policy gate would bounce the + * call straight back into a new proposal. + * + * The `executor` import is lazy: `tools/executor.ts` imports this file's + * `createProposal`, so a top-level import here would form a cycle. + */ +async function runApprovedIntent(proposal: Proposal): Promise { + return runAsAsync(proposal.actor, async () => { + if (proposal.intent.kind === 'toolCall') { + const { executeToolRaw } = await import('../../tools/executor'); + return executeToolRaw(proposal.intent.toolName, proposal.intent.params); + } + throw new Error(`Unsupported intent kind: ${(proposal.intent as { kind: string }).kind}`); + }); +} diff --git a/apps/mana/apps/web/src/lib/data/ai/proposals/types.ts b/apps/mana/apps/web/src/lib/data/ai/proposals/types.ts new file mode 100644 index 000000000..a9942dce5 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/proposals/types.ts @@ -0,0 +1,62 @@ +/** + * Proposals — staged AI intents awaiting user approval. + * + * When an AI-attributed tool call hits a policy of `propose`, the executor + * records a {@link Proposal} instead of performing the underlying mutation. + * The proposal sits in the local `pendingProposals` Dexie table until the + * user approves it (→ run the intent), rejects it, or it auto-expires. + * + * Proposals are intentionally local-only — they do not sync through + * mana-sync. The approved mutation syncs normally once executed, so + * other devices see the resulting write without ever seeing the proposal + * state machine. + */ + +import type { Actor } from '../../events/actor'; + +/** Lifecycle states a proposal can be in. */ +export type ProposalStatus = 'pending' | 'approved' | 'rejected' | 'expired'; + +/** + * Structured description of what the AI wants to happen if the proposal is + * approved. Start with `toolCall` (execute the named tool with params) and + * extend the union with `patch` / `create` variants once module UIs need + * to render field-level diffs inline. + */ +export type Intent = ToolCallIntent; + +export interface ToolCallIntent { + readonly kind: 'toolCall'; + readonly toolName: string; + readonly params: Record; +} + +export interface Proposal { + readonly id: string; + readonly createdAt: string; + readonly expiresAt?: string; + readonly status: ProposalStatus; + + /** + * The AI actor that submitted this proposal. Always `kind: 'ai'` by + * construction — `resolvePolicy` never routes user/system writes here. + */ + readonly actor: Actor; + /** Mirrors `actor.missionId` for index-based queries of "all proposals in mission X". */ + readonly missionId?: string; + /** Mirrors `actor.iterationId`. */ + readonly iterationId?: string; + /** The AI's stated reason for the change — surfaced in the approval UI. */ + readonly rationale: string; + + /** What runs on approve. */ + readonly intent: Intent; + + /** Set when the proposal leaves the `pending` state. */ + readonly decidedAt?: string; + readonly decidedBy?: 'user' | 'auto-expire'; + /** Free-text feedback from the user, captured on approve or reject. */ + readonly userFeedback?: string; +} + +export const PROPOSALS_TABLE = 'pendingProposals'; diff --git a/apps/mana/apps/web/src/lib/data/tools/executor.test.ts b/apps/mana/apps/web/src/lib/data/tools/executor.test.ts index 7363c0176..9c8b4831a 100644 --- a/apps/mana/apps/web/src/lib/data/tools/executor.test.ts +++ b/apps/mana/apps/web/src/lib/data/tools/executor.test.ts @@ -1,8 +1,23 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import 'fake-indexeddb/auto'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() })); +vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() })); +vi.mock('$lib/triggers/inline-suggest', () => ({ + checkInlineSuggestion: vi.fn().mockResolvedValue(null), +})); + import { executeTool } from './executor'; import { registerTools, getTools } from './registry'; +import { setAiPolicy } from '../ai/policy'; +import { listProposals, approveProposal } from '../ai/proposals/store'; +import { PROPOSALS_TABLE } from '../ai/proposals/types'; +import { db } from '../database'; +import type { Actor } from '../events/actor'; import type { ModuleTool } from './types'; +const AI: Actor = { kind: 'ai', missionId: 'm-1', iterationId: 'it-1', rationale: 'because' }; + // Reset registry between tests by reloading — registry uses module-level array // Instead, we just register test tools and rely on dedup const testTools: ModuleTool[] = [ @@ -102,3 +117,86 @@ describe('Tool Executor', () => { expect(result.success).toBe(true); }); }); + +describe('Tool Executor — AI policy routing', () => { + beforeEach(async () => { + await db.table(PROPOSALS_TABLE).clear(); + }); + + it('runs a tool directly for user actors regardless of name', async () => { + // test_echo has no policy entry — user default is always auto + const result = await executeTool('test_echo', { text: 'hi' }); + expect(result.success).toBe(true); + expect(result.message).toBe('echo: hi'); + }); + + it('stages a proposal when ai actor hits a propose-policy tool', async () => { + const restore = setAiPolicy({ tools: { test_echo: 'propose' }, defaultForAi: 'propose' }); + try { + const result = await executeTool('test_echo', { text: 'stage-me' }, AI); + expect(result.success).toBe(true); + expect(result.message).toMatch(/Vorgeschlagen/); + expect((result.data as { proposalId: string }).proposalId).toBeTruthy(); + + // Tool did NOT run — it was staged + const pending = await listProposals({ status: 'pending' }); + expect(pending).toHaveLength(1); + expect(pending[0].rationale).toBe('because'); + expect(pending[0].missionId).toBe('m-1'); + } finally { + restore(); + } + }); + + it('runs directly for ai actor when policy says auto', async () => { + const restore = setAiPolicy({ tools: { test_echo: 'auto' }, defaultForAi: 'propose' }); + try { + const result = await executeTool('test_echo', { text: 'direct' }, AI); + expect(result.success).toBe(true); + expect(result.message).toBe('echo: direct'); + const pending = await listProposals({ status: 'pending' }); + expect(pending).toHaveLength(0); + } finally { + restore(); + } + }); + + it('refuses with deny policy', async () => { + const restore = setAiPolicy({ tools: { test_echo: 'deny' }, defaultForAi: 'propose' }); + try { + const result = await executeTool('test_echo', { text: 'no' }, AI); + expect(result.success).toBe(false); + expect(result.message).toMatch(/not available/); + } finally { + restore(); + } + }); + + it('approval runs the staged intent with original actor attribution', async () => { + const restore = setAiPolicy({ tools: { test_echo: 'propose' }, defaultForAi: 'propose' }); + try { + const staged = await executeTool('test_echo', { text: 'approved' }, AI); + const proposalId = (staged.data as { proposalId: string }).proposalId; + + const { result, proposal } = await approveProposal(proposalId); + expect(result.success).toBe(true); + expect(result.message).toBe('echo: approved'); + expect(proposal.status).toBe('approved'); + } finally { + restore(); + } + }); + + it('still validates parameters before staging a proposal', async () => { + const restore = setAiPolicy({ tools: { test_echo: 'propose' }, defaultForAi: 'propose' }); + try { + const result = await executeTool('test_echo', {}, AI); + expect(result.success).toBe(false); + expect(result.message).toContain('Missing required parameter'); + const pending = await listProposals({ status: 'pending' }); + expect(pending).toHaveLength(0); + } finally { + restore(); + } + }); +}); diff --git a/apps/mana/apps/web/src/lib/data/tools/executor.ts b/apps/mana/apps/web/src/lib/data/tools/executor.ts index b16dee3d2..2fed9d2f7 100644 --- a/apps/mana/apps/web/src/lib/data/tools/executor.ts +++ b/apps/mana/apps/web/src/lib/data/tools/executor.ts @@ -1,35 +1,126 @@ /** - * Tool Executor — Validates parameters and runs a tool by name. + * Tool Executor — validates parameters, resolves AI policy, and runs or + * stages the tool by name. + * + * Call paths: + * - User action from the UI: `executeTool(name, params)` with no actor + * → ambient `USER_ACTOR`, policy returns `auto`, tool runs directly. + * - AI in the companion orchestrator: `executeTool(name, params, aiActor)` + * → policy resolves per-tool; `propose` writes a Proposal and returns + * a success result carrying the proposal id, `auto` executes, `deny` + * refuses. + * - Approval path: proposal store calls `executeToolRaw(name, params)` + * under `runAsAsync(aiActor, ...)` — same validation, but no policy. */ import { getTool } from './registry'; +import { runAsAsync, USER_ACTOR } from '../events/actor'; +import { resolvePolicy } from '../ai/policy'; +import { createProposal } from '../ai/proposals/store'; +import type { Actor } from '../events/actor'; import type { ToolResult } from './types'; export async function executeTool( name: string, - params: Record + params: Record, + actor?: Actor ): Promise { const tool = getTool(name); if (!tool) { return { success: false, message: `Unknown tool: ${name}` }; } - // Validate required parameters - for (const p of tool.parameters) { - if (p.required && (params[p.name] === undefined || params[p.name] === null)) { - return { success: false, message: `Missing required parameter: ${p.name}` }; - } + const validation = validateParams(tool.parameters, params); + if (!validation.ok) return validation.error; + + const effectiveActor: Actor = actor ?? USER_ACTOR; + const decision = resolvePolicy(name, effectiveActor); + + if (decision === 'deny') { + return { + success: false, + message: `Tool "${name}" is not available to AI actors under current policy`, + }; } - // Validate types - for (const p of tool.parameters) { + if (decision === 'propose') { + // Only ai actors can hit `propose` — resolvePolicy short-circuits + // user/system to `auto`. Narrow defensively in case policy is swapped. + if (effectiveActor.kind !== 'ai') { + return { success: false, message: `propose policy requires an AI actor` }; + } + const proposal = await createProposal({ + actor: effectiveActor, + intent: { kind: 'toolCall', toolName: name, params }, + rationale: effectiveActor.rationale, + }); + return { + success: true, + data: { proposalId: proposal.id, status: 'pending' }, + message: `Vorgeschlagen: "${name}" wartet auf Freigabe.`, + }; + } + + // decision === 'auto' + return runAsAsync(effectiveActor, () => runValidatedTool(tool, params)); +} + +/** + * Run a tool bypassing AI policy. Used by the proposal approval path, which + * already has user consent and must not bounce back into another proposal. + * + * Caller is responsible for installing the right actor via `runAsAsync`. + */ +export async function executeToolRaw( + name: string, + params: Record +): Promise { + const tool = getTool(name); + if (!tool) return { success: false, message: `Unknown tool: ${name}` }; + const validation = validateParams(tool.parameters, params); + if (!validation.ok) return validation.error; + return runValidatedTool(tool, params); +} + +// ── Internals ─────────────────────────────────────────────── + +async function runValidatedTool( + tool: { execute: (p: Record) => Promise }, + params: Record +): Promise { + try { + return await tool.execute(params); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { success: false, message: `Tool execution failed: ${msg}` }; + } +} + +type ValidationOutcome = { ok: true } | { ok: false; error: ToolResult }; + +function validateParams( + schema: { name: string; type: string; required: boolean; enum?: string[] }[], + params: Record +): ValidationOutcome { + for (const p of schema) { + if (p.required && (params[p.name] === undefined || params[p.name] === null)) { + return { + ok: false, + error: { success: false, message: `Missing required parameter: ${p.name}` }, + }; + } + } + for (const p of schema) { const val = params[p.name]; if (val === undefined || val === null) continue; if (p.type === 'number' && typeof val !== 'number') { const num = Number(val); if (isNaN(num)) { - return { success: false, message: `Parameter ${p.name} must be a number` }; + return { + ok: false, + error: { success: false, message: `Parameter ${p.name} must be a number` }, + }; } params[p.name] = num; } @@ -38,16 +129,13 @@ export async function executeTool( } if (p.enum && !p.enum.includes(String(val))) { return { - success: false, - message: `Parameter ${p.name} must be one of: ${p.enum.join(', ')}`, + ok: false, + error: { + success: false, + message: `Parameter ${p.name} must be one of: ${p.enum.join(', ')}`, + }, }; } } - - try { - return await tool.execute(params); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - return { success: false, message: `Tool execution failed: ${msg}` }; - } + return { ok: true }; }