mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 20:01:09 +02:00
feat(ai): policy-gated tool executor with pendingProposals lifecycle
Adds the staging layer that turns AI-attributed tool calls into user-reviewed proposals instead of direct writes. - `data/ai/policy.ts` — per-tool AiPolicy (`auto` | `propose` | `deny`) with module-level defaults and a global fallback. `user` and `system` actors always bypass (they ARE the decision / are trusted subsystems). - `data/ai/proposals/` — Proposal + Intent types, store with create/list/approve/reject/expire. Proposals are local-only (do NOT sync); the approved write syncs through the normal module path. - `tools/executor.ts` routes by actor+policy: `auto` runs directly under `runAsAsync(actor, ...)`, `propose` stages a Proposal carrying rationale + mission metadata, `deny` refuses. `executeToolRaw` bypasses the policy gate — used only on the approval path where consent already exists. Default policy is conservative: read-only and append-only self-state (log_drink, log_meal) auto-execute; everything that mutates user-visible records defaults to propose. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d1a0d09692
commit
e38257b93d
7 changed files with 716 additions and 20 deletions
60
apps/mana/apps/web/src/lib/data/ai/policy.test.ts
Normal file
60
apps/mana/apps/web/src/lib/data/ai/policy.test.ts
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import { describe, it, expect } from 'vitest';
|
||||
import { resolvePolicy, setAiPolicy, DEFAULT_AI_POLICY } from './policy';
|
||||
import { registerTools } from '../tools/registry';
|
||||
import type { Actor } from '../events/actor';
|
||||
|
||||
// One fixture per actor kind. Only the `ai` fixture is subject to policy
// lookup; the tests below expect `auto` for the other two.
const AI: Actor = { kind: 'ai', missionId: 'm', iterationId: 'i', rationale: 'r' };
const USER: Actor = { kind: 'user' };
const SYSTEM: Actor = { kind: 'system', source: 'projection' };

describe('resolvePolicy', () => {
	it('always returns auto for user actors', () => {
		expect(resolvePolicy('create_task', USER)).toBe('auto');
		expect(resolvePolicy('anything', USER)).toBe('auto');
	});

	it('always returns auto for system actors', () => {
		expect(resolvePolicy('create_task', SYSTEM)).toBe('auto');
	});

	it('uses per-tool entries for ai actors', () => {
		expect(resolvePolicy('create_task', AI)).toBe('propose');
		expect(resolvePolicy('get_task_stats', AI)).toBe('auto');
		expect(resolvePolicy('log_drink', AI)).toBe('auto');
	});

	it('falls back to the global ai default when no entry matches', () => {
		expect(resolvePolicy('unregistered_tool', AI)).toBe(DEFAULT_AI_POLICY.defaultForAi);
	});

	it('uses module defaults when configured and no per-tool entry exists', () => {
		// The module default is looked up through the registered tool's
		// `module`, so the tool has to exist in the registry first.
		registerTools([
			{
				name: 'policy_test_custom',
				module: 'policyTest',
				description: 'd',
				parameters: [],
				async execute() {
					return { success: true, message: 'ok' };
				},
			},
		]);
		const restore = setAiPolicy({
			tools: {},
			defaultsByModule: { policyTest: 'deny' },
			defaultForAi: 'auto',
		});
		try {
			expect(resolvePolicy('policy_test_custom', AI)).toBe('deny');
		} finally {
			restore();
		}
	});

	it('setAiPolicy returns a restorer', () => {
		const restore = setAiPolicy({ tools: {}, defaultForAi: 'deny' });
		try {
			expect(resolvePolicy('create_task', AI)).toBe('deny');
		} finally {
			// Undo the override even when the assertion above fails, so the
			// deny-all policy cannot leak into tests that run afterwards.
			restore();
		}
		expect(resolvePolicy('create_task', AI)).toBe('propose');
	});
});
|
||||
105
apps/mana/apps/web/src/lib/data/ai/policy.ts
Normal file
105
apps/mana/apps/web/src/lib/data/ai/policy.ts
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
/**
|
||||
* AI policy — per-tool decision of what an AI-attributed call should do.
|
||||
*
|
||||
* `auto` — execute immediately with the `ai` actor propagated into the
|
||||
* write path. Use for read-only queries and append-only
|
||||
* self-state logs where the AI can't do damage.
|
||||
* `propose` — record a {@link Proposal} in the `pendingProposals` table
|
||||
* instead of executing. User reviews + approves in the module
|
||||
* UI, which then runs the stored intent.
|
||||
* `deny` — refuse the call outright. Use for destructive operations
|
||||
* the AI must never touch (e.g. account deletion).
|
||||
*
|
||||
* Policy applies only when the caller is an `ai` actor. `user` writes always
|
||||
* bypass this layer (they ARE the approval). `system` writes (projections,
|
||||
* rules, migrations) also bypass — they are trusted-by-subsystem.
|
||||
*
|
||||
* The default config below is deliberately conservative: anything that
|
||||
* mutates a user-visible record defaults to `propose`. Loosen per-tool as
|
||||
* the UX matures and trust is earned.
|
||||
*/
|
||||
|
||||
import { getTool } from '../tools/registry';
|
||||
import type { Actor } from '../events/actor';
|
||||
|
||||
/** The three outcomes the policy layer can assign to a tool call. */
export type PolicyDecision =
	| 'auto'
	| 'propose'
	| 'deny';
|
||||
|
||||
/**
 * Policy configuration consulted for AI-attributed tool calls.
 *
 * Lookup order is `tools` → `defaultsByModule` → `defaultForAi`.
 */
export interface AiPolicy {
	/** Decision per tool name; consulted before anything else. */
	readonly tools: { readonly [toolName: string]: PolicyDecision };
	/** Decision per module name; consulted only when the tool has no entry of its own. */
	readonly defaultsByModule?: { readonly [moduleName: string]: PolicyDecision };
	/** Last-resort decision when neither the tool nor its module is listed. */
	readonly defaultForAi: PolicyDecision;
}
|
||||
|
||||
/**
 * Policy installed at module load. No per-module defaults are configured, so
 * any tool not listed here resolves to the global `propose` fallback.
 */
export const DEFAULT_AI_POLICY: AiPolicy = {
	tools: {
		// Queries classified as read-only / harmless: run immediately.
		get_task_stats: 'auto',
		list_tasks: 'auto',
		get_todays_events: 'auto',
		get_drink_progress: 'auto',
		nutrition_summary: 'auto',
		get_places: 'auto',
		location_log: 'auto',

		// Append-only self-state logs (drink, meal): run immediately. When the
		// AI asks "did you drink water?" and the user confirms, logging it
		// should not need a second approval step.
		log_drink: 'auto',
		log_meal: 'auto',

		// Changes to user-visible records: staged as proposals for review.
		create_task: 'propose',
		complete_task: 'propose',
		complete_tasks_by_title: 'propose',
		create_event: 'propose',
		create_place: 'propose',
		visit_place: 'propose',
		undo_drink: 'propose',
	},
	defaultForAi: 'propose',
};
|
||||
|
||||
// Module-level policy consulted by `resolvePolicy` when no explicit policy is passed.
let activePolicy: AiPolicy = DEFAULT_AI_POLICY;

/**
 * Swap in a different active policy (test / settings hook).
 *
 * @param policy The policy to make active.
 * @returns A function that re-installs whichever policy was active at the
 *   moment of this call.
 */
export function setAiPolicy(policy: AiPolicy): () => void {
	const replaced = activePolicy;
	activePolicy = policy;
	return function restore(): void {
		activePolicy = replaced;
	};
}
|
||||
|
||||
/** @returns The policy currently in effect (the default unless `setAiPolicy` replaced it). */
export function getAiPolicy(): AiPolicy {
	const current = activePolicy;
	return current;
}
|
||||
|
||||
/**
 * Resolve the policy decision for a tool invocation by a given actor.
 *
 *   user   → always `auto` (user IS the decision)
 *   system → always `auto` (trusted subsystem)
 *   ai     → tools[name] ?? defaultsByModule[tool.module] ?? defaultForAi
 *
 * Both table lookups consider own properties only. A plain object literal
 * also exposes inherited members such as `toString`, `constructor` and
 * `__proto__`; without the own-property check a tool or module with one of
 * those names would resolve to a truthy non-decision value, which callers
 * that only test for `deny` / `propose` would treat as `auto`.
 *
 * @param toolName Name of the tool being invoked.
 * @param actor Actor the call is attributed to.
 * @param policy Policy to consult; defaults to the active policy.
 * @returns The decision for this call.
 */
export function resolvePolicy(
	toolName: string,
	actor: Actor,
	policy: AiPolicy = activePolicy
): PolicyDecision {
	if (actor.kind !== 'ai') return 'auto';

	const byTool = ownDecision(policy.tools, toolName);
	if (byTool) return byTool;

	const moduleDefaults = policy.defaultsByModule;
	if (moduleDefaults) {
		// The module is only known through the registry, so unregistered
		// tools skip straight to the global fallback.
		const tool = getTool(toolName);
		const byModule = tool ? ownDecision(moduleDefaults, tool.module) : undefined;
		if (byModule) return byModule;
	}

	return policy.defaultForAi;
}

/** Read `key` from a decision table, ignoring anything inherited from the prototype chain. */
function ownDecision(
	decisions: Readonly<Record<string, PolicyDecision>>,
	key: string
): PolicyDecision | undefined {
	return Object.prototype.hasOwnProperty.call(decisions, key) ? decisions[key] : undefined;
}
|
||||
138
apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts
Normal file
138
apps/mana/apps/web/src/lib/data/ai/proposals/store.test.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
import 'fake-indexeddb/auto';
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
|
||||
vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() }));
|
||||
vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() }));
|
||||
vi.mock('$lib/triggers/inline-suggest', () => ({
|
||||
checkInlineSuggestion: vi.fn().mockResolvedValue(null),
|
||||
}));
|
||||
|
||||
import { db } from '../../database';
|
||||
import { registerTools } from '../../tools/registry';
|
||||
import {
|
||||
createProposal,
|
||||
listProposals,
|
||||
approveProposal,
|
||||
rejectProposal,
|
||||
expireOldProposals,
|
||||
getProposal,
|
||||
} from './store';
|
||||
import { PROPOSALS_TABLE } from './types';
|
||||
import type { Actor } from '../../events/actor';
|
||||
|
||||
// AI actor fixture; proposals created from it carry mission-1 / iter-1.
const AI: Extract<Actor, { kind: 'ai' }> = {
	kind: 'ai',
	missionId: 'mission-1',
	iterationId: 'iter-1',
	rationale: 'test run',
};

// Invocations of the echo tool recorded since the last `beforeEach`.
let executed: { name: string; params: Record<string, unknown> }[] = [];

// A tool whose only effect is to record that it ran, so tests can tell
// "staged" apart from "executed".
registerTools([
	{
		name: 'proposal_test_echo',
		module: 'proposalTest',
		description: 'Records invocation for assertions',
		parameters: [{ name: 'value', type: 'string', description: 'v', required: true }],
		async execute(params) {
			executed.push({ name: 'proposal_test_echo', params: { ...params } });
			return { success: true, message: `echo ${params.value}` };
		},
	},
]);

type ProposalInput = Parameters<typeof createProposal>[0];

/** Stage an echo-tool proposal for `value`; `overrides` replaces individual input fields. */
function stageEcho(value: string, overrides: Partial<ProposalInput> = {}) {
	return createProposal({
		actor: AI,
		intent: { kind: 'toolCall', toolName: 'proposal_test_echo', params: { value } },
		rationale: 'r',
		...overrides,
	});
}

beforeEach(async () => {
	executed = [];
	await db.table(PROPOSALS_TABLE).clear();
});

describe('proposal lifecycle', () => {
	it('creates a pending proposal', async () => {
		const created = await stageEcho('a', { rationale: 'because' });

		expect(created.status).toBe('pending');
		expect(created.missionId).toBe('mission-1');
		expect(created.rationale).toBe('because');
		expect(await getProposal(created.id)).toBeTruthy();
	});

	it('lists pending proposals by filter', async () => {
		await stageEcho('a');
		await stageEcho('b', { actor: { ...AI, missionId: 'mission-2' } });

		const everyPending = await listProposals({ status: 'pending' });
		expect(everyPending).toHaveLength(2);

		const secondMission = await listProposals({ missionId: 'mission-2' });
		expect(secondMission).toHaveLength(1);
	});

	it('approving runs the intent and marks the proposal approved', async () => {
		const staged = await stageEcho('go');

		const { proposal, result } = await approveProposal(staged.id, 'looks good');
		expect(result.success).toBe(true);
		expect(executed).toEqual([{ name: 'proposal_test_echo', params: { value: 'go' } }]);
		expect(proposal.status).toBe('approved');
		expect(proposal.userFeedback).toBe('looks good');

		// The decision must also be written to the table, not just returned.
		const stored = await getProposal(staged.id);
		expect(stored?.status).toBe('approved');
		expect(stored?.decidedBy).toBe('user');
	});

	it('rejecting stores feedback and does not execute the intent', async () => {
		const staged = await stageEcho('x');

		await rejectProposal(staged.id, 'not now');

		const stored = await getProposal(staged.id);
		expect(stored?.status).toBe('rejected');
		expect(stored?.userFeedback).toBe('not now');
		expect(executed).toHaveLength(0);
	});

	it('refuses to approve a non-pending proposal', async () => {
		const staged = await stageEcho('x');
		await rejectProposal(staged.id);

		await expect(approveProposal(staged.id)).rejects.toThrow(/rejected/);
	});

	it('expires proposals past their expiresAt', async () => {
		await stageEcho('x', { expiresAt: '2020-01-01T00:00:00.000Z' });
		await stageEcho('y', { expiresAt: '2099-01-01T00:00:00.000Z' });

		const expiredCount = await expireOldProposals(new Date('2026-04-14T00:00:00.000Z'));
		expect(expiredCount).toBe(1);

		const stillPending = await listProposals({ status: 'pending' });
		expect(stillPending).toHaveLength(1);
	});
});
|
||||
145
apps/mana/apps/web/src/lib/data/ai/proposals/store.ts
Normal file
145
apps/mana/apps/web/src/lib/data/ai/proposals/store.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
/**
|
||||
* Proposal store — create, list, approve, reject, expire.
|
||||
*
|
||||
* Approval re-runs the tool the AI originally called, but through
* `executeToolRaw`, which skips the policy gate so the stored intent can't
* bounce back into another proposal. Rejection just marks the row — the AI
* sees the feedback on the next planner pass via
* `listProposals({ status: 'rejected' })`.
|
||||
*/
|
||||
|
||||
import { db } from '../../database';
|
||||
import { runAsAsync } from '../../events/actor';
|
||||
import type { Actor } from '../../events/actor';
|
||||
import type { ToolResult } from '../../tools/types';
|
||||
import type { Intent, Proposal, ProposalStatus } from './types';
|
||||
import { PROPOSALS_TABLE } from './types';
|
||||
|
||||
/** Typed handle on the proposals table, looked up on every call. */
function table() {
	return db.table<Proposal>(PROPOSALS_TABLE);
}
|
||||
|
||||
/** Everything `createProposal` needs from the caller; id, timestamps and status are filled in there. */
export interface CreateProposalInput {
	/** The AI actor submitting the proposal; its mission and iteration ids are copied onto the row. */
	actor: Extract<Actor, { kind: 'ai' }>;
	/** What should run if the proposal is approved. */
	intent: Intent;
	/** The AI's stated reason, stored verbatim on the proposal. */
	rationale: string;
	/**
	 * ISO timestamp after which `expireOldProposals` marks the proposal
	 * expired. When omitted the proposal never auto-expires.
	 */
	expiresAt?: string;
}
|
||||
|
||||
/**
 * Persist a new proposal in the `pending` state.
 *
 * @param input Actor, intent, rationale and optional expiry for the proposal.
 * @returns The stored proposal, including its generated id and creation timestamp.
 */
export async function createProposal(input: CreateProposalInput): Promise<Proposal> {
	const { actor, intent, rationale, expiresAt } = input;

	const record: Proposal = {
		id: crypto.randomUUID(),
		createdAt: new Date().toISOString(),
		expiresAt,
		status: 'pending',
		actor,
		// Copied out of the actor so they exist as top-level fields on the row.
		missionId: actor.missionId,
		iterationId: actor.iterationId,
		rationale,
		intent,
	};

	await table().add(record);
	return record;
}
|
||||
|
||||
/**
 * Look up a single proposal by id.
 *
 * @returns The stored proposal, or `undefined` when no row has that id.
 */
export async function getProposal(id: string): Promise<Proposal | undefined> {
	const row = await table().get(id);
	return row;
}
|
||||
|
||||
/** Optional criteria for `listProposals`; criteria that are set must all match. */
export interface ListProposalsFilter {
	/** Keep only proposals in this lifecycle state. */
	status?: ProposalStatus;
	/** Keep only proposals whose `missionId` equals this value. */
	missionId?: string;
}
|
||||
|
||||
/**
 * List proposals, newest first by `createdAt`.
 *
 * @param filter Optional status / mission criteria; an unset (or empty)
 *   criterion matches every proposal.
 * @returns The matching proposals.
 */
export async function listProposals(filter: ListProposalsFilter = {}): Promise<Proposal[]> {
	const { status, missionId } = filter;
	const newestFirst = table().orderBy('createdAt').reverse();

	const matching = newestFirst.filter((candidate) => {
		if (status && candidate.status !== status) return false;
		if (missionId && candidate.missionId !== missionId) return false;
		return true;
	});

	return matching.toArray();
}
|
||||
|
||||
/**
 * Approve a pending proposal: run its stored intent, then record the decision.
 *
 * The intent runs through `runApprovedIntent`, i.e. under the actor stored on
 * the proposal and without going back through the policy gate — a second
 * policy lookup would only stage the same call as a new proposal.
 *
 * The row is marked `approved` once the intent has run, whatever `ToolResult`
 * it produced; the result is returned so the caller can inspect it. If
 * running the intent rejects, the update is never reached and the row is left
 * untouched.
 *
 * @param id Id of the proposal to approve.
 * @param userFeedback Optional free-text note stored with the decision.
 * @returns The proposal as updated, plus the tool's result.
 * @throws Error when the proposal does not exist or is not `pending`.
 */
export async function approveProposal(
	id: string,
	userFeedback?: string
): Promise<{ proposal: Proposal; result: ToolResult }> {
	const pending = await getProposal(id);
	if (!pending) {
		throw new Error(`Proposal not found: ${id}`);
	}
	if (pending.status !== 'pending') {
		throw new Error(`Proposal ${id} is ${pending.status}, cannot approve`);
	}

	const result = await runApprovedIntent(pending);

	const decision: Partial<Proposal> = {
		status: 'approved',
		decidedAt: new Date().toISOString(),
		decidedBy: 'user',
		userFeedback,
	};
	await table().update(id, decision);

	const proposal = { ...pending, ...decision };
	return { proposal, result };
}
|
||||
|
||||
/**
 * Reject a pending proposal. The stored intent is not run; only the row is updated.
 *
 * @param id Id of the proposal to reject.
 * @param userFeedback Optional free-text note stored with the decision.
 * @returns The proposal as updated.
 * @throws Error when the proposal does not exist or is not `pending`.
 */
export async function rejectProposal(id: string, userFeedback?: string): Promise<Proposal> {
	const pending = await getProposal(id);
	if (!pending) {
		throw new Error(`Proposal not found: ${id}`);
	}
	if (pending.status !== 'pending') {
		throw new Error(`Proposal ${id} is ${pending.status}, cannot reject`);
	}

	const decision: Partial<Proposal> = {
		status: 'rejected',
		decidedAt: new Date().toISOString(),
		decidedBy: 'user',
		userFeedback,
	};
	await table().update(id, decision);

	return { ...pending, ...decision };
}
|
||||
|
||||
/**
 * Mark every pending proposal whose `expiresAt` lies before `now` as expired.
 * Intended to be fired from a low-frequency tick (e.g. on app focus).
 *
 * Expiry is decided by comparing parsed instants rather than raw strings.
 * A lexicographic comparison is only correct when every timestamp has the
 * exact `toISOString()` shape; a value with a UTC offset or without
 * milliseconds would sort incorrectly. A missing, empty or unparseable
 * `expiresAt` never expires (`Date.parse` yields `NaN`, and `NaN < x` is false).
 *
 * @param now Reference instant; defaults to the current time.
 * @returns The number of proposals that were marked expired.
 */
export async function expireOldProposals(now: Date = new Date()): Promise<number> {
	const nowMs = now.getTime();
	const decidedAt = now.toISOString();

	const stale = await table()
		.where('status')
		.equals('pending')
		.filter((p) => typeof p.expiresAt === 'string' && Date.parse(p.expiresAt) < nowMs)
		.toArray();

	for (const p of stale) {
		await table().update(p.id, {
			status: 'expired',
			decidedAt,
			decidedBy: 'auto-expire',
		});
	}
	return stale.length;
}
|
||||
|
||||
/**
 * Execute a proposal's intent under the actor recorded on the proposal,
 * without consulting policy. The user has already consented by approving;
 * going through the policy gate again would turn the call straight back into
 * a new proposal.
 *
 * `tools/executor` is imported lazily because that module imports
 * `createProposal` from this file — a top-level import here would form a cycle.
 *
 * @throws Error when the intent kind is not `toolCall`.
 */
async function runApprovedIntent(proposal: Proposal): Promise<ToolResult> {
	const { actor, intent } = proposal;

	return runAsAsync(actor, async () => {
		if (intent.kind !== 'toolCall') {
			throw new Error(`Unsupported intent kind: ${(intent as { kind: string }).kind}`);
		}
		const executor = await import('../../tools/executor');
		return executor.executeToolRaw(intent.toolName, intent.params);
	});
}
|
||||
62
apps/mana/apps/web/src/lib/data/ai/proposals/types.ts
Normal file
62
apps/mana/apps/web/src/lib/data/ai/proposals/types.ts
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Proposals — staged AI intents awaiting user approval.
|
||||
*
|
||||
* When an AI-attributed tool call hits a policy of `propose`, the executor
|
||||
* records a {@link Proposal} instead of performing the underlying mutation.
|
||||
* The proposal sits in the local `pendingProposals` Dexie table until the
|
||||
* user approves it (→ run the intent), rejects it, or it auto-expires.
|
||||
*
|
||||
* Proposals are intentionally local-only — they do not sync through
|
||||
* mana-sync. The approved mutation syncs normally once executed, so
|
||||
* other devices see the resulting write without ever seeing the proposal
|
||||
* state machine.
|
||||
*/
|
||||
|
||||
import type { Actor } from '../../events/actor';
|
||||
|
||||
/** Lifecycle states a proposal can be in. */
|
||||
export type ProposalStatus =
	| 'pending'
	| 'approved'
	| 'rejected'
	| 'expired';
|
||||
|
||||
/**
 * What the AI wants to happen once a proposal is approved.
 *
 * Currently the only variant is `toolCall` (run the named tool with the
 * stored params). The union is meant to grow `patch` / `create` variants once
 * module UIs need to render field-level diffs inline.
 */
export type Intent = ToolCallIntent;
|
||||
|
||||
/** Intent variant that replays a tool call. */
export interface ToolCallIntent {
	/** Discriminant for the `Intent` union. */
	readonly kind: 'toolCall';
	/** Name of the tool to run. */
	readonly toolName: string;
	/** Parameters to hand to the tool. */
	readonly params: Record<string, unknown>;
}
|
||||
|
||||
/** One staged AI intent together with its review state. */
export interface Proposal {
	readonly id: string;
	/** ISO timestamp of creation. */
	readonly createdAt: string;
	/** ISO timestamp after which the proposal may be auto-expired; absent means never. */
	readonly expiresAt?: string;
	readonly status: ProposalStatus;

	/**
	 * The actor that submitted this proposal. By construction this is always
	 * a `kind: 'ai'` actor — `resolvePolicy` never routes user/system writes
	 * into a proposal.
	 */
	readonly actor: Actor;
	/** Copy of `actor.missionId`, kept top-level for "all proposals in mission X" queries. */
	readonly missionId?: string;
	/** Copy of `actor.iterationId`. */
	readonly iterationId?: string;
	/** The AI's stated reason for the change, shown in the approval UI. */
	readonly rationale: string;

	/** The action that runs on approval. */
	readonly intent: Intent;

	/** ISO timestamp of the moment the proposal left the `pending` state. */
	readonly decidedAt?: string;
	/** Who moved the proposal out of `pending`. */
	readonly decidedBy?: 'user' | 'auto-expire';
	/** Free-text user feedback captured on approve or reject. */
	readonly userFeedback?: string;
}
|
||||
|
||||
/** Name of the local table that holds proposals. */
export const PROPOSALS_TABLE = 'pendingProposals';
|
||||
|
|
@ -1,8 +1,23 @@
|
|||
import { describe, it, expect, beforeEach } from 'vitest';
|
||||
import 'fake-indexeddb/auto';
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
|
||||
vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() }));
|
||||
vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() }));
|
||||
vi.mock('$lib/triggers/inline-suggest', () => ({
|
||||
checkInlineSuggestion: vi.fn().mockResolvedValue(null),
|
||||
}));
|
||||
|
||||
import { executeTool } from './executor';
|
||||
import { registerTools, getTools } from './registry';
|
||||
import { setAiPolicy } from '../ai/policy';
|
||||
import { listProposals, approveProposal } from '../ai/proposals/store';
|
||||
import { PROPOSALS_TABLE } from '../ai/proposals/types';
|
||||
import { db } from '../database';
|
||||
import type { Actor } from '../events/actor';
|
||||
import type { ModuleTool } from './types';
|
||||
|
||||
const AI: Actor = { kind: 'ai', missionId: 'm-1', iterationId: 'it-1', rationale: 'because' };
|
||||
|
||||
// Reset registry between tests by reloading — registry uses module-level array
|
||||
// Instead, we just register test tools and rely on dedup
|
||||
const testTools: ModuleTool[] = [
|
||||
|
|
@ -102,3 +117,86 @@ describe('Tool Executor', () => {
|
|||
expect(result.success).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('Tool Executor — AI policy routing', () => {
	/**
	 * Run `body` with a policy that maps `test_echo` to `decision`, restoring
	 * the previous policy afterwards even when `body` throws.
	 */
	async function withEchoPolicy(
		decision: 'auto' | 'propose' | 'deny',
		body: () => Promise<void>
	): Promise<void> {
		const restore = setAiPolicy({ tools: { test_echo: decision }, defaultForAi: 'propose' });
		try {
			await body();
		} finally {
			restore();
		}
	}

	const pendingProposals = () => listProposals({ status: 'pending' });

	beforeEach(async () => {
		await db.table(PROPOSALS_TABLE).clear();
	});

	it('runs a tool directly for user actors regardless of name', async () => {
		// No actor argument and no policy entry for test_echo: the call must still run.
		const result = await executeTool('test_echo', { text: 'hi' });

		expect(result.success).toBe(true);
		expect(result.message).toBe('echo: hi');
	});

	it('stages a proposal when ai actor hits a propose-policy tool', async () => {
		await withEchoPolicy('propose', async () => {
			const result = await executeTool('test_echo', { text: 'stage-me' }, AI);

			expect(result.success).toBe(true);
			expect(result.message).toMatch(/Vorgeschlagen/);
			expect((result.data as { proposalId: string }).proposalId).toBeTruthy();

			// The tool itself did not run; the call was recorded instead.
			const pending = await pendingProposals();
			expect(pending).toHaveLength(1);
			expect(pending[0].rationale).toBe('because');
			expect(pending[0].missionId).toBe('m-1');
		});
	});

	it('runs directly for ai actor when policy says auto', async () => {
		await withEchoPolicy('auto', async () => {
			const result = await executeTool('test_echo', { text: 'direct' }, AI);

			expect(result.success).toBe(true);
			expect(result.message).toBe('echo: direct');
			expect(await pendingProposals()).toHaveLength(0);
		});
	});

	it('refuses with deny policy', async () => {
		await withEchoPolicy('deny', async () => {
			const result = await executeTool('test_echo', { text: 'no' }, AI);

			expect(result.success).toBe(false);
			expect(result.message).toMatch(/not available/);
		});
	});

	it('approval runs the staged intent with original actor attribution', async () => {
		await withEchoPolicy('propose', async () => {
			const staged = await executeTool('test_echo', { text: 'approved' }, AI);
			const { proposalId } = staged.data as { proposalId: string };

			const { result, proposal } = await approveProposal(proposalId);

			expect(result.success).toBe(true);
			expect(result.message).toBe('echo: approved');
			expect(proposal.status).toBe('approved');
		});
	});

	it('still validates parameters before staging a proposal', async () => {
		await withEchoPolicy('propose', async () => {
			const result = await executeTool('test_echo', {}, AI);

			expect(result.success).toBe(false);
			expect(result.message).toContain('Missing required parameter');
			expect(await pendingProposals()).toHaveLength(0);
		});
	});
});
|
||||
|
|
|
|||
|
|
@ -1,35 +1,126 @@
|
|||
/**
|
||||
* Tool Executor — Validates parameters and runs a tool by name.
|
||||
* Tool Executor — validates parameters, resolves AI policy, and runs or
|
||||
* stages the tool by name.
|
||||
*
|
||||
* Call paths:
|
||||
* - User action from the UI: `executeTool(name, params)` with no actor
|
||||
* → ambient `USER_ACTOR`, policy returns `auto`, tool runs directly.
|
||||
* - AI in the companion orchestrator: `executeTool(name, params, aiActor)`
|
||||
* → policy resolves per-tool; `propose` writes a Proposal and returns
|
||||
* a success result carrying the proposal id, `auto` executes, `deny`
|
||||
* refuses.
|
||||
* - Approval path: proposal store calls `executeToolRaw(name, params)`
|
||||
* under `runAsAsync(aiActor, ...)` — same validation, but no policy.
|
||||
*/
|
||||
|
||||
import { getTool } from './registry';
|
||||
import { runAsAsync, USER_ACTOR } from '../events/actor';
|
||||
import { resolvePolicy } from '../ai/policy';
|
||||
import { createProposal } from '../ai/proposals/store';
|
||||
import type { Actor } from '../events/actor';
|
||||
import type { ToolResult } from './types';
|
||||
|
||||
export async function executeTool(
|
||||
name: string,
|
||||
params: Record<string, unknown>
|
||||
params: Record<string, unknown>,
|
||||
actor?: Actor
|
||||
): Promise<ToolResult> {
|
||||
const tool = getTool(name);
|
||||
if (!tool) {
|
||||
return { success: false, message: `Unknown tool: ${name}` };
|
||||
}
|
||||
|
||||
// Validate required parameters
|
||||
for (const p of tool.parameters) {
|
||||
if (p.required && (params[p.name] === undefined || params[p.name] === null)) {
|
||||
return { success: false, message: `Missing required parameter: ${p.name}` };
|
||||
}
|
||||
const validation = validateParams(tool.parameters, params);
|
||||
if (!validation.ok) return validation.error;
|
||||
|
||||
const effectiveActor: Actor = actor ?? USER_ACTOR;
|
||||
const decision = resolvePolicy(name, effectiveActor);
|
||||
|
||||
if (decision === 'deny') {
|
||||
return {
|
||||
success: false,
|
||||
message: `Tool "${name}" is not available to AI actors under current policy`,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate types
|
||||
for (const p of tool.parameters) {
|
||||
if (decision === 'propose') {
|
||||
// Only ai actors can hit `propose` — resolvePolicy short-circuits
|
||||
// user/system to `auto`. Narrow defensively in case policy is swapped.
|
||||
if (effectiveActor.kind !== 'ai') {
|
||||
return { success: false, message: `propose policy requires an AI actor` };
|
||||
}
|
||||
const proposal = await createProposal({
|
||||
actor: effectiveActor,
|
||||
intent: { kind: 'toolCall', toolName: name, params },
|
||||
rationale: effectiveActor.rationale,
|
||||
});
|
||||
return {
|
||||
success: true,
|
||||
data: { proposalId: proposal.id, status: 'pending' },
|
||||
message: `Vorgeschlagen: "${name}" wartet auf Freigabe.`,
|
||||
};
|
||||
}
|
||||
|
||||
// decision === 'auto'
|
||||
return runAsAsync(effectiveActor, () => runValidatedTool(tool, params));
|
||||
}
|
||||
|
||||
/**
 * Look up, validate and run a tool without consulting AI policy.
 *
 * Used by the proposal approval path: consent already exists there, and a
 * policy lookup would stage the call as yet another proposal.
 *
 * No actor is installed here — the caller is expected to wrap the call in
 * `runAsAsync` with the actor it wants the run attributed to.
 *
 * @param name Registered tool name.
 * @param params Parameters for the tool; validated against the tool's declared parameters.
 * @returns The tool's result, or a failure result for an unknown tool or invalid parameters.
 */
export async function executeToolRaw(
	name: string,
	params: Record<string, unknown>
): Promise<ToolResult> {
	const tool = getTool(name);
	if (!tool) {
		return { success: false, message: `Unknown tool: ${name}` };
	}

	const outcome = validateParams(tool.parameters, params);
	if (!outcome.ok) {
		return outcome.error;
	}

	return runValidatedTool(tool, params);
}
|
||||
|
||||
// ── Internals ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Invoke a tool whose parameters have already been validated, converting
 * anything it throws (or rejects with) into a failure result instead of
 * letting it propagate.
 */
async function runValidatedTool(
	tool: { execute: (p: Record<string, unknown>) => Promise<ToolResult> },
	params: Record<string, unknown>
): Promise<ToolResult> {
	let outcome: ToolResult;
	try {
		outcome = await tool.execute(params);
	} catch (err) {
		// Thrown values are not guaranteed to be Error instances.
		const reason = err instanceof Error ? err.message : String(err);
		outcome = { success: false, message: `Tool execution failed: ${reason}` };
	}
	return outcome;
}
|
||||
|
||||
/** Result of parameter validation: either OK, or the failure result to hand back to the caller. */
type ValidationOutcome =
	| { ok: true }
	| { ok: false; error: ToolResult };
|
||||
|
||||
function validateParams(
|
||||
schema: { name: string; type: string; required: boolean; enum?: string[] }[],
|
||||
params: Record<string, unknown>
|
||||
): ValidationOutcome {
|
||||
for (const p of schema) {
|
||||
if (p.required && (params[p.name] === undefined || params[p.name] === null)) {
|
||||
return {
|
||||
ok: false,
|
||||
error: { success: false, message: `Missing required parameter: ${p.name}` },
|
||||
};
|
||||
}
|
||||
}
|
||||
for (const p of schema) {
|
||||
const val = params[p.name];
|
||||
if (val === undefined || val === null) continue;
|
||||
|
||||
if (p.type === 'number' && typeof val !== 'number') {
|
||||
const num = Number(val);
|
||||
if (isNaN(num)) {
|
||||
return { success: false, message: `Parameter ${p.name} must be a number` };
|
||||
return {
|
||||
ok: false,
|
||||
error: { success: false, message: `Parameter ${p.name} must be a number` },
|
||||
};
|
||||
}
|
||||
params[p.name] = num;
|
||||
}
|
||||
|
|
@ -38,16 +129,13 @@ export async function executeTool(
|
|||
}
|
||||
if (p.enum && !p.enum.includes(String(val))) {
|
||||
return {
|
||||
success: false,
|
||||
message: `Parameter ${p.name} must be one of: ${p.enum.join(', ')}`,
|
||||
ok: false,
|
||||
error: {
|
||||
success: false,
|
||||
message: `Parameter ${p.name} must be one of: ${p.enum.join(', ')}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return await tool.execute(params);
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
return { success: false, message: `Tool execution failed: ${msg}` };
|
||||
}
|
||||
return { ok: true };
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue