feat(ai): MissionRunner — orchestrates Planner + proposal staging

Executes one iteration end-to-end: resolve Mission inputs → build the policy-filtered tool allowlist → invoke the Planner → stage each PlannedStep as a Proposal (or auto-run if policy says so) → finalize the iteration with summary + status.

- `data/ai/missions/runner.ts`
  - `runMission(id, deps)` runs a single iteration. Planner + stageStep are injected so the Runner is unit-testable without a live LLM.
  - `runDueMissions(now, deps)` scans for active missions past their nextRunAt and runs each once. Safe to call on a foreground tick.
  - Reuses the iteration id returned by `startIteration` so `finishIteration` updates the same row (fixed a dup-id bug the tests caught).
- `data/ai/missions/input-resolvers.ts` — registry: modules register a resolver at init, Runner looks up by module name. Missing resolvers degrade gracefully to "fewer inputs", never crash a run.
- `data/ai/missions/available-tools.ts` — exposes only tools the AI policy rates non-`deny`. Defence-in-depth with the executor + parser.

overallStatus derivation:
- 0 steps → 'approved' (no-op run is valid)
- all steps failed → 'failed'
- any step staged (proposal id) → 'awaiting-review'
- all steps ran auto → 'approved'

Planner throw is caught and recorded as a failed iteration — one bad mission can't stall the queue.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-14 21:41:09 +02:00 · 2026-04-14 21:20:29 +02:00 · 2026-04-14 21:20:29 +02:00 · 1c6201be50
commit 1c6201be50
parent 72d5c708c4
4 changed files with 511 additions and 0 deletions
--- a/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts
@ -0,0 +1,30 @@
+/**
+ * Build the tool list the Planner is allowed to consider.
+ *
+ * Only tools the policy rates `auto` or `propose` are exposed — `deny` is
+ * invisible to the AI. This is defence-in-depth: even if the Planner
+ * hallucinates a denied tool name, the parser rejects it because the name
+ * isn't in the allow-set, AND the executor would refuse at runtime.
+ */
+
+import { getTools } from '../../tools/registry';
+import { resolvePolicy } from '../policy';
+import type { Actor } from '../../events/actor';
+import type { AvailableTool } from './planner/types';
+
+/**
+ * Collect the tools the Planner may consider for the given AI actor.
+ *
+ * Every registered tool is checked against the policy; a tool whose policy
+ * resolves to `deny` is left out, everything else is projected to the
+ * {@link AvailableTool} shape (name, module, description, parameter specs).
+ *
+ * @param aiActor - The AI actor the policy is resolved for.
+ * @returns The non-denied tools, in registry order.
+ */
+export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
+	const exposed: AvailableTool[] = [];
+	for (const tool of getTools()) {
+		if (resolvePolicy(tool.name, aiActor) === 'deny') continue;
+		// Copy only the fields the Planner needs instead of handing out the
+		// registry's own parameter objects.
+		const parameters = tool.parameters.map((param) => ({
+			name: param.name,
+			type: param.type,
+			required: param.required,
+			description: param.description,
+			enum: param.enum,
+		}));
+		exposed.push({
+			name: tool.name,
+			module: tool.module,
+			description: tool.description,
+			parameters,
+		});
+	}
+	return exposed;
+}
--- a/apps/mana/apps/web/src/lib/data/ai/missions/input-resolvers.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/input-resolvers.ts
@ -0,0 +1,62 @@
+/**
+ * Input-resolver registry.
+ *
+ * A Mission references records in other modules via {@link MissionInputRef}.
+ * The Runner needs to fetch those records and project them to plain text
+ * so the Planner can drop them into its prompt. Each module registers a
+ * resolver once on app init (`registerInputResolver`) — the Runner then
+ * looks up by module name and falls back to a no-op with a console warning
+ * if nothing's registered (keeps Missions robust against module removal).
+ *
+ * Why a registry instead of a central switch statement: cross-module
+ * imports here would couple `data/ai/` to every product module. The
+ * registry pattern lets each module own its own projection in its own
+ * module-init file.
+ */
+
+import type { MissionInputRef } from './types';
+import type { ResolvedInput } from './planner/types';
+
+/**
+ * Projects one {@link MissionInputRef} to a {@link ResolvedInput}. Resolving
+ * to `null` means the ref yields no input; `resolveMissionInputs` filters
+ * such results out.
+ */
+export type InputResolver = (ref: MissionInputRef) => Promise<ResolvedInput | null>;
+
+/** Registered resolvers, keyed by module name (looked up via `ref.module`). */
+const resolvers = new Map<string, InputResolver>();
+
+/**
+ * Register a resolver for a module. Idempotent — last registration wins,
+ * because the map entry for `moduleName` is simply overwritten.
+ *
+ * @param moduleName - Key that `MissionInputRef.module` is matched against.
+ * @param resolver - Projection invoked for refs belonging to that module.
+ */
+export function registerInputResolver(moduleName: string, resolver: InputResolver): void {
+	resolvers.set(moduleName, resolver);
+}
+
+/**
+ * Remove a resolver (test helper). Does nothing when no resolver is
+ * registered under `moduleName`.
+ */
+export function unregisterInputResolver(moduleName: string): void {
+	resolvers.delete(moduleName);
+}
+
+/** Look up the resolver registered for `moduleName`; `undefined` when there is none. */
+export function getInputResolver(moduleName: string): InputResolver | undefined {
+	return resolvers.get(moduleName);
+}
+
+/**
+ * Resolve all refs a Mission declares, running the resolvers in parallel.
+ *
+ * A ref contributes nothing to the result when
+ *   - no resolver is registered for its module (logged via `console.warn`),
+ *   - its resolver throws (logged via `console.error`), or
+ *   - its resolver resolves to `null` (dropped without logging).
+ * The Planner therefore just sees fewer inputs; this function never rejects
+ * because of a single ref.
+ *
+ * @param refs - The input refs declared on the mission.
+ * @returns The successfully resolved inputs, in the order of `refs`.
+ */
+export async function resolveMissionInputs(
+	refs: readonly MissionInputRef[]
+): Promise<ResolvedInput[]> {
+	const resolveOne = async (ref: MissionInputRef): Promise<ResolvedInput | null> => {
+		const resolver = resolvers.get(ref.module);
+		if (resolver) {
+			try {
+				return await resolver(ref);
+			} catch (err) {
+				console.error(`[MissionRunner] input resolver for ${ref.module} threw on ${ref.id}:`, err);
+				return null;
+			}
+		}
+		console.warn(`[MissionRunner] no input resolver registered for module "${ref.module}"`);
+		return null;
+	};
+
+	const settled = await Promise.all(refs.map((ref) => resolveOne(ref)));
+	const resolved: ResolvedInput[] = [];
+	for (const entry of settled) {
+		if (entry !== null) resolved.push(entry);
+	}
+	return resolved;
+}
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
@ -0,0 +1,216 @@
+import 'fake-indexeddb/auto';
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+
+vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() }));
+vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() }));
+vi.mock('$lib/triggers/inline-suggest', () => ({
+	checkInlineSuggestion: vi.fn().mockResolvedValue(null),
+}));
+
+import { db } from '../../database';
+import { registerTools } from '../../tools/registry';
+import { setAiPolicy } from '../policy';
+import { createMission, getMission, pauseMission } from './store';
+import { runMission, runDueMissions } from './runner';
+import {
+	registerInputResolver,
+	unregisterInputResolver,
+	resolveMissionInputs,
+} from './input-resolvers';
+import { MISSIONS_TABLE } from './types';
+import type { AiPlanInput, AiPlanOutput } from './planner/types';
+
+// Log of tool executions. Reset in beforeEach; tests inspect it to tell
+// whether a tool body actually ran (as opposed to being staged).
+let executed: { name: string; params: Record<string, unknown> }[] = [];
+
+// Register a throwaway tool whose execute() only records the call in
+// `executed` and reports success.
+registerTools([
+	{
+		name: 'runner_test_stage',
+		module: 'runnerTest',
+		description: 'proposes',
+		parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }],
+		async execute(params) {
+			executed.push({ name: 'runner_test_stage', params: { ...params } });
+			return { success: true, message: 'ok' };
+		},
+	},
+]);
+
+// Start every test from a clean slate: empty execution log, no missions,
+// no pending proposals.
+beforeEach(async () => {
+	executed = [];
+	await db.table(MISSIONS_TABLE).clear();
+	await db.table('pendingProposals').clear();
+});
+
+// runMission: one iteration end-to-end with a stubbed planner. No stageStep
+// is injected, so the runner's default step stager is exercised.
+describe('runMission', () => {
+	it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => {
+		// Policy 'propose' for the test tool: the step must become a proposal
+		// instead of executing.
+		const restore = setAiPolicy({
+			tools: { runner_test_stage: 'propose' },
+			defaultForAi: 'propose',
+		});
+		try {
+			const m = await createMission({
+				title: 'Test mission',
+				conceptMarkdown: '',
+				objective: 'test',
+				cadence: { kind: 'manual' },
+			});
+			const planStub: AiPlanOutput = {
+				summary: 'Staged a test step',
+				steps: [
+					{
+						summary: 'Do a thing',
+						toolName: 'runner_test_stage',
+						params: { val: 'hello' },
+						rationale: 'because test',
+					},
+				],
+			};
+			const result = await runMission(m.id, {
+				plan: async (_input: AiPlanInput) => planStub,
+			});
+
+			expect(result.plannedSteps).toBe(1);
+			expect(result.stagedSteps).toBe(1);
+			expect(result.iteration.overallStatus).toBe('awaiting-review');
+
+			// The persisted iteration must carry the proposal id on its step.
+			const after = await getMission(m.id);
+			expect(after?.iterations).toHaveLength(1);
+			expect(after?.iterations[0].plan[0].proposalId).toBeTruthy();
+			expect(after?.iterations[0].plan[0].status).toBe('staged');
+
+			// Tool did NOT execute — proposal was staged
+			expect(executed).toHaveLength(0);
+		} finally {
+			restore();
+		}
+	});
+
+	it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => {
+		// Default is 'deny'; only the explicitly listed tool is rated 'propose'.
+		// NOTE(review): this only asserts the allowed tool is present, not that
+		// denied tools are absent.
+		const restore = setAiPolicy({
+			tools: { runner_test_stage: 'propose' },
+			defaultForAi: 'deny',
+		});
+		try {
+			const m = await createMission({
+				title: 'Test',
+				conceptMarkdown: '',
+				objective: 'test',
+				cadence: { kind: 'manual' },
+			});
+			// Capture whatever input the runner hands to the planner.
+			let captured: AiPlanInput | null = null;
+			await runMission(m.id, {
+				plan: async (input) => {
+					captured = input;
+					return { summary: '', steps: [] };
+				},
+			});
+			expect(captured).toBeTruthy();
+			expect(captured!.mission.id).toBe(m.id);
+			const allowedNames = captured!.availableTools.map((t) => t.name);
+			expect(allowedNames).toContain('runner_test_stage');
+		} finally {
+			restore();
+		}
+	});
+
+	it('marks an iteration failed when the planner throws', async () => {
+		const m = await createMission({
+			title: 'x',
+			conceptMarkdown: '',
+			objective: 'x',
+			cadence: { kind: 'manual' },
+		});
+		// The planner rejection must be absorbed: runMission resolves with a
+		// failed iteration rather than rejecting.
+		const result = await runMission(m.id, {
+			plan: async () => {
+				throw new Error('planner down');
+			},
+		});
+		expect(result.iteration.overallStatus).toBe('failed');
+		// The failure and its message are also persisted on the mission.
+		const after = await getMission(m.id);
+		expect(after?.iterations[0].overallStatus).toBe('failed');
+		expect(after?.iterations[0].summary).toContain('planner down');
+	});
+
+	it('produces an approved iteration when planner returns zero steps', async () => {
+		const m = await createMission({
+			title: 'x',
+			conceptMarkdown: '',
+			objective: 'x',
+			cadence: { kind: 'manual' },
+		});
+		// An empty plan is a valid no-op run.
+		const result = await runMission(m.id, {
+			plan: async () => ({ summary: 'nothing needed', steps: [] }),
+		});
+		expect(result.iteration.overallStatus).toBe('approved');
+	});
+
+	it('refuses to run a paused mission', async () => {
+		const m = await createMission({
+			title: 'x',
+			conceptMarkdown: '',
+			objective: 'x',
+			cadence: { kind: 'manual' },
+		});
+		await pauseMission(m.id);
+		// The rejection message must mention the paused state.
+		await expect(
+			runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) })
+		).rejects.toThrow(/paused/);
+	});
+});
+
+// runDueMissions: only missions that are due at `now` may reach the planner.
+describe('runDueMissions', () => {
+	it('runs only active missions whose nextRunAt has passed', async () => {
+		const a = await createMission({
+			title: 'due',
+			conceptMarkdown: '',
+			objective: 'x',
+			cadence: { kind: 'interval', everyMinutes: 5 },
+		});
+		const b = await createMission({
+			title: 'future',
+			conceptMarkdown: '',
+			objective: 'x',
+			cadence: { kind: 'interval', everyMinutes: 5 },
+		});
+		// Force `a` into the past, leave `b` in the future
+		await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' });
+
+		// Record which missions the planner stub is invoked for.
+		const runs: string[] = [];
+		await runDueMissions(new Date(), {
+			plan: async (input) => {
+				runs.push(input.mission.id);
+				return { summary: '', steps: [] };
+			},
+		});
+		expect(runs).toEqual([a.id]);
+		expect(runs).not.toContain(b.id);
+	});
+});
+
+// resolveMissionInputs: registry lookup plus graceful handling of refs
+// whose module has no resolver.
+describe('resolveMissionInputs', () => {
+	it('resolves via registered resolvers and skips missing modules', async () => {
+		// Resolver that echoes the ref id into the content so the assertion
+		// can tell which ref was resolved.
+		registerInputResolver('testmod', async (ref) => ({
+			id: ref.id,
+			module: 'testmod',
+			table: ref.table,
+			title: 'T',
+			content: `content for ${ref.id}`,
+		}));
+		try {
+			const refs = [
+				{ module: 'testmod', table: 't', id: 'a' },
+				{ module: 'nope', table: 't', id: 'b' },
+			];
+			const resolved = await resolveMissionInputs(refs);
+			// Only the 'testmod' ref survives; 'nope' has no resolver.
+			expect(resolved).toHaveLength(1);
+			expect(resolved[0].content).toContain('a');
+		} finally {
+			// The registry is module-level state — clean up for other tests.
+			unregisterInputResolver('testmod');
+		}
+	});
+
+	it('returns empty array when nothing is registered', async () => {
+		const r = await resolveMissionInputs([{ module: 'unknown', table: 't', id: 'x' }]);
+		expect(r).toEqual([]);
+	});
+});
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
@ -0,0 +1,203 @@
+/**
+ * MissionRunner — executes one iteration of a Mission.
+ *
+ *   load mission
+ *     ↓
+ *   resolve inputs via registered resolvers
+ *     ↓
+ *   build available-tool list (policy-filtered)
+ *     ↓
+ *   call planner (LLM) → AiPlanOutput
+ *     ↓
+ *   for each step: stage a Proposal under the AI actor
+ *     ↓
+ *   finishIteration(summary, overallStatus, plan-with-proposal-ids)
+ *
+ * Planner + proposal-staging are injected so the Runner is unit-testable
+ * without a live LLM or Dexie hooks. `deps.plan` is required — production
+ * code passes the LlmOrchestrator-backed planner in via the setup module;
+ * `deps.stageStep` is optional and defaults to `defaultStageStep`, which
+ * calls `executeTool(...)` under the AI actor.
+ */
+
+import { getMission, startIteration, finishIteration } from './store';
+import { resolveMissionInputs } from './input-resolvers';
+import { getAvailableToolsForAi } from './available-tools';
+import { executeTool } from '../../tools/executor';
+import type { Actor } from '../../events/actor';
+import type { Mission, MissionIteration, PlanStep } from './types';
+import type { AiPlanInput, AiPlanOutput, PlannedStep } from './planner/types';
+
+export interface MissionRunnerDeps {
+	/** Invoke the Planner LLM task with the fully-built input. */
+	plan: (input: AiPlanInput) => Promise<AiPlanOutput>;
+	/**
+	 * Stage a single planned step as a Proposal. Returns the proposal id on success.
+	 * Optional — `runMission` falls back to `defaultStageStep` when omitted.
+	 */
+	stageStep?: (step: PlannedStep, aiActor: Extract<Actor, { kind: 'ai' }>) => Promise<StageOutcome>;
+}
+
+/**
+ * Result of staging one planned step.
+ *
+ * `ok: true` with an empty `proposalId` means no proposal row was created —
+ * `defaultStageStep` returns this when the tool ran directly. `ok: false`
+ * carries the failure message.
+ */
+export type StageOutcome =
+	| { readonly ok: true; readonly proposalId: string }
+	| { readonly ok: false; readonly error: string };
+
+/**
+ * Default step stager: runs the step's tool through the policy-gated
+ * executor under the AI actor.
+ *
+ * Outcomes:
+ *   - executor reports failure → `{ ok: false, error: <executor message> }`
+ *   - executor returns a `proposalId` in its data → that id is passed on
+ *   - success without a proposal id (policy resolved to 'auto', the tool
+ *     ran directly) → `{ ok: true, proposalId: '' }`
+ */
+export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async (step, aiActor) => {
+	// The step's own rationale takes precedence over the mission-wide one so
+	// the review UI can show the reasoning for this particular step.
+	const rationale = step.rationale || aiActor.rationale;
+	const stepActor: Extract<Actor, { kind: 'ai' }> = { ...aiActor, rationale };
+
+	const toolResult = await executeTool(step.toolName, step.params, stepActor);
+	if (toolResult.success) {
+		const payload = toolResult.data as { proposalId?: string } | undefined;
+		// Empty string signals "no proposal row, nothing to thread back".
+		return { ok: true, proposalId: payload?.proposalId || '' };
+	}
+	return { ok: false, error: toolResult.message };
+};
+
+/** What `runMission` reports back about the iteration it just ran. */
+export interface RunMissionResult {
+	/** Snapshot of the iteration as assembled by the runner (not re-read from the store). */
+	readonly iteration: MissionIteration;
+	/** Number of steps the planner returned; 0 when the planner itself failed. */
+	readonly plannedSteps: number;
+	/**
+	 * Number of steps whose staging outcome was ok. This includes steps that
+	 * ran directly without creating a proposal (empty proposal id).
+	 */
+	readonly stagedSteps: number;
+	/** Number of steps whose staging outcome was not ok. */
+	readonly failedSteps: number;
+}
+
+/**
+ * Run one iteration of the given mission.
+ *
+ * Flow: load the mission → open an iteration with an empty plan (so it is
+ * visible as "running") → resolve inputs → build the policy-filtered tool
+ * list → call the planner → stage each planned step → `finishIteration`.
+ *
+ * overallStatus derivation:
+ *   planner threw                        → 'failed'
+ *   0 steps                              → 'approved' (no-op run is valid)
+ *   all steps failed                     → 'failed'
+ *   any step produced a proposal id      → 'awaiting-review'
+ *   otherwise (every success ran auto)   → 'approved'
+ *
+ * @param missionId - Id of the mission to run.
+ * @param deps - Planner (required) and step stager (optional; defaults to
+ *   `defaultStageStep`).
+ * @returns The iteration snapshot plus step counters. `stagedSteps` counts
+ *   every step whose outcome was ok, including auto-run steps.
+ * @throws Error when the mission does not exist or is not in state 'active'.
+ */
+export async function runMission(
+	missionId: string,
+	deps: MissionRunnerDeps
+): Promise<RunMissionResult> {
+	const mission = await getMission(missionId);
+	if (!mission) throw new Error(`Mission not found: ${missionId}`);
+	if (mission.state !== 'active') {
+		throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`);
+	}
+
+	// Start the iteration with an empty plan so it's visible in the UI as "running".
+	// Use the id the store generates so finishIteration updates the same row.
+	const startedIteration = await startIteration(mission.id, { plan: [] });
+	const iterationId = startedIteration.id;
+	const aiActor: Extract<Actor, { kind: 'ai' }> = {
+		kind: 'ai',
+		missionId: mission.id,
+		iterationId,
+		rationale: mission.objective,
+	};
+
+	// Gather context
+	const resolvedInputs = await resolveMissionInputs(mission.inputs);
+	const availableTools = getAvailableToolsForAi(aiActor);
+
+	// Ask the planner. A planner failure is recorded as a failed iteration
+	// instead of propagating, so one bad mission can't stall the queue.
+	let plan: AiPlanOutput;
+	try {
+		plan = await deps.plan({ mission, resolvedInputs, availableTools });
+	} catch (err) {
+		const msg = err instanceof Error ? err.message : String(err);
+		await finishIteration(mission.id, iterationId, {
+			summary: `Planner failed: ${msg}`,
+			overallStatus: 'failed',
+		});
+		return emptyResult(mission, iterationId, 'failed', msg);
+	}
+
+	// Stage each planned step as a Proposal (or auto-execute if policy says so).
+	const stage = deps.stageStep ?? defaultStageStep;
+	const recordedSteps: PlanStep[] = [];
+	let stagedCount = 0; // every ok outcome (reported as `stagedSteps`)
+	let proposalCount = 0; // ok outcomes that actually created a proposal
+	let failedCount = 0;
+
+	for (const [i, ps] of plan.steps.entries()) {
+		let outcome: StageOutcome;
+		try {
+			outcome = await stage(ps, aiActor);
+		} catch (err) {
+			// A throwing stager must not leave the iteration stuck in its
+			// "running" state — record the step as failed and keep going.
+			outcome = { ok: false, error: err instanceof Error ? err.message : String(err) };
+		}
+
+		const intent: PlanStep['intent'] = {
+			kind: 'toolCall',
+			toolName: ps.toolName,
+			params: ps.params,
+		};
+		if (outcome.ok) {
+			stagedCount++;
+			// An empty proposal id means the tool ran directly (policy 'auto'),
+			// so there is nothing for the user to review for this step.
+			const proposalId = outcome.proposalId || undefined;
+			if (proposalId) proposalCount++;
+			recordedSteps.push({
+				id: `${iterationId}-${i}`,
+				summary: ps.summary,
+				intent,
+				proposalId,
+				status: proposalId ? 'staged' : 'approved',
+			});
+		} else {
+			failedCount++;
+			// The recorded step has no field for the error, so surface it here.
+			console.warn(
+				`[MissionRunner] step ${i} (${ps.toolName}) of mission ${mission.id} failed: ${outcome.error}`
+			);
+			recordedSteps.push({
+				id: `${iterationId}-${i}`,
+				summary: ps.summary,
+				intent,
+				status: 'failed',
+			});
+		}
+	}
+
+	// Only real proposals put the iteration into review. Counting auto-run
+	// steps here would make an all-auto run wait for a review that has
+	// nothing in it.
+	const overallStatus: MissionIteration['overallStatus'] =
+		plan.steps.length === 0
+			? 'approved' // nothing to do is a valid outcome
+			: failedCount === plan.steps.length
+				? 'failed'
+				: proposalCount > 0
+					? 'awaiting-review'
+					: 'approved';
+
+	await finishIteration(mission.id, iterationId, {
+		summary: plan.summary,
+		overallStatus,
+		plan: recordedSteps,
+	});
+
+	return {
+		iteration: {
+			id: iterationId,
+			// NOTE(review): this is the time the result is assembled, not the
+			// time the iteration was opened — presumably the store row holds the
+			// real start time; confirm before relying on this field.
+			startedAt: new Date().toISOString(),
+			plan: recordedSteps,
+			summary: plan.summary,
+			overallStatus,
+		},
+		plannedSteps: plan.steps.length,
+		stagedSteps: stagedCount,
+		failedSteps: failedCount,
+	};
+}
+
+/**
+ * Build the result for a run that produced no steps: an iteration snapshot
+ * with an empty plan and all counters at zero. The snapshot's `startedAt`
+ * is the moment this function is called.
+ *
+ * @param _mission - Unused; accepted for call-site symmetry.
+ * @param iterationId - Id of the iteration the result refers to.
+ * @param status - Overall status to report.
+ * @param summary - Summary text to report.
+ */
+function emptyResult(
+	_mission: Mission,
+	iterationId: string,
+	status: MissionIteration['overallStatus'],
+	summary: string
+): RunMissionResult {
+	const iteration: MissionIteration = {
+		id: iterationId,
+		startedAt: new Date().toISOString(),
+		plan: [],
+		summary,
+		overallStatus: status,
+	};
+	return { iteration, plannedSteps: 0, stagedSteps: 0, failedSteps: 0 };
+}
+
+/**
+ * Run every mission the store reports as due before `now`, once each and
+ * one after another. Intended for the foreground tick wired into
+ * `+layout.svelte`.
+ *
+ * A run that throws is logged and skipped, so it neither aborts the scan
+ * nor appears in the returned list.
+ *
+ * @param now - Cut-off instant; passed to the store as an ISO string.
+ * @param deps - Forwarded unchanged to each `runMission` call.
+ * @returns The results of the runs that completed without throwing.
+ */
+export async function runDueMissions(
+	now: Date,
+	deps: MissionRunnerDeps
+): Promise<RunMissionResult[]> {
+	const store = await import('./store');
+	const dueMissions = await store.listMissions({ dueBefore: now.toISOString() });
+	const completed: RunMissionResult[] = [];
+	for (const mission of dueMissions) {
+		try {
+			const result = await runMission(mission.id, deps);
+			completed.push(result);
+		} catch (err) {
+			console.error(`[MissionRunner] mission ${mission.id} run threw:`, err);
+		}
+	}
+	return completed;
+}