feat(ai): MissionRunner — orchestrates Planner + proposal staging

Executes one iteration end-to-end: resolve Mission inputs → build the
policy-filtered tool allowlist → invoke the Planner → stage each
PlannedStep as a Proposal (or auto-run if policy says so) → finalize
the iteration with summary + status.

- `data/ai/missions/runner.ts`
  - `runMission(id, deps)` runs a single iteration. Planner + stageStep
    are injected so the Runner is unit-testable without a live LLM.
  - `runDueMissions(now, deps)` scans for active missions past their
    nextRunAt and runs each once. Safe to call on a foreground tick.
  - Reuses the iteration id returned by `startIteration` so
    `finishIteration` updates the same row (fixed a dup-id bug the
    tests caught).
- `data/ai/missions/input-resolvers.ts` — registry: modules register a
  resolver at init, Runner looks up by module name. Missing resolvers
  degrade gracefully to "fewer inputs", never crash a run.
- `data/ai/missions/available-tools.ts` — exposes only tools the AI
  policy rates non-`deny`. Defence-in-depth with the executor + parser.

overallStatus derivation:
  0 steps                         → 'approved'  (no-op run is valid)
  all steps failed                → 'failed'
  any step staged (proposal id)   → 'awaiting-review'
  all steps ran auto              → 'approved'

Planner throw is caught and recorded as a failed iteration — one bad
mission can't stall the queue.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-14 21:20:29 +02:00
parent 72d5c708c4
commit 1c6201be50
4 changed files with 511 additions and 0 deletions

View file

@ -0,0 +1,30 @@
/**
* Build the tool list the Planner is allowed to consider.
*
* Only tools the policy rates `auto` or `propose` are exposed — `deny` is
* invisible to the AI. This is defence-in-depth: even if the Planner
* hallucinates a denied tool name, the parser rejects it because the name
* isn't in the allow-set, AND the executor would refuse at runtime.
*/
import { getTools } from '../../tools/registry';
import { resolvePolicy } from '../policy';
import type { Actor } from '../../events/actor';
import type { AvailableTool } from './planner/types';
/**
 * List the registered tools the given AI actor is allowed to plan with.
 *
 * Every tool returned by `getTools()` is checked with `resolvePolicy`; a tool
 * rated `'deny'` is left out. Each remaining tool is copied field by field
 * into the `AvailableTool` shape, so only the fields listed below reach the
 * Planner.
 *
 * @param aiActor - The AI actor the policy is resolved against.
 * @returns One descriptor per non-denied tool, in the order `getTools()` yields them.
 */
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
  type RegisteredTool = ReturnType<typeof getTools>[number];

  // Anything the policy does not explicitly deny is visible to the Planner.
  const isExposed = (tool: RegisteredTool): boolean =>
    resolvePolicy(tool.name, aiActor) !== 'deny';

  // Project a registry entry down to the planner-facing descriptor.
  const toDescriptor = (tool: RegisteredTool): AvailableTool => ({
    name: tool.name,
    module: tool.module,
    description: tool.description,
    parameters: tool.parameters.map((param) => ({
      name: param.name,
      type: param.type,
      required: param.required,
      description: param.description,
      enum: param.enum,
    })),
  });

  return getTools()
    .filter((tool) => isExposed(tool))
    .map((tool) => toDescriptor(tool));
}

View file

@ -0,0 +1,62 @@
/**
* Input-resolver registry.
*
* A Mission references records in other modules via {@link MissionInputRef}.
* The Runner needs to fetch those records and project them to plain text
* so the Planner can drop them into its prompt. Each module registers a
* resolver once on app init (`registerInputResolver`); the Runner then
* looks up by module name and falls back to a no-op with a console warning
* if nothing's registered (keeps Missions robust against module removal).
*
* Why a registry instead of a central switch statement: cross-module
* imports here would couple `data/ai/` to every product module. The
* registry pattern lets each module own its own projection in its own
* module-init file.
*/
import type { MissionInputRef } from './types';
import type { ResolvedInput } from './planner/types';
/**
 * Projects one mission input reference into planner-ready data.
 * A `null` result is dropped by `resolveMissionInputs`.
 */
export type InputResolver = (ref: MissionInputRef) => Promise<ResolvedInput | null>;

/** Registered resolvers, keyed by module name. */
const resolvers: Map<string, InputResolver> = new Map();
/**
 * Register the resolver for a module.
 *
 * Registering again under the same module name replaces the earlier
 * resolver, so the last registration wins.
 *
 * @param moduleName - Key the Runner uses to look the resolver up.
 * @param resolver - Function that projects a ref of this module.
 */
export function registerInputResolver(moduleName: string, resolver: InputResolver): void {
  resolvers.set(moduleName, resolver);
}
/**
 * Remove the resolver registered for a module, if any (test helper).
 *
 * @param moduleName - Module whose resolver should be dropped.
 */
export function unregisterInputResolver(moduleName: string): void {
  resolvers.delete(moduleName);
}
/**
 * Look up the resolver registered for a module.
 *
 * @param moduleName - Module name used at registration time.
 * @returns The registered resolver, or `undefined` when none is registered.
 */
export function getInputResolver(moduleName: string): InputResolver | undefined {
  return resolvers.get(moduleName);
}
/**
 * Resolve every ref a Mission declares, in parallel.
 *
 * A ref contributes nothing to the result when:
 * - its module has no registered resolver (logged with `console.warn`),
 * - its resolver throws or rejects (logged with `console.error`), or
 * - its resolver returns `null` (dropped without logging).
 *
 * The Planner therefore just sees fewer inputs; a bad resolver does not
 * reject the returned promise.
 *
 * @param refs - Input references declared on the mission.
 * @returns The successfully resolved inputs, in the order of `refs`.
 */
export async function resolveMissionInputs(
  refs: readonly MissionInputRef[]
): Promise<ResolvedInput[]> {
  // Resolve a single ref, turning every failure mode into `null`.
  const resolveOne = async (ref: MissionInputRef): Promise<ResolvedInput | null> => {
    const resolver = resolvers.get(ref.module);
    if (!resolver) {
      console.warn(`[MissionRunner] no input resolver registered for module "${ref.module}"`);
      return null;
    }
    try {
      return await resolver(ref);
    } catch (err) {
      console.error(`[MissionRunner] input resolver for ${ref.module} threw on ${ref.id}:`, err);
      return null;
    }
  };

  const settled = await Promise.all(refs.map((ref) => resolveOne(ref)));

  const resolved: ResolvedInput[] = [];
  for (const item of settled) {
    if (item !== null) resolved.push(item);
  }
  return resolved;
}

View file

@ -0,0 +1,216 @@
import 'fake-indexeddb/auto';
import { describe, it, expect, beforeEach, vi } from 'vitest';
// Replace these `$lib` modules with inert stubs for the whole test file.
// Vitest hoists `vi.mock` calls above the imports, so the stubs are in place
// before the modules under test are loaded.
// NOTE(review): presumably these modules are pulled in transitively by the
// store / tool code imported below — confirm before removing any of them.
vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() }));
vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() }));
vi.mock('$lib/triggers/inline-suggest', () => ({
checkInlineSuggestion: vi.fn().mockResolvedValue(null),
}));
import { db } from '../../database';
import { registerTools } from '../../tools/registry';
import { setAiPolicy } from '../policy';
import { createMission, getMission, pauseMission } from './store';
import { runMission, runDueMissions } from './runner';
import {
registerInputResolver,
unregisterInputResolver,
resolveMissionInputs,
} from './input-resolvers';
import { MISSIONS_TABLE } from './types';
import type { AiPlanInput, AiPlanOutput } from './planner/types';
/** One recorded invocation of the test tool. */
type ExecutedCall = { name: string; params: Record<string, unknown> };

/** Every call that reached the test tool's `execute`; reset in `beforeEach`. */
let executed: ExecutedCall[] = [];

// Register the single tool these tests plan against. Its `execute` only
// records the call, so a test can tell whether the tool actually ran.
registerTools([
  {
    name: 'runner_test_stage',
    module: 'runnerTest',
    description: 'proposes',
    parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }],
    execute: async (params) => {
      const call: ExecutedCall = { name: 'runner_test_stage', params: { ...params } };
      executed.push(call);
      return { success: true, message: 'ok' };
    },
  },
]);
// Start every test with an empty execution log and empty mission / proposal tables.
beforeEach(async () => {
  executed = [];
  for (const tableName of [MISSIONS_TABLE, 'pendingProposals']) {
    await db.table(tableName).clear();
  }
});
describe('runMission', () => {
  /**
   * Create a manually-triggered mission with an empty concept.
   * The tests here only need the mission id, so everything else is minimal.
   */
  const createManualMission = (title = 'x', objective = 'x') =>
    createMission({
      title,
      conceptMarkdown: '',
      objective,
      cadence: { kind: 'manual' },
    });

  it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => {
    const restore = setAiPolicy({
      tools: { runner_test_stage: 'propose' },
      defaultForAi: 'propose',
    });
    try {
      const m = await createManualMission('Test mission', 'test');
      const planStub: AiPlanOutput = {
        summary: 'Staged a test step',
        steps: [
          {
            summary: 'Do a thing',
            toolName: 'runner_test_stage',
            params: { val: 'hello' },
            rationale: 'because test',
          },
        ],
      };
      const result = await runMission(m.id, {
        plan: async (_input: AiPlanInput) => planStub,
      });
      expect(result.plannedSteps).toBe(1);
      expect(result.stagedSteps).toBe(1);
      expect(result.iteration.overallStatus).toBe('awaiting-review');
      const after = await getMission(m.id);
      expect(after?.iterations).toHaveLength(1);
      expect(after?.iterations[0].plan[0].proposalId).toBeTruthy();
      expect(after?.iterations[0].plan[0].status).toBe('staged');
      // Tool did NOT execute — proposal was staged
      expect(executed).toHaveLength(0);
    } finally {
      restore();
    }
  });

  it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => {
    const restore = setAiPolicy({
      tools: { runner_test_stage: 'propose' },
      defaultForAi: 'deny',
    });
    try {
      const m = await createManualMission('Test', 'test');
      // Collect into an array rather than reassigning a `let … | null`:
      // TypeScript does not track assignments made inside the closure, so a
      // nullable `let` would stay narrowed to `null` after the call and
      // `captured!.mission` would not type-check.
      const captured: AiPlanInput[] = [];
      await runMission(m.id, {
        plan: async (input) => {
          captured.push(input);
          return { summary: '', steps: [] };
        },
      });
      expect(captured).toHaveLength(1);
      expect(captured[0].mission.id).toBe(m.id);
      const allowedNames = captured[0].availableTools.map((t) => t.name);
      expect(allowedNames).toContain('runner_test_stage');
    } finally {
      restore();
    }
  });

  it('marks an iteration failed when the planner throws', async () => {
    const m = await createManualMission();
    const result = await runMission(m.id, {
      plan: async () => {
        throw new Error('planner down');
      },
    });
    expect(result.iteration.overallStatus).toBe('failed');
    const after = await getMission(m.id);
    expect(after?.iterations[0].overallStatus).toBe('failed');
    expect(after?.iterations[0].summary).toContain('planner down');
  });

  it('produces an approved iteration when planner returns zero steps', async () => {
    const m = await createManualMission();
    const result = await runMission(m.id, {
      plan: async () => ({ summary: 'nothing needed', steps: [] }),
    });
    expect(result.iteration.overallStatus).toBe('approved');
  });

  it('refuses to run a paused mission', async () => {
    const m = await createManualMission();
    await pauseMission(m.id);
    await expect(
      runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) })
    ).rejects.toThrow(/paused/);
  });
});
describe('runDueMissions', () => {
  it('runs only active missions whose nextRunAt has passed', async () => {
    // Both missions share the same five-minute interval cadence; only the title differs.
    const createIntervalMission = (title: string) =>
      createMission({
        title,
        conceptMarkdown: '',
        objective: 'x',
        cadence: { kind: 'interval', everyMinutes: 5 },
      });

    const a = await createIntervalMission('due');
    const b = await createIntervalMission('future');

    // Force `a` into the past, leave `b` in the future
    await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' });

    // Record which missions actually reach the planner.
    const runs: string[] = [];
    const plan = async (input: AiPlanInput) => {
      runs.push(input.mission.id);
      return { summary: '', steps: [] };
    };

    await runDueMissions(new Date(), { plan });

    expect(runs).toEqual([a.id]);
    expect(runs).not.toContain(b.id);
  });
});
describe('resolveMissionInputs', () => {
  it('resolves via registered resolvers and skips missing modules', async () => {
    // Only 'testmod' gets a resolver; the 'nope' ref below must be dropped.
    registerInputResolver('testmod', async ({ id, table }) => ({
      id,
      module: 'testmod',
      table,
      title: 'T',
      content: `content for ${id}`,
    }));
    try {
      const resolved = await resolveMissionInputs([
        { module: 'testmod', table: 't', id: 'a' },
        { module: 'nope', table: 't', id: 'b' },
      ]);
      expect(resolved).toHaveLength(1);
      expect(resolved[0].content).toContain('a');
    } finally {
      // The registry is module-level state; do not leak into other tests.
      unregisterInputResolver('testmod');
    }
  });

  it('returns empty array when nothing is registered', async () => {
    const unresolved = [{ module: 'unknown', table: 't', id: 'x' }];
    const r = await resolveMissionInputs(unresolved);
    expect(r).toEqual([]);
  });
});

View file

@ -0,0 +1,203 @@
/**
* MissionRunner — executes one iteration of a Mission.
*
*   load mission
*     ↓
*   resolve inputs via registered resolvers
*     ↓
*   build available-tool list (policy-filtered)
*     ↓
*   call planner (LLM) → AiPlanOutput
*     ↓
*   for each step: stage a Proposal under the AI actor
*     ↓
*   finishIteration(summary, overallStatus, plan-with-proposal-ids)
*
* Planner + proposal-staging are injected so the Runner is unit-testable
* without a live LLM or Dexie hooks. Default implementations call the
* shared LlmOrchestrator / `executeTool(...)` respectively; production
* code passes those in via the setup module.
*/
import { getMission, listMissions, startIteration, finishIteration } from './store';
import { resolveMissionInputs } from './input-resolvers';
import { getAvailableToolsForAi } from './available-tools';
import { executeTool } from '../../tools/executor';
import type { Actor } from '../../events/actor';
import type { Mission, MissionIteration, PlanStep } from './types';
import type { AiPlanInput, AiPlanOutput, PlannedStep } from './planner/types';
/** Collaborators injected into the Runner so it can be driven without a live LLM. */
export interface MissionRunnerDeps {
  /** Invoke the Planner LLM task with the fully-built input. */
  plan: (input: AiPlanInput) => Promise<AiPlanOutput>;

  /**
   * Stage a single planned step as a Proposal and report the outcome.
   * Optional: `runMission` falls back to `defaultStageStep` when omitted.
   */
  stageStep?: (
    step: PlannedStep,
    aiActor: Extract<Actor, { kind: 'ai' }>
  ) => Promise<StageOutcome>;
}
/**
 * Result of staging one planned step.
 *
 * - `ok: true` carries the id of the staged proposal. `defaultStageStep`
 *   uses an empty string when the tool call produced no proposal id.
 * - `ok: false` carries the error message.
 */
export type StageOutcome =
  | Readonly<{ ok: true; proposalId: string }>
  | Readonly<{ ok: false; error: string }>;
/**
 * Default step-staging implementation: runs the step's tool through
 * `executeTool` under the AI actor.
 *
 * @param step - The planned step whose tool should be invoked.
 * @param aiActor - Mission-level AI actor; copied with a per-step rationale.
 * @returns `ok: false` with the tool's message when the call did not succeed;
 *   otherwise `ok: true` with the proposal id from the result data, or an
 *   empty string when the result carries none.
 */
export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async (step, aiActor) => {
  // Per-step rationale wins over the mission-wide one so the review UI
  // shows *this step's* reasoning.
  const rationale = step.rationale || aiActor.rationale;
  const stepActor: Extract<Actor, { kind: 'ai' }> = { ...aiActor, rationale };

  const result = await executeTool(step.toolName, step.params, stepActor);
  if (result.success) {
    const payload = result.data as { proposalId?: string } | undefined;
    // No proposal id: policy resolved to 'auto' — no proposal row was
    // created, the tool ran directly. Still ok, but there is nothing to
    // thread back, hence the empty string.
    return { ok: true, proposalId: payload?.proposalId ? payload.proposalId : '' };
  }
  return { ok: false, error: result.message };
};
/** What `runMission` reports back for one iteration. */
export interface RunMissionResult {
  /** The iteration as finalized by this run. */
  readonly iteration: MissionIteration;

  /** Number of steps the planner returned. */
  readonly plannedSteps: number;

  /** Number of steps whose staging outcome was ok. */
  readonly stagedSteps: number;

  /** Number of steps whose staging outcome was not ok. */
  readonly failedSteps: number;
}
/**
 * Run one iteration of the given mission.
 *
 * Flow: load the mission, open an iteration row, resolve the mission inputs
 * and the policy-filtered tool list, ask the planner, stage every planned
 * step, then close the iteration with its summary, status and recorded steps.
 *
 * overallStatus derivation:
 * - planner returned no steps                → 'approved' (a no-op run is valid)
 * - every step failed                        → 'failed'
 * - at least one step produced a proposal id → 'awaiting-review'
 * - otherwise (nothing left to review)       → 'approved'
 *
 * A throwing planner is recorded as a failed iteration, and a throwing
 * `stageStep` is recorded as a failed step, so the iteration row is always
 * finalized once it has been started. Errors from the store itself still
 * propagate.
 *
 * @param missionId - Id of the mission to run.
 * @param deps - Planner (required) and step-staging (optional) implementations.
 * @returns The finalized iteration plus step counters. `stagedSteps` counts
 *   every step whose staging outcome was ok, including steps that ran
 *   directly without creating a proposal.
 * @throws Error when the mission does not exist or is not in state `active`.
 */
export async function runMission(
  missionId: string,
  deps: MissionRunnerDeps
): Promise<RunMissionResult> {
  const mission = await getMission(missionId);
  if (!mission) throw new Error(`Mission not found: ${missionId}`);
  if (mission.state !== 'active') {
    throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`);
  }

  // Start the iteration with an empty plan so it's visible in the UI as "running".
  // Use the id the store generates so finishIteration updates the same row.
  const startedIteration = await startIteration(mission.id, { plan: [] });
  const iterationId = startedIteration.id;
  const aiActor: Extract<Actor, { kind: 'ai' }> = {
    kind: 'ai',
    missionId: mission.id,
    iterationId,
    rationale: mission.objective,
  };

  // Gather context
  const resolvedInputs = await resolveMissionInputs(mission.inputs);
  const availableTools = getAvailableToolsForAi(aiActor);

  // Ask the planner
  let plan: AiPlanOutput;
  try {
    plan = await deps.plan({ mission, resolvedInputs, availableTools });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    // Persist and return the same summary so the result mirrors the stored row.
    const summary = `Planner failed: ${msg}`;
    await finishIteration(mission.id, iterationId, {
      summary,
      overallStatus: 'failed',
    });
    return emptyResult(mission, iterationId, 'failed', summary);
  }

  // Stage each planned step as a Proposal (or auto-execute if policy says so).
  const stage = deps.stageStep ?? defaultStageStep;
  const recordedSteps: PlanStep[] = [];
  let succeededCount = 0; // every ok outcome; reported as `stagedSteps`
  let awaitingReviewCount = 0; // ok outcomes that carry a proposal id
  let failedCount = 0;

  for (const [i, ps] of plan.steps.entries()) {
    let outcome: StageOutcome;
    try {
      outcome = await stage(ps, aiActor);
    } catch (err) {
      // A throwing stage must not leave the iteration open; treat it as a
      // failed step and keep going.
      outcome = { ok: false, error: err instanceof Error ? err.message : String(err) };
    }

    const id = `${iterationId}-${i}`;
    const intent: PlanStep['intent'] = {
      kind: 'toolCall',
      toolName: ps.toolName,
      params: ps.params,
    };

    if (outcome.ok) {
      succeededCount++;
      if (outcome.proposalId) awaitingReviewCount++;
      recordedSteps.push({
        id,
        summary: ps.summary,
        intent,
        proposalId: outcome.proposalId || undefined,
        // No proposal id means the tool ran directly: nothing to review.
        status: outcome.proposalId ? 'staged' : 'approved',
      });
    } else {
      failedCount++;
      // The recorded step has no slot for the error text, so surface it here
      // instead of dropping it.
      console.error(
        `[MissionRunner] mission ${mission.id} step ${i} (${ps.toolName}) failed: ${outcome.error}`
      );
      recordedSteps.push({
        id,
        summary: ps.summary,
        intent,
        status: 'failed',
      });
    }
  }

  const overallStatus: MissionIteration['overallStatus'] =
    plan.steps.length === 0
      ? 'approved' // nothing to do is a valid outcome
      : failedCount === plan.steps.length
        ? 'failed'
        : awaitingReviewCount > 0
          ? 'awaiting-review'
          : 'approved'; // every successful step ran directly

  await finishIteration(mission.id, iterationId, {
    summary: plan.summary,
    overallStatus,
    plan: recordedSteps,
  });

  return {
    iteration: {
      id: iterationId,
      // NOTE(review): stamped at return time, not taken from the row created
      // by startIteration — confirm whether callers rely on this value.
      startedAt: new Date().toISOString(),
      plan: recordedSteps,
      summary: plan.summary,
      overallStatus,
    },
    plannedSteps: plan.steps.length,
    stagedSteps: succeededCount,
    failedSteps: failedCount,
  };
}
/**
 * Build the result for an iteration that ended without any planned steps.
 *
 * @param _mission - Unused; kept so call sites stay unchanged.
 * @param iterationId - Id of the iteration being reported.
 * @param status - Overall status to report.
 * @param summary - Summary text to report.
 * @returns A result with an empty plan, all counters at zero, and `startedAt`
 *   set to the time of this call.
 */
function emptyResult(
  _mission: Mission,
  iterationId: string,
  status: MissionIteration['overallStatus'],
  summary: string
): RunMissionResult {
  const iteration: MissionIteration = {
    id: iterationId,
    startedAt: new Date().toISOString(),
    plan: [],
    summary,
    overallStatus: status,
  };
  return { iteration, plannedSteps: 0, stagedSteps: 0, failedSteps: 0 };
}
/**
 * Run every mission the store reports as due before `now`, once each.
 *
 * Missions are run one after another. A mission whose run throws is logged
 * with `console.error` and left out of the returned list, so one bad mission
 * does not stop the remaining ones.
 *
 * NOTE(review): nothing in this function guards against two overlapping
 * calls picking up the same due mission — confirm the store / caller
 * prevents that before invoking this from more than one tick at a time.
 *
 * @param now - Cut-off instant; passed to the store as an ISO string.
 * @param deps - Planner / staging implementations forwarded to `runMission`.
 * @returns The results of the runs that completed without throwing, in run order.
 */
export async function runDueMissions(
  now: Date,
  deps: MissionRunnerDeps
): Promise<RunMissionResult[]> {
  // `./store` is already imported statically by this module, so a dynamic
  // import here would add nothing.
  const due = await listMissions({ dueBefore: now.toISOString() });
  const results: RunMissionResult[] = [];
  for (const m of due) {
    try {
      results.push(await runMission(m.id, deps));
    } catch (err) {
      console.error(`[MissionRunner] mission ${m.id} run threw:`, err);
    }
  }
  return results;
}