mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:41:09 +02:00
feat(ai): MissionRunner — orchestrates Planner + proposal staging
Executes one iteration end-to-end: resolve Mission inputs → build the
policy-filtered tool allowlist → invoke the Planner → stage each
PlannedStep as a Proposal (or auto-run if policy says so) → finalize
the iteration with summary + status.
- `data/ai/missions/runner.ts`
- `runMission(id, deps)` runs a single iteration. Planner + stageStep
are injected so the Runner is unit-testable without a live LLM.
- `runDueMissions(now, deps)` scans for active missions past their
nextRunAt and runs each once. Safe to call on a foreground tick.
- Reuses the iteration id returned by `startIteration` so
`finishIteration` updates the same row (fixed a dup-id bug the
tests caught).
- `data/ai/missions/input-resolvers.ts` — registry: modules register a
resolver at init, Runner looks up by module name. Missing resolvers
degrade gracefully to "fewer inputs", never crash a run.
- `data/ai/missions/available-tools.ts` — exposes only tools the AI
policy rates non-`deny`. Defence-in-depth with the executor + parser.
overallStatus derivation:
0 steps → 'approved' (no-op run is valid)
all steps failed → 'failed'
any step staged (proposal id) → 'awaiting-review'
all steps ran auto → 'approved'
Planner throw is caught and recorded as a failed iteration — one bad
mission can't stall the queue.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
72d5c708c4
commit
1c6201be50
4 changed files with 511 additions and 0 deletions
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* Build the tool list the Planner is allowed to consider.
|
||||
*
|
||||
* Only tools the policy rates `auto` or `propose` are exposed — `deny` is
|
||||
* invisible to the AI. This is defence-in-depth: even if the Planner
|
||||
* hallucinates a denied tool name, the parser rejects it because the name
|
||||
* isn't in the allow-set, AND the executor would refuse at runtime.
|
||||
*/
|
||||
|
||||
import { getTools } from '../../tools/registry';
|
||||
import { resolvePolicy } from '../policy';
|
||||
import type { Actor } from '../../events/actor';
|
||||
import type { AvailableTool } from './planner/types';
|
||||
|
||||
/**
 * List the registered tools the given AI actor is allowed to see,
 * projected to the Planner-facing {@link AvailableTool} shape.
 *
 * A tool is exposed unless `resolvePolicy` answers `'deny'` for it; every
 * other policy value keeps the tool in the list.
 *
 * @param aiActor - AI actor the policy is resolved against
 * @returns one entry per non-denied tool, in the order `getTools()` yields them
 */
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
	const exposed: AvailableTool[] = [];
	for (const tool of getTools()) {
		if (resolvePolicy(tool.name, aiActor) === 'deny') continue;

		// Copy only the fields the Planner needs — not the tool object itself.
		const parameters = tool.parameters.map((param) => {
			const { name, type, required, description } = param;
			return { name, type, required, description, enum: param.enum };
		});
		exposed.push({
			name: tool.name,
			module: tool.module,
			description: tool.description,
			parameters,
		});
	}
	return exposed;
}
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Input-resolver registry.
|
||||
*
|
||||
* A Mission references records in other modules via {@link MissionInputRef}.
|
||||
* The Runner needs to fetch those records and project them to plain text
|
||||
* so the Planner can drop them into its prompt. Each module registers a
|
||||
* resolver once on app init (`registerInputResolver`) — the Runner then
|
||||
* looks up by module name and falls back to a no-op with a console warning
|
||||
* if nothing's registered (keeps Missions robust against module removal).
|
||||
*
|
||||
* Why a registry instead of a central switch statement: cross-module
|
||||
* imports here would couple `data/ai/` to every product module. The
|
||||
* registry pattern lets each module own its own projection in its own
|
||||
* module-init file.
|
||||
*/
|
||||
|
||||
import type { MissionInputRef } from './types';
|
||||
import type { ResolvedInput } from './planner/types';
|
||||
|
||||
/**
 * Fetches the record a {@link MissionInputRef} points at and projects it to a
 * {@link ResolvedInput}. Resolving to `null` means "nothing to contribute" —
 * `resolveMissionInputs` drops such entries.
 */
export type InputResolver = (ref: MissionInputRef) => Promise<ResolvedInput | null>;
|
||||
|
||||
/** Module-level registry: module name → the resolver registered for it. */
const resolvers = new Map<string, InputResolver>();
|
||||
|
||||
/**
 * Register the resolver responsible for a module's input refs.
 *
 * Registering again under the same module name replaces the previous
 * resolver, so repeated registration is harmless.
 *
 * @param moduleName - key the Runner looks the resolver up by
 * @param resolver - projection from a ref to a resolved input
 */
export function registerInputResolver(moduleName: string, resolver: InputResolver): void {
	resolvers.set(moduleName, resolver);
}
|
||||
|
||||
/**
 * Drop the resolver registered for a module, if any (test helper).
 *
 * @param moduleName - module whose resolver should be removed
 */
export function unregisterInputResolver(moduleName: string): void {
	resolvers.delete(moduleName);
}
|
||||
|
||||
/**
 * Look up the resolver registered for a module.
 *
 * @param moduleName - module name used at registration time
 * @returns the resolver, or `undefined` when none is registered
 */
export function getInputResolver(moduleName: string): InputResolver | undefined {
	return resolvers.get(moduleName);
}
|
||||
|
||||
/**
 * Resolve every ref a Mission declares, all in parallel.
 *
 * A ref contributes nothing (and is left out of the result) when:
 *  - no resolver is registered for its module (logged via `console.warn`),
 *  - its resolver throws (logged via `console.error`), or
 *  - its resolver resolves to `null` (dropped without logging).
 *
 * The returned promise therefore does not reject because of an individual
 * resolver; the Planner simply sees fewer inputs.
 *
 * @param refs - input references declared on the mission
 * @returns the successfully resolved inputs, in the order of `refs`
 */
export async function resolveMissionInputs(
	refs: readonly MissionInputRef[]
): Promise<ResolvedInput[]> {
	const resolveOne = async (ref: MissionInputRef): Promise<ResolvedInput | null> => {
		const resolver = resolvers.get(ref.module);
		if (!resolver) {
			console.warn(`[MissionRunner] no input resolver registered for module "${ref.module}"`);
			return null;
		}
		try {
			return await resolver(ref);
		} catch (err) {
			console.error(`[MissionRunner] input resolver for ${ref.module} threw on ${ref.id}:`, err);
			return null;
		}
	};

	const settled = await Promise.all(refs.map((ref) => resolveOne(ref)));

	const kept: ResolvedInput[] = [];
	for (const entry of settled) {
		if (entry !== null) kept.push(entry);
	}
	return kept;
}
|
||||
216
apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
Normal file
216
apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
import 'fake-indexeddb/auto';
|
||||
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
||||
|
||||
// Replace these app modules with inert stubs for the duration of this test
// file; each factory returns only the export named below.
vi.mock('$lib/stores/funnel-tracking', () => {
	return { trackFirstContent: vi.fn() };
});
vi.mock('$lib/triggers/registry', () => {
	return { fire: vi.fn() };
});
vi.mock('$lib/triggers/inline-suggest', () => {
	return { checkInlineSuggestion: vi.fn().mockResolvedValue(null) };
});
|
||||
|
||||
import { db } from '../../database';
|
||||
import { registerTools } from '../../tools/registry';
|
||||
import { setAiPolicy } from '../policy';
|
||||
import { createMission, getMission, pauseMission } from './store';
|
||||
import { runMission, runDueMissions } from './runner';
|
||||
import {
|
||||
registerInputResolver,
|
||||
unregisterInputResolver,
|
||||
resolveMissionInputs,
|
||||
} from './input-resolvers';
|
||||
import { MISSIONS_TABLE } from './types';
|
||||
import type { AiPlanInput, AiPlanOutput } from './planner/types';
|
||||
|
||||
// Log of real executions of the test tool. Tests assert it stays empty when
// a step is staged as a proposal instead of being run.
let executed: { name: string; params: Record<string, unknown> }[] = [];

registerTools([
	{
		name: 'runner_test_stage',
		module: 'runnerTest',
		description: 'proposes',
		parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }],
		async execute(params) {
			const call = { name: 'runner_test_stage', params: { ...params } };
			executed.push(call);
			return { success: true, message: 'ok' };
		},
	},
]);

// Every test starts with an empty execution log and empty tables.
beforeEach(async () => {
	executed = [];
	const missionsTable = db.table(MISSIONS_TABLE);
	await missionsTable.clear();
	const proposalsTable = db.table('pendingProposals');
	await proposalsTable.clear();
});
|
||||
|
||||
describe('runMission', () => {
	/** Create a manually-triggered mission; only title and objective vary per test. */
	async function createManualMission(title: string, objective: string) {
		return createMission({
			title,
			conceptMarkdown: '',
			objective,
			cadence: { kind: 'manual' },
		});
	}

	it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => {
		const restore = setAiPolicy({
			tools: { runner_test_stage: 'propose' },
			defaultForAi: 'propose',
		});
		try {
			const m = await createManualMission('Test mission', 'test');
			const planStub: AiPlanOutput = {
				summary: 'Staged a test step',
				steps: [
					{
						summary: 'Do a thing',
						toolName: 'runner_test_stage',
						params: { val: 'hello' },
						rationale: 'because test',
					},
				],
			};
			const result = await runMission(m.id, {
				plan: async (_input: AiPlanInput) => planStub,
			});

			expect(result.plannedSteps).toBe(1);
			expect(result.stagedSteps).toBe(1);
			expect(result.iteration.overallStatus).toBe('awaiting-review');

			const after = await getMission(m.id);
			expect(after?.iterations).toHaveLength(1);
			expect(after?.iterations[0].plan[0].proposalId).toBeTruthy();
			expect(after?.iterations[0].plan[0].status).toBe('staged');

			// Tool did NOT execute — proposal was staged
			expect(executed).toHaveLength(0);
		} finally {
			restore();
		}
	});

	it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => {
		const restore = setAiPolicy({
			tools: { runner_test_stage: 'propose' },
			defaultForAi: 'deny',
		});
		try {
			const m = await createManualMission('Test', 'test');
			// Collect into an array rather than a `let x: T | null = null` that is
			// reassigned inside the closure: control-flow analysis keeps such a
			// variable narrowed to `null` out here, so `x!.mission` fails to
			// type-check ("does not exist on type 'never'").
			const seenInputs: AiPlanInput[] = [];
			await runMission(m.id, {
				plan: async (input) => {
					seenInputs.push(input);
					return { summary: '', steps: [] };
				},
			});
			expect(seenInputs).toHaveLength(1);
			expect(seenInputs[0]?.mission.id).toBe(m.id);
			const allowedNames = seenInputs[0]?.availableTools.map((t) => t.name) ?? [];
			expect(allowedNames).toContain('runner_test_stage');
		} finally {
			restore();
		}
	});

	it('marks an iteration failed when the planner throws', async () => {
		const m = await createManualMission('x', 'x');
		const result = await runMission(m.id, {
			plan: async () => {
				throw new Error('planner down');
			},
		});
		expect(result.iteration.overallStatus).toBe('failed');
		const after = await getMission(m.id);
		expect(after?.iterations[0].overallStatus).toBe('failed');
		expect(after?.iterations[0].summary).toContain('planner down');
	});

	it('produces an approved iteration when planner returns zero steps', async () => {
		const m = await createManualMission('x', 'x');
		const result = await runMission(m.id, {
			plan: async () => ({ summary: 'nothing needed', steps: [] }),
		});
		expect(result.iteration.overallStatus).toBe('approved');
	});

	it('refuses to run a paused mission', async () => {
		const m = await createManualMission('x', 'x');
		await pauseMission(m.id);
		await expect(
			runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) })
		).rejects.toThrow(/paused/);
	});
});
|
||||
|
||||
describe('runDueMissions', () => {
	/** Create a mission on a five-minute interval cadence. */
	async function createIntervalMission(title: string) {
		return createMission({
			title,
			conceptMarkdown: '',
			objective: 'x',
			cadence: { kind: 'interval', everyMinutes: 5 },
		});
	}

	it('runs only active missions whose nextRunAt has passed', async () => {
		const a = await createIntervalMission('due');
		const b = await createIntervalMission('future');

		// Only `a` gets its nextRunAt moved into the past; `b` keeps the value
		// the store gave it at creation.
		await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: '2020-01-01T00:00:00.000Z' });

		const runs: string[] = [];
		const recordingPlanner = async (input: AiPlanInput): Promise<AiPlanOutput> => {
			runs.push(input.mission.id);
			return { summary: '', steps: [] };
		};
		await runDueMissions(new Date(), { plan: recordingPlanner });

		expect(runs).toEqual([a.id]);
		expect(runs).not.toContain(b.id);
	});
});
|
||||
|
||||
describe('resolveMissionInputs', () => {
	it('resolves via registered resolvers and skips missing modules', async () => {
		registerInputResolver('testmod', async (ref) => {
			return {
				id: ref.id,
				module: 'testmod',
				table: ref.table,
				title: 'T',
				content: `content for ${ref.id}`,
			};
		});
		try {
			// One ref with a resolver, one for a module nobody registered.
			const known = { module: 'testmod', table: 't', id: 'a' };
			const unknown = { module: 'nope', table: 't', id: 'b' };
			const resolved = await resolveMissionInputs([known, unknown]);
			expect(resolved).toHaveLength(1);
			expect(resolved[0].content).toContain('a');
		} finally {
			// Keep the module-level registry clean for other tests.
			unregisterInputResolver('testmod');
		}
	});

	it('returns empty array when nothing is registered', async () => {
		const orphanRef = { module: 'unknown', table: 't', id: 'x' };
		const r = await resolveMissionInputs([orphanRef]);
		expect(r).toEqual([]);
	});
});
|
||||
203
apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
Normal file
203
apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
/**
|
||||
* MissionRunner — executes one iteration of a Mission.
|
||||
*
|
||||
* load mission
|
||||
* ↓
|
||||
* resolve inputs via registered resolvers
|
||||
* ↓
|
||||
* build available-tool list (policy-filtered)
|
||||
* ↓
|
||||
* call planner (LLM) → AiPlanOutput
|
||||
* ↓
|
||||
* for each step: stage a Proposal under the AI actor
|
||||
* ↓
|
||||
* finishIteration(summary, overallStatus, plan-with-proposal-ids)
|
||||
*
|
||||
* Planner + proposal-staging are injected so the Runner is unit-testable
|
||||
* without a live LLM or Dexie hooks. Default implementations call the
|
||||
* shared LlmOrchestrator / `executeTool(...)` respectively; production
|
||||
* code passes those in via the setup module.
|
||||
*/
|
||||
|
||||
import { getMission, startIteration, finishIteration } from './store';
|
||||
import { resolveMissionInputs } from './input-resolvers';
|
||||
import { getAvailableToolsForAi } from './available-tools';
|
||||
import { executeTool } from '../../tools/executor';
|
||||
import type { Actor } from '../../events/actor';
|
||||
import type { Mission, MissionIteration, PlanStep } from './types';
|
||||
import type { AiPlanInput, AiPlanOutput, PlannedStep } from './planner/types';
|
||||
|
||||
/**
 * Collaborators injected into the Runner so it can be exercised without a
 * live LLM.
 */
export interface MissionRunnerDeps {
	/** Invoke the Planner with the fully-built input and return its plan. */
	plan: (input: AiPlanInput) => Promise<AiPlanOutput>;
	/**
	 * Stage one planned step under the given AI actor. Optional — the Runner
	 * falls back to `defaultStageStep` when omitted.
	 */
	stageStep?: (step: PlannedStep, aiActor: Extract<Actor, { kind: 'ai' }>) => Promise<StageOutcome>;
}
|
||||
|
||||
/**
 * Result of staging a single step.
 *
 * - `ok: true` carries the id of the proposal that was created. The Runner
 *   treats an empty `proposalId` as "no proposal row — the step is already
 *   approved".
 * - `ok: false` carries a human-readable error message.
 */
export type StageOutcome =
	| Readonly<{ ok: true; proposalId: string }>
	| Readonly<{ ok: false; error: string }>;
|
||||
|
||||
/**
 * Default step-staging implementation: runs the step's tool through
 * `executeTool` under the AI actor.
 *
 * @param step - planned step naming the tool and its params
 * @param aiActor - mission-level AI actor; its rationale is the fallback
 * @returns `ok: false` with the tool's message when the call did not succeed;
 *   otherwise `ok: true` with the proposal id found on the result data, or
 *   `''` when the result carries none
 */
export const defaultStageStep: Required<MissionRunnerDeps>['stageStep'] = async (step, aiActor) => {
	// A non-empty per-step rationale overrides the mission-wide one so the
	// review UI shows the reasoning for this particular step.
	const rationale = step.rationale || aiActor.rationale;
	const stepActor: Extract<Actor, { kind: 'ai' }> = { ...aiActor, rationale };

	const result = await executeTool(step.toolName, step.params, stepActor);
	if (!result.success) {
		return { ok: false, error: result.message };
	}

	// No proposal id on the result means no proposal row was created (the
	// policy let the tool run directly); signal that with an empty id.
	const payload = result.data as { proposalId?: string } | undefined;
	const proposalId = payload?.proposalId;
	return { ok: true, proposalId: proposalId || '' };
};
|
||||
|
||||
/** What `runMission` reports back about the iteration it just ran. */
export interface RunMissionResult {
	/** Snapshot of the iteration as the Runner finished it. */
	readonly iteration: MissionIteration;
	/** Number of steps the Planner returned. */
	readonly plannedSteps: number;
	/** Number of steps whose staging call reported `ok`. */
	readonly stagedSteps: number;
	/** Number of steps whose staging call reported a failure. */
	readonly failedSteps: number;
}
|
||||
|
||||
/**
 * Run one iteration of the given mission.
 *
 * Flow: load the mission → start an iteration → resolve inputs and build
 * the policy-filtered tool list → call the Planner → stage every planned
 * step → finish the iteration with summary, status and recorded steps.
 *
 * `overallStatus` derivation:
 *  - zero planned steps                      → `'approved'`
 *  - every step failed                       → `'failed'`
 *  - at least one step produced a proposal   → `'awaiting-review'`
 *  - otherwise (successful steps ran auto)   → `'approved'`
 *
 * A Planner error, or an error thrown while staging a step, is recorded on
 * the iteration instead of propagating, so the iteration is always finished.
 *
 * @param missionId - id of the mission to run
 * @param deps - injected Planner and (optionally) step-staging function
 * @returns the finished iteration plus planned / ok / failed step counts
 * @throws Error when the mission does not exist or is not in state `'active'`
 */
export async function runMission(
	missionId: string,
	deps: MissionRunnerDeps
): Promise<RunMissionResult> {
	const mission = await getMission(missionId);
	if (!mission) throw new Error(`Mission not found: ${missionId}`);
	if (mission.state !== 'active') {
		throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`);
	}

	// Start the iteration with an empty plan so it's visible in the UI as "running".
	// Use the id the store generates so finishIteration updates the same row.
	const startedIteration = await startIteration(mission.id, { plan: [] });
	const iterationId = startedIteration.id;
	const aiActor: Extract<Actor, { kind: 'ai' }> = {
		kind: 'ai',
		missionId: mission.id,
		iterationId,
		rationale: mission.objective,
	};

	// Gather context
	const resolvedInputs = await resolveMissionInputs(mission.inputs);
	const availableTools = getAvailableToolsForAi(aiActor);

	// Ask the planner
	let plan: AiPlanOutput;
	try {
		plan = await deps.plan({ mission, resolvedInputs, availableTools });
	} catch (err) {
		const msg = err instanceof Error ? err.message : String(err);
		// Build the summary once so the stored row and the returned result agree.
		const summary = `Planner failed: ${msg}`;
		await finishIteration(mission.id, iterationId, {
			summary,
			overallStatus: 'failed',
		});
		return emptyResult(mission, iterationId, 'failed', summary);
	}

	// Stage each planned step as a Proposal (or auto-execute if policy says so).
	const stage = deps.stageStep ?? defaultStageStep;
	const recordedSteps: PlanStep[] = [];
	let stagedCount = 0; // steps whose staging call reported ok (proposal or auto)
	let proposalCount = 0; // subset of the above that produced a reviewable proposal
	let failedCount = 0;

	for (const [i, ps] of plan.steps.entries()) {
		let outcome: StageOutcome;
		try {
			outcome = await stage(ps, aiActor);
		} catch (err) {
			// A throwing stager must not leave the iteration stuck as "running":
			// record the step as failed and carry on with the remaining steps.
			outcome = { ok: false, error: err instanceof Error ? err.message : String(err) };
		}

		const id = `${iterationId}-${i}`;
		const intent: PlanStep['intent'] = {
			kind: 'toolCall',
			toolName: ps.toolName,
			params: ps.params,
		};

		if (outcome.ok) {
			stagedCount++;
			// An empty proposal id means the tool ran directly — nothing to review.
			const hasProposal = Boolean(outcome.proposalId);
			if (hasProposal) proposalCount++;
			recordedSteps.push({
				id,
				summary: ps.summary,
				intent,
				proposalId: outcome.proposalId || undefined,
				status: hasProposal ? 'staged' : 'approved',
			});
		} else {
			failedCount++;
			// The step record built here has no error field, so log the reason
			// rather than dropping it silently.
			console.warn(
				`[MissionRunner] mission ${mission.id} step ${id} (${ps.toolName}) failed: ${outcome.error}`
			);
			recordedSteps.push({
				id,
				summary: ps.summary,
				intent,
				status: 'failed',
			});
		}
	}

	// Only real proposals need review; auto-executed steps are already approved.
	const overallStatus: MissionIteration['overallStatus'] =
		plan.steps.length === 0
			? 'approved' // nothing to do is a valid outcome
			: failedCount === plan.steps.length
				? 'failed'
				: proposalCount > 0
					? 'awaiting-review'
					: 'approved';

	await finishIteration(mission.id, iterationId, {
		summary: plan.summary,
		overallStatus,
		plan: recordedSteps,
	});

	return {
		iteration: {
			id: iterationId,
			// NOTE(review): this is the finish time of the run, not necessarily the
			// startedAt the store recorded in startIteration — confirm whether the
			// stored value should be reused here.
			startedAt: new Date().toISOString(),
			plan: recordedSteps,
			summary: plan.summary,
			overallStatus,
		},
		plannedSteps: plan.steps.length,
		stagedSteps: stagedCount,
		failedSteps: failedCount,
	};
}
|
||||
|
||||
/**
 * Build the result for an iteration that ended without any planned steps
 * being recorded (used when the Planner call fails).
 *
 * @param _mission - accepted for call-site symmetry; not read
 * @param iterationId - id of the iteration being reported
 * @param status - overall status to report
 * @param summary - summary text to report
 * @returns a result with an empty plan and all step counters at zero;
 *   `startedAt` is stamped with the current time
 */
function emptyResult(
	_mission: Mission,
	iterationId: string,
	status: MissionIteration['overallStatus'],
	summary: string
): RunMissionResult {
	const iteration: MissionIteration = {
		id: iterationId,
		startedAt: new Date().toISOString(),
		plan: [],
		summary,
		overallStatus: status,
	};
	return { iteration, plannedSteps: 0, stagedSteps: 0, failedSteps: 0 };
}
|
||||
|
||||
/**
 * Run every mission the store reports as due before `now`, once each.
 *
 * Missions are run one after another. A mission whose run throws is logged
 * and skipped, so it contributes no entry to the returned array and does not
 * stop the remaining missions.
 *
 * NOTE(review): whether overlapping calls can pick up the same due mission
 * depends on when the store advances `nextRunAt` — not visible from here;
 * confirm before invoking this from more than one tick at a time.
 *
 * @param now - cut-off instant; passed to the store as an ISO string
 * @param deps - Planner / staging dependencies forwarded to `runMission`
 * @returns results of the runs that completed without throwing
 */
export async function runDueMissions(
	now: Date,
	deps: MissionRunnerDeps
): Promise<RunMissionResult[]> {
	const store = await import('./store');
	const cutoff = now.toISOString();
	const dueMissions = await store.listMissions({ dueBefore: cutoff });

	const completed: RunMissionResult[] = [];
	for (const { id } of dueMissions) {
		try {
			const outcome = await runMission(id, deps);
			completed.push(outcome);
		} catch (err) {
			console.error(`[MissionRunner] mission ${id} run threw:`, err);
		}
	}
	return completed;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue