// diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts b/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts
// new file mode 100644
// index 000000000..7fd23aa12
// --- /dev/null
// +++ b/apps/mana/apps/web/src/lib/data/ai/missions/available-tools.ts
// @@ -0,0 +1,30 @@
/**
 * Build the tool list the Planner is allowed to consider.
 *
 * Only tools the policy rates `auto` or `propose` are exposed — `deny` is
 * invisible to the AI. This is defence-in-depth: even if the Planner
 * hallucinates a denied tool name, the parser rejects it because the name
 * isn't in the allow-set, AND the executor would refuse at runtime.
 */

import { getTools } from '../../tools/registry';
import { resolvePolicy } from '../policy';
import type { Actor } from '../../events/actor';
import type { AvailableTool } from './planner/types';

/**
 * List every registered tool the given AI actor may see.
 *
 * A tool is kept when `resolvePolicy(tool.name, aiActor)` is anything other
 * than `'deny'`. Each surviving tool is projected to a plain object holding
 * only its name, module, description and parameter metadata.
 *
 * @param aiActor - The AI actor the policy is resolved for.
 * @returns The policy-filtered tool descriptions, in registry order.
 */
export function getAvailableToolsForAi(aiActor: Extract<Actor, { kind: 'ai' }>): AvailableTool[] {
	return getTools()
		.filter((tool) => resolvePolicy(tool.name, aiActor) !== 'deny')
		.map((tool) => ({
			name: tool.name,
			module: tool.module,
			description: tool.description,
			// Copy only the descriptive parameter fields.
			parameters: tool.parameters.map((p) => ({
				name: p.name,
				type: p.type,
				required: p.required,
				description: p.description,
				enum: p.enum,
			})),
		}));
}

// diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/input-resolvers.ts b/apps/mana/apps/web/src/lib/data/ai/missions/input-resolvers.ts
// new file mode 100644
// index 000000000..aaf4779bf
// --- /dev/null
// +++ b/apps/mana/apps/web/src/lib/data/ai/missions/input-resolvers.ts
// @@ -0,0 +1,62 @@
/**
 * Input-resolver registry.
 *
 * A Mission references records in other modules via {@link MissionInputRef}.
 * The Runner needs to fetch those records and project them to plain text
 * so the Planner can drop them into its prompt.
Each module registers a + * resolver once on app init (`registerInputResolver`) — the Runner then + * looks up by module name and falls back to a no-op with a console warning + * if nothing's registered (keeps Missions robust against module removal). + * + * Why a registry instead of a central switch statement: cross-module + * imports here would couple `data/ai/` to every product module. The + * registry pattern lets each module own its own projection in its own + * module-init file. + */ + +import type { MissionInputRef } from './types'; +import type { ResolvedInput } from './planner/types'; + +export type InputResolver = (ref: MissionInputRef) => Promise; + +const resolvers = new Map(); + +/** Register a resolver for a module. Idempotent — last registration wins. */ +export function registerInputResolver(moduleName: string, resolver: InputResolver): void { + resolvers.set(moduleName, resolver); +} + +/** Remove a resolver (test helper). */ +export function unregisterInputResolver(moduleName: string): void { + resolvers.delete(moduleName); +} + +export function getInputResolver(moduleName: string): InputResolver | undefined { + return resolvers.get(moduleName); +} + +/** + * Resolve every ref a Mission declares, in parallel. Refs whose module has + * no resolver registered, or whose resolver returns null, are dropped with + * a warning — the Planner just sees fewer inputs, never crashes the run. 
+ */ +export async function resolveMissionInputs( + refs: readonly MissionInputRef[] +): Promise { + const results = await Promise.all( + refs.map(async (ref) => { + const resolver = resolvers.get(ref.module); + if (!resolver) { + console.warn(`[MissionRunner] no input resolver registered for module "${ref.module}"`); + return null; + } + try { + return await resolver(ref); + } catch (err) { + console.error(`[MissionRunner] input resolver for ${ref.module} threw on ${ref.id}:`, err); + return null; + } + }) + ); + return results.filter((r): r is ResolvedInput => r !== null); +} diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts new file mode 100644 index 000000000..2fbdfe5ae --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts @@ -0,0 +1,216 @@ +import 'fake-indexeddb/auto'; +import { describe, it, expect, beforeEach, vi } from 'vitest'; + +vi.mock('$lib/stores/funnel-tracking', () => ({ trackFirstContent: vi.fn() })); +vi.mock('$lib/triggers/registry', () => ({ fire: vi.fn() })); +vi.mock('$lib/triggers/inline-suggest', () => ({ + checkInlineSuggestion: vi.fn().mockResolvedValue(null), +})); + +import { db } from '../../database'; +import { registerTools } from '../../tools/registry'; +import { setAiPolicy } from '../policy'; +import { createMission, getMission, pauseMission } from './store'; +import { runMission, runDueMissions } from './runner'; +import { + registerInputResolver, + unregisterInputResolver, + resolveMissionInputs, +} from './input-resolvers'; +import { MISSIONS_TABLE } from './types'; +import type { AiPlanInput, AiPlanOutput } from './planner/types'; + +let executed: { name: string; params: Record }[] = []; + +registerTools([ + { + name: 'runner_test_stage', + module: 'runnerTest', + description: 'proposes', + parameters: [{ name: 'val', type: 'string', required: true, description: 'v' }], + async execute(params) { + executed.push({ name: 
'runner_test_stage', params: { ...params } }); + return { success: true, message: 'ok' }; + }, + }, +]); + +beforeEach(async () => { + executed = []; + await db.table(MISSIONS_TABLE).clear(); + await db.table('pendingProposals').clear(); +}); + +describe('runMission', () => { + it('runs the planner, stages proposals, and marks the iteration awaiting-review', async () => { + const restore = setAiPolicy({ + tools: { runner_test_stage: 'propose' }, + defaultForAi: 'propose', + }); + try { + const m = await createMission({ + title: 'Test mission', + conceptMarkdown: '', + objective: 'test', + cadence: { kind: 'manual' }, + }); + const planStub: AiPlanOutput = { + summary: 'Staged a test step', + steps: [ + { + summary: 'Do a thing', + toolName: 'runner_test_stage', + params: { val: 'hello' }, + rationale: 'because test', + }, + ], + }; + const result = await runMission(m.id, { + plan: async (_input: AiPlanInput) => planStub, + }); + + expect(result.plannedSteps).toBe(1); + expect(result.stagedSteps).toBe(1); + expect(result.iteration.overallStatus).toBe('awaiting-review'); + + const after = await getMission(m.id); + expect(after?.iterations).toHaveLength(1); + expect(after?.iterations[0].plan[0].proposalId).toBeTruthy(); + expect(after?.iterations[0].plan[0].status).toBe('staged'); + + // Tool did NOT execute — proposal was staged + expect(executed).toHaveLength(0); + } finally { + restore(); + } + }); + + it('passes the built AiPlanInput to the planner with mission + tool allowlist', async () => { + const restore = setAiPolicy({ + tools: { runner_test_stage: 'propose' }, + defaultForAi: 'deny', + }); + try { + const m = await createMission({ + title: 'Test', + conceptMarkdown: '', + objective: 'test', + cadence: { kind: 'manual' }, + }); + let captured: AiPlanInput | null = null; + await runMission(m.id, { + plan: async (input) => { + captured = input; + return { summary: '', steps: [] }; + }, + }); + expect(captured).toBeTruthy(); + 
expect(captured!.mission.id).toBe(m.id); + const allowedNames = captured!.availableTools.map((t) => t.name); + expect(allowedNames).toContain('runner_test_stage'); + } finally { + restore(); + } + }); + + it('marks an iteration failed when the planner throws', async () => { + const m = await createMission({ + title: 'x', + conceptMarkdown: '', + objective: 'x', + cadence: { kind: 'manual' }, + }); + const result = await runMission(m.id, { + plan: async () => { + throw new Error('planner down'); + }, + }); + expect(result.iteration.overallStatus).toBe('failed'); + const after = await getMission(m.id); + expect(after?.iterations[0].overallStatus).toBe('failed'); + expect(after?.iterations[0].summary).toContain('planner down'); + }); + + it('produces an approved iteration when planner returns zero steps', async () => { + const m = await createMission({ + title: 'x', + conceptMarkdown: '', + objective: 'x', + cadence: { kind: 'manual' }, + }); + const result = await runMission(m.id, { + plan: async () => ({ summary: 'nothing needed', steps: [] }), + }); + expect(result.iteration.overallStatus).toBe('approved'); + }); + + it('refuses to run a paused mission', async () => { + const m = await createMission({ + title: 'x', + conceptMarkdown: '', + objective: 'x', + cadence: { kind: 'manual' }, + }); + await pauseMission(m.id); + await expect( + runMission(m.id, { plan: async () => ({ summary: '', steps: [] }) }) + ).rejects.toThrow(/paused/); + }); +}); + +describe('runDueMissions', () => { + it('runs only active missions whose nextRunAt has passed', async () => { + const a = await createMission({ + title: 'due', + conceptMarkdown: '', + objective: 'x', + cadence: { kind: 'interval', everyMinutes: 5 }, + }); + const b = await createMission({ + title: 'future', + conceptMarkdown: '', + objective: 'x', + cadence: { kind: 'interval', everyMinutes: 5 }, + }); + // Force `a` into the past, leave `b` in the future + await db.table(MISSIONS_TABLE).update(a.id, { nextRunAt: 
'2020-01-01T00:00:00.000Z' }); + + const runs: string[] = []; + await runDueMissions(new Date(), { + plan: async (input) => { + runs.push(input.mission.id); + return { summary: '', steps: [] }; + }, + }); + expect(runs).toEqual([a.id]); + expect(runs).not.toContain(b.id); + }); +}); + +describe('resolveMissionInputs', () => { + it('resolves via registered resolvers and skips missing modules', async () => { + registerInputResolver('testmod', async (ref) => ({ + id: ref.id, + module: 'testmod', + table: ref.table, + title: 'T', + content: `content for ${ref.id}`, + })); + try { + const refs = [ + { module: 'testmod', table: 't', id: 'a' }, + { module: 'nope', table: 't', id: 'b' }, + ]; + const resolved = await resolveMissionInputs(refs); + expect(resolved).toHaveLength(1); + expect(resolved[0].content).toContain('a'); + } finally { + unregisterInputResolver('testmod'); + } + }); + + it('returns empty array when nothing is registered', async () => { + const r = await resolveMissionInputs([{ module: 'unknown', table: 't', id: 'x' }]); + expect(r).toEqual([]); + }); +}); diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts new file mode 100644 index 000000000..c91f2c3f3 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts @@ -0,0 +1,203 @@ +/** + * MissionRunner — executes one iteration of a Mission. + * + * load mission + * ↓ + * resolve inputs via registered resolvers + * ↓ + * build available-tool list (policy-filtered) + * ↓ + * call planner (LLM) → AiPlanOutput + * ↓ + * for each step: stage a Proposal under the AI actor + * ↓ + * finishIteration(summary, overallStatus, plan-with-proposal-ids) + * + * Planner + proposal-staging are injected so the Runner is unit-testable + * without a live LLM or Dexie hooks. Default implementations call the + * shared LlmOrchestrator / `executeTool(...)` respectively; production + * code passes those in via the setup module. 
+ */ + +import { getMission, startIteration, finishIteration } from './store'; +import { resolveMissionInputs } from './input-resolvers'; +import { getAvailableToolsForAi } from './available-tools'; +import { executeTool } from '../../tools/executor'; +import type { Actor } from '../../events/actor'; +import type { Mission, MissionIteration, PlanStep } from './types'; +import type { AiPlanInput, AiPlanOutput, PlannedStep } from './planner/types'; + +export interface MissionRunnerDeps { + /** Invoke the Planner LLM task with the fully-built input. */ + plan: (input: AiPlanInput) => Promise; + /** Stage a single planned step as a Proposal. Returns the proposal id on success. */ + stageStep?: (step: PlannedStep, aiActor: Extract) => Promise; +} + +export type StageOutcome = + | { readonly ok: true; readonly proposalId: string } + | { readonly ok: false; readonly error: string }; + +/** Default step-staging implementation: policy-gated executor under AI actor. */ +export const defaultStageStep: Required['stageStep'] = async (step, aiActor) => { + const stepActor: Extract = { + ...aiActor, + // Per-step rationale wins over the mission-wide one so the review UI + // shows *this step's* reasoning. + rationale: step.rationale || aiActor.rationale, + }; + const result = await executeTool(step.toolName, step.params, stepActor); + if (!result.success) { + return { ok: false, error: result.message }; + } + const data = result.data as { proposalId?: string } | undefined; + if (data?.proposalId) return { ok: true, proposalId: data.proposalId }; + // Policy resolved to 'auto' — no proposal row was created, the tool + // ran directly. Treat as ok but without a proposal id to thread back. + return { ok: true, proposalId: '' }; +}; + +export interface RunMissionResult { + readonly iteration: MissionIteration; + readonly plannedSteps: number; + readonly stagedSteps: number; + readonly failedSteps: number; +} + +/** Run one iteration of the given mission. 
*/ +export async function runMission( + missionId: string, + deps: MissionRunnerDeps +): Promise { + const mission = await getMission(missionId); + if (!mission) throw new Error(`Mission not found: ${missionId}`); + if (mission.state !== 'active') { + throw new Error(`Mission ${missionId} is ${mission.state}, cannot run`); + } + + // Start the iteration with an empty plan so it's visible in the UI as "running". + // Use the id the store generates so finishIteration updates the same row. + const startedIteration = await startIteration(mission.id, { plan: [] }); + const iterationId = startedIteration.id; + const aiActor: Extract = { + kind: 'ai', + missionId: mission.id, + iterationId, + rationale: mission.objective, + }; + + // Gather context + const resolvedInputs = await resolveMissionInputs(mission.inputs); + const availableTools = getAvailableToolsForAi(aiActor); + + // Ask the planner + let plan: AiPlanOutput; + try { + plan = await deps.plan({ mission, resolvedInputs, availableTools }); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + await finishIteration(mission.id, iterationId, { + summary: `Planner failed: ${msg}`, + overallStatus: 'failed', + }); + return emptyResult(mission, iterationId, 'failed', msg); + } + + // Stage each planned step as a Proposal (or auto-execute if policy says so). + const stage = deps.stageStep ?? defaultStageStep; + const recordedSteps: PlanStep[] = []; + let stagedCount = 0; + let failedCount = 0; + + for (const [i, ps] of plan.steps.entries()) { + const outcome = await stage(ps, aiActor); + if (outcome.ok) { + stagedCount++; + recordedSteps.push({ + id: `${iterationId}-${i}`, + summary: ps.summary, + intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, + proposalId: outcome.proposalId || undefined, + status: outcome.proposalId ? 
'staged' : 'approved', + }); + } else { + failedCount++; + recordedSteps.push({ + id: `${iterationId}-${i}`, + summary: ps.summary, + intent: { kind: 'toolCall', toolName: ps.toolName, params: ps.params }, + status: 'failed', + }); + } + } + + const overallStatus: MissionIteration['overallStatus'] = + plan.steps.length === 0 + ? 'approved' // nothing to do is a valid outcome + : failedCount === plan.steps.length + ? 'failed' + : stagedCount > 0 + ? 'awaiting-review' + : 'approved'; + + await finishIteration(mission.id, iterationId, { + summary: plan.summary, + overallStatus, + plan: recordedSteps, + }); + + return { + iteration: { + id: iterationId, + startedAt: new Date().toISOString(), + plan: recordedSteps, + summary: plan.summary, + overallStatus, + }, + plannedSteps: plan.steps.length, + stagedSteps: stagedCount, + failedSteps: failedCount, + }; +} + +function emptyResult( + _mission: Mission, + iterationId: string, + status: MissionIteration['overallStatus'], + summary: string +): RunMissionResult { + return { + iteration: { + id: iterationId, + startedAt: new Date().toISOString(), + plan: [], + summary, + overallStatus: status, + }, + plannedSteps: 0, + stagedSteps: 0, + failedSteps: 0, + }; +} + +/** + * Scan all active missions whose `nextRunAt` has passed and run them once + * each. Used by the foreground tick that wires this into `+layout.svelte`. + * Safe to call concurrently — each mission run is independent. + */ +export async function runDueMissions( + now: Date, + deps: MissionRunnerDeps +): Promise { + const { listMissions } = await import('./store'); + const due = await listMissions({ dueBefore: now.toISOString() }); + const results: RunMissionResult[] = []; + for (const m of due) { + try { + results.push(await runMission(m.id, deps)); + } catch (err) { + console.error(`[MissionRunner] mission ${m.id} run threw:`, err); + } + } + return results; +}