feat(ai): Mission Planner LLM task + prompt/parser

Turns a Mission (concept + objective + linked inputs + iteration history)
into a structured plan of tool-call proposals via the shared
LlmOrchestrator.

- `data/ai/missions/planner/types.ts` — AiPlanInput, AiPlanOutput,
  PlannedStep, ResolvedInput, AvailableTool
- `data/ai/missions/planner/prompt.ts` — pure builder producing system +
  user messages. System prompt enforces a strict fenced-JSON contract and
  lists available tools with parameter schema. User prompt injects the
  mission content, resolved input records, and the last 3 iterations
  (especially any userFeedback so the planner can course-correct).
- `data/ai/missions/planner/parser.ts` — strict parser with a
  discriminated ParseResult union. Rejects unknown tools, missing
  rationale, malformed shape. Tolerates missing optional fields.
- `llm-tasks/ai-plan.ts` — aiPlanTask LlmTask, minTier 'browser',
  contentClass 'personal'. On parse failure returns an empty plan with
  an explanatory summary rather than throwing, so the Runner can record
  a failed iteration without killing the queue.

No Runner yet — the planner is pure (input in, plan out). Runner (next
commit) will resolve inputs from modules, invoke the task, stage each
PlannedStep as a Proposal under the AI actor, and update the Mission
iteration.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-14 21:16:10 +02:00
parent d7bf8a2fd4
commit 72d5c708c4
6 changed files with 548 additions and 0 deletions

View file

@ -0,0 +1,92 @@
import { describe, it, expect } from 'vitest';
import { parsePlannerResponse } from './parser';
/** Tool whitelist handed to the parser in every case below. */
const TOOLS = new Set(['create_task', 'log_drink']);

describe('parsePlannerResponse', () => {
	// Happy path: the exact format the system prompt asks for.
	it('parses a valid fenced json block', () => {
		const text = `\`\`\`json
{
"summary": "Plan für heute",
"steps": [
{ "summary": "Task anlegen", "toolName": "create_task", "params": { "title": "Foo" }, "rationale": "weil wichtig" }
]
}
\`\`\``;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(true);
		if (!r.ok) return;
		expect(r.value.summary).toBe('Plan für heute');
		expect(r.value.steps).toHaveLength(1);
		expect(r.value.steps[0].toolName).toBe('create_task');
		expect(r.value.steps[0].params).toEqual({ title: 'Foo' });
	});

	// Fallback path: the whole response is the JSON object, no fence.
	it('accepts a bare JSON object without fence', () => {
		const text = `{ "summary": "x", "steps": [
{ "summary": "log", "toolName": "log_drink", "params": {}, "rationale": "Routine" }
]}`;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(true);
	});

	it('rejects when no JSON block found', () => {
		const r = parsePlannerResponse('just prose no JSON here', TOOLS);
		expect(r.ok).toBe(false);
	});

	it('rejects invalid JSON inside the fence', () => {
		const r = parsePlannerResponse('```json\n{not: valid}\n```', TOOLS);
		expect(r.ok).toBe(false);
	});

	it('rejects when steps is missing or not an array', () => {
		// Missing key.
		const missing = parsePlannerResponse('```json\n{"summary":"x"}\n```', TOOLS);
		expect(missing.ok).toBe(false);

		// Present, but the wrong type.
		const notArray = parsePlannerResponse(
			'```json\n{"summary":"x","steps":"nope"}\n```',
			TOOLS
		);
		expect(notArray.ok).toBe(false);
		if (notArray.ok) return;
		expect(notArray.reason).toContain('steps');
	});

	it('rejects steps referencing unknown tools', () => {
		const text = `\`\`\`json
{ "summary": "", "steps": [{ "toolName": "nuke_database", "params": {}, "rationale": "why not" }] }
\`\`\``;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(false);
		if (r.ok) return;
		// The offending tool name must be surfaced so the failure is debuggable.
		expect(r.reason).toContain('nuke_database');
	});

	it('rejects steps missing rationale', () => {
		const text = `\`\`\`json
{ "summary": "", "steps": [{ "toolName": "create_task", "params": { "title": "x" } }] }
\`\`\``;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(false);
		if (r.ok) return;
		expect(r.reason).toContain('rationale');
	});

	it('tolerates missing summary / step summary by defaulting to empty', () => {
		const text = `\`\`\`json
{
"steps": [
{ "toolName": "create_task", "params": {}, "rationale": "need one" }
]
}
\`\`\``;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(true);
		if (!r.ok) return;
		expect(r.value.summary).toBe('');
		expect(r.value.steps[0].summary).toBe('');
	});

	it('accepts an empty steps array (no-op iteration)', () => {
		const text = `\`\`\`json
{ "summary": "nothing to do today", "steps": [] }
\`\`\``;
		const r = parsePlannerResponse(text, TOOLS);
		expect(r.ok).toBe(true);
		if (!r.ok) return;
		expect(r.value.steps).toHaveLength(0);
	});
});

View file

@ -0,0 +1,99 @@
/**
* Parser for Planner LLM output.
*
* Strict: we accept the fenced `json` block the system prompt prescribes
* (or, as a fallback, a response that is itself a bare JSON object),
* validate its shape, and surface errors so the Runner can record them on
* the iteration instead of silently producing a bad plan.
*/
import type { AiPlanOutput, PlannedStep } from './types';
/**
 * Outcome of parsing a Planner response, discriminated on `ok`.
 *
 * - `ok: true` carries the validated plan in `value`.
 * - `ok: false` carries a human-readable `reason` and, when available,
 *   the `raw` text that failed (the whole response or the extracted
 *   JSON payload).
 */
export type ParseResult =
| { readonly ok: true; readonly value: AiPlanOutput }
| { readonly ok: false; readonly reason: string; readonly raw?: string };
/**
 * Pull the Planner's JSON payload out of a raw LLM response and check it
 * against the plan contract.
 *
 * Validation stops at the first problem: a missing payload, invalid JSON,
 * a non-object top level, a non-array `steps`, or the first invalid step.
 *
 * @param text - Raw completion text returned by the LLM.
 * @param knownToolNames - Tool names a step is allowed to reference.
 * @returns `ok: true` with the validated plan, or `ok: false` with the
 *   reason and the text that failed.
 */
export function parsePlannerResponse(text: string, knownToolNames: Set<string>): ParseResult {
	const payload = extractJsonBlock(text);
	if (!payload) {
		// Nothing extractable: report the full response for diagnostics.
		return { ok: false, reason: 'no JSON block found', raw: text };
	}

	// Every later failure reports the extracted payload as `raw`.
	const fail = (reason: string): ParseResult => ({ ok: false, reason, raw: payload });

	let decoded: unknown;
	try {
		decoded = JSON.parse(payload);
	} catch (err) {
		const detail = err instanceof Error ? err.message : String(err);
		return fail(`JSON parse failed: ${detail}`);
	}

	if (decoded === null || typeof decoded !== 'object') {
		return fail('top-level value is not an object');
	}

	const root = decoded as Record<string, unknown>;
	const stepCandidates = root.steps;
	if (!Array.isArray(stepCandidates)) {
		return fail('`steps` must be an array');
	}

	const steps: PlannedStep[] = [];
	for (const [index, candidate] of stepCandidates.entries()) {
		const checked = validateStep(candidate, knownToolNames, index);
		if (!checked.ok) {
			return fail(checked.reason);
		}
		steps.push(checked.value);
	}

	// The plan summary is optional; anything but a string becomes ''.
	const summary = typeof root.summary === 'string' ? root.summary : '';
	return { ok: true, value: { summary, steps } };
}
/**
 * Locate the JSON payload inside a raw Planner response.
 *
 * Order of checks:
 * 1. If the whole (trimmed) response is a bare `{ … }` object, return it
 *    as-is. This runs first so that triple backticks appearing inside a
 *    JSON string value are not mistaken for a fence.
 * 2. Otherwise take the contents of the first fenced code block. The
 *    language tag after the opening fence is optional and matched
 *    loosely (`json`, `JSON`, `jsonc`, …) so the tag never leaks into
 *    the payload.
 *
 * @param text - Raw completion text returned by the LLM.
 * @returns The trimmed payload, or `null` when neither form is present
 *   or the fenced block is empty.
 */
function extractJsonBlock(text: string): string | null {
	const trimmed = text.trim();
	if (trimmed.startsWith('{') && trimmed.endsWith('}')) return trimmed;

	// Opening fence, optional language tag, lazily captured body, closing fence.
	const fenced = /```[ \t]*[A-Za-z0-9_+-]*\s*([\s\S]*?)```/;
	const match = fenced.exec(text);
	if (match === null) return null;

	const inner = (match[1] ?? '').trim();
	return inner.length > 0 ? inner : null;
}
/**
 * Validate one raw entry of the Planner's `steps` array.
 *
 * Required: `toolName` (non-empty string contained in `knownToolNames`)
 * and `rationale` (string with at least one non-whitespace character).
 * Optional: `summary` (defaults to `''`) and `params` (defaults to `{}`
 * unless it is a plain, non-array object).
 *
 * @param raw - The untrusted step value as parsed from JSON.
 * @param knownToolNames - Tool names a step is allowed to reference.
 * @param index - Position of the step in the array, used in error messages.
 * @returns `ok: true` with the normalised step, or `ok: false` with a
 *   reason that names the offending field.
 */
function validateStep(
	raw: unknown,
	knownToolNames: Set<string>,
	index: number
): { ok: true; value: PlannedStep } | { ok: false; reason: string } {
	if (typeof raw !== 'object' || raw === null) {
		return { ok: false, reason: `step[${index}] is not an object` };
	}
	const obj = raw as Record<string, unknown>;

	const toolName = obj.toolName;
	if (typeof toolName !== 'string' || toolName.length === 0) {
		return { ok: false, reason: `step[${index}].toolName missing or not a string` };
	}
	if (!knownToolNames.has(toolName)) {
		return {
			ok: false,
			reason: `step[${index}].toolName "${toolName}" is not in the allowed tool set`,
		};
	}

	const summary = typeof obj.summary === 'string' ? obj.summary : '';

	// A rationale made only of whitespace is as good as missing.
	const rationale = typeof obj.rationale === 'string' ? obj.rationale : '';
	if (rationale.trim().length === 0) {
		return { ok: false, reason: `step[${index}].rationale is required (user will see this)` };
	}

	// `typeof [] === 'object'`, so arrays must be excluded explicitly before
	// treating the value as a parameter record.
	const rawParams = obj.params;
	const params =
		typeof rawParams === 'object' && rawParams !== null && !Array.isArray(rawParams)
			? (rawParams as Record<string, unknown>)
			: {};

	return {
		ok: true,
		value: { summary, toolName, rationale, params },
	};
}

View file

@ -0,0 +1,127 @@
import { describe, it, expect } from 'vitest';
import { buildPlannerPrompt } from './prompt';
import type { AiPlanInput } from './types';
import type { Mission } from '../types';
/**
 * Build a Mission fixture for the prompt tests.
 *
 * @param overrides - Fields that replace the fixture defaults.
 * @returns A weekly "Weekly review" mission with no inputs and no
 *   iterations, with `overrides` applied on top.
 */
function baseMission(overrides: Partial<Mission> = {}): Mission {
	const defaults: Mission = {
		id: 'm-1',
		createdAt: '2026-04-14T10:00:00.000Z',
		updatedAt: '2026-04-14T10:00:00.000Z',
		title: 'Weekly review',
		conceptMarkdown: '# Concept\nDo a thing.',
		objective: 'Review progress each Monday',
		inputs: [],
		cadence: { kind: 'weekly', dayOfWeek: 1, atHour: 9 },
		state: 'active',
		iterations: [],
	};
	return { ...defaults, ...overrides };
}
describe('buildPlannerPrompt', () => {
	it('emits system + user messages with mission title and objective', () => {
		const input: AiPlanInput = {
			mission: baseMission(),
			resolvedInputs: [],
			availableTools: [],
		};
		const { system, user } = buildPlannerPrompt(input);
		expect(user).toContain('Weekly review');
		expect(user).toContain('Review progress each Monday');
		expect(system).toContain('JSON');
		expect(system).toContain('rationale');
	});

	it('lists available tools with their params in the system prompt', () => {
		const input: AiPlanInput = {
			mission: baseMission(),
			resolvedInputs: [],
			availableTools: [
				{
					name: 'create_task',
					module: 'todo',
					description: 'Creates a task',
					parameters: [
						{ name: 'title', type: 'string', required: true, description: 'Task title' },
						{
							name: 'priority',
							type: 'string',
							required: false,
							description: 'prio',
							enum: ['low', 'high'],
						},
					],
				},
			],
		};
		const { system } = buildPlannerPrompt(input);
		expect(system).toContain('create_task');
		expect(system).toContain('title');
		expect(system).toContain('(required)');
		expect(system).toContain('[low|high]');
	});

	it('injects resolved input content into the user prompt', () => {
		const input: AiPlanInput = {
			mission: baseMission({
				inputs: [{ module: 'notes', table: 'notes', id: 'n-1' }],
			}),
			resolvedInputs: [
				{ id: 'n-1', module: 'notes', table: 'notes', title: 'Strategy', content: 'Be bold.' },
			],
			availableTools: [],
		};
		const { user } = buildPlannerPrompt(input);
		expect(user).toContain('Strategy');
		expect(user).toContain('Be bold.');
	});

	it('includes user feedback from the most recent iteration', () => {
		const input: AiPlanInput = {
			mission: baseMission({
				iterations: [
					{
						id: 'it-1',
						startedAt: '2026-04-07T09:00:00.000Z',
						finishedAt: '2026-04-07T09:01:00.000Z',
						plan: [
							{
								id: 's-1',
								summary: 'Old step',
								intent: { kind: 'toolCall', toolName: 'create_task', params: {} },
								status: 'rejected',
							},
						],
						userFeedback: 'Zu aggressiv — bitte zurücknehmen',
						overallStatus: 'rejected',
					},
				],
			}),
			resolvedInputs: [],
			availableTools: [],
		};
		const { user } = buildPlannerPrompt(input);
		expect(user).toContain('Zu aggressiv');
		expect(user).toContain('[rejected]');
	});

	it('truncates iteration history to the last 3', () => {
		const many = Array.from({ length: 10 }, (_, i) => ({
			id: `it-${i}`,
			startedAt: `2026-04-${String(i + 1).padStart(2, '0')}T10:00:00.000Z`,
			plan: [],
			overallStatus: 'approved' as const,
			userFeedback: `feedback-${i}`,
		}));
		const { user } = buildPlannerPrompt({
			mission: baseMission({ iterations: many }),
			resolvedInputs: [],
			availableTools: [],
		});
		// Only the last three iterations (7, 8, 9) should be present.
		expect(user).toContain('feedback-9');
		expect(user).toContain('feedback-8');
		expect(user).toContain('feedback-7');
		// Check the boundary itself so an off-by-one in the slice is caught.
		expect(user).not.toContain('feedback-6');
		expect(user).not.toContain('feedback-5');
	});
});

View file

@ -0,0 +1,112 @@
/**
* Prompt builder for the Mission Planner.
*
* Produces a system + user message pair the LLM backend can consume. The
* grammar we ask the model to return is deliberately small (JSON in a
* fenced block, one shape): the parser is strict, and we'd rather the
* LLM mess up in a detectable way than produce half-valid output.
*
* Keeps everything the Planner sees inside the prompt, nothing ambient,
* which makes the task reproducible and unit-testable without a live LLM.
*/
import type { AiPlanInput } from './types';
/** The system + user message pair produced by {@link buildPlannerPrompt}. */
export interface PlannerMessages {
/** Rules, the required JSON output format, and the list of available tools. */
readonly system: string;
/** Mission title, concept, objective, resolved inputs and recent iterations. */
readonly user: string;
}
/**
 * Assemble both Planner messages from a single input.
 *
 * @param input - Mission, resolved inputs and available tools.
 * @returns The system and user prompt strings.
 */
export function buildPlannerPrompt(input: AiPlanInput): PlannerMessages {
	const system = buildSystemPrompt(input);
	const user = buildUserPrompt(input);
	return { system, user };
}
/**
 * Build the system prompt: role, rules, the required fenced-JSON output
 * format, and one entry per available tool with its parameters.
 *
 * Each parameter is rendered as
 * ` - name: type [a|b] (required) — description`, where the enum list, the
 * `(required)` marker and the description are omitted when not applicable.
 *
 * @param input - Only `input.availableTools` is read.
 * @returns The system prompt text.
 */
function buildSystemPrompt(input: AiPlanInput): string {
	const toolBlock = input.availableTools
		.map((t) => {
			const params = t.parameters
				.map((p) => {
					const req = p.required ? ' (required)' : '';
					const enumeration = p.enum ? ` [${p.enum.join('|')}]` : '';
					// Keep the description visually separate from the type and flags;
					// without a separator the line reads "string (required)Task title".
					const description = p.description ? ` — ${p.description}` : '';
					return ` - ${p.name}: ${p.type}${enumeration}${req}${description}`;
				})
				.join('\n');
			return `${t.name} (${t.module}) — ${t.description}\n${params || ' (no parameters)'}`;
		})
		.join('\n');
	return `Du bist eine KI, die im Auftrag des Nutzers an einer langlebigen Mission arbeitet.
Dein Job: aus dem aktuellen Mission-Kontext einen kurzen, konkreten Plan ableiten 1 bis 5 Schritte, jeder ein Tool-Aufruf auf Nutzerdaten. Jeder Schritt MUSS eine Begründung haben (rationale), die der Nutzer in der Review-UI sieht.
Wichtige Regeln:
1. Nutze NUR Tools aus der Liste unten. Unbekannte Tools Plan invalide.
2. Jeder Step wird als Proposal gestaged der Nutzer approved oder rejected. Du schreibst nie direkt.
3. Berücksichtige das Feedback aus vorherigen Iterationen (unten im User-Prompt). Wenn ein Vorschlag rejected wurde, wiederhole ihn nicht ohne Änderung.
4. Antworte AUSSCHLIESSLICH mit einem JSON-Block in folgendem Format, keine Prosa davor/danach:
\`\`\`json
{
"summary": "Ein Satz was du in dieser Iteration tust.",
"steps": [
{
"summary": "Kurzer Schritt-Titel",
"toolName": "create_task",
"params": { "title": "…" },
"rationale": "Warum genau jetzt, auf Basis welchen Inputs."
}
]
}
\`\`\`
Verfügbare Tools:
${toolBlock || ' (keine Tools verfügbar — gib leeren steps zurück)'}`;
}
/**
 * Build the user prompt: mission title, concept, objective, the resolved
 * input records, and at most the three most recent iterations (status,
 * optional summary, per-step status lines, optional user feedback).
 *
 * @param input - `input.mission` and `input.resolvedInputs` are read.
 * @returns The user prompt text.
 */
function buildUserPrompt(input: AiPlanInput): string {
	const { mission, resolvedInputs } = input;

	// One "### module/table: title" section per linked record.
	let inputsBlock = '_(keine verlinkten Inputs)_';
	if (resolvedInputs.length > 0) {
		inputsBlock = resolvedInputs
			.map((ref) => `### ${ref.module}/${ref.table}: ${ref.title ?? ref.id}\n${ref.content}`)
			.join('\n\n');
	}

	// Only the last 3 iterations, to keep the prompt tight.
	let iterationHistory = '_(erste Iteration)_';
	if (mission.iterations.length > 0) {
		const recent = mission.iterations.slice(-3);
		iterationHistory = recent
			.map((iteration) => {
				const stepLines = iteration.plan
					.map((step) => ` - [${step.status}] ${step.summary}`)
					.join('\n');
				const summaryLine = iteration.summary ? `\n Summary: ${iteration.summary}` : '';
				const feedbackLine = iteration.userFeedback
					? `\n Nutzer-Feedback: ${iteration.userFeedback}`
					: '';
				return `**${iteration.startedAt}** (${iteration.overallStatus}):${summaryLine}\n${stepLines}${feedbackLine}`;
			})
			.join('\n\n');
	}

	const sections = [
		`# Mission: ${mission.title}`,
		'## Konzept',
		`${mission.conceptMarkdown || '_(leer)_'}`,
		'## Konkretes Ziel',
		`${mission.objective}`,
		'## Verlinkte Inputs',
		inputsBlock,
		'## Letzte Iterationen (max. 3)',
		iterationHistory,
		'---',
		'Erzeuge jetzt einen Plan für die nächste Iteration.',
	];
	return sections.join('\n');
}

View file

@ -0,0 +1,56 @@
/**
* Planner contract: the shared input / output shapes used by the prompt
* builder, the parser, and the `aiPlanTask` LLM wrapper.
*/
import type { Mission } from '../types';
/**
 * Resolved content for one {@link MissionInputRef}.
 *
 * The prompt builder renders each entry into the user prompt as a
 * `### module/table: title` heading (falling back to `id` when `title`
 * is absent) followed by `content`.
 */
export interface ResolvedInput {
/** Record identifier; used as the heading label when `title` is missing. */
readonly id: string;
/** Module the record belongs to (e.g. `notes` in the prompt tests). */
readonly module: string;
/** Table the record belongs to; shown next to `module` in the heading. */
readonly table: string;
/** Optional human-readable label for the heading. */
readonly title?: string;
/** Text inserted verbatim below the heading. */
readonly content: string;
}
/**
 * Tool definition the Planner is allowed to propose.
 *
 * Listed in the system prompt as `name (module) — description`, followed
 * by one line per parameter.
 */
export interface AvailableTool {
/** Tool identifier; a planned step's `toolName` must match one of these. */
readonly name: string;
/** Module shown in parentheses after the tool name in the prompt. */
readonly module: string;
/** Short description shown to the model next to the tool name. */
readonly description: string;
/** Parameter list rendered under the tool; may be empty. */
readonly parameters: ReadonlyArray<{
/** Parameter name as it should appear in a step's `params`. */
readonly name: string;
/** Type label shown to the model (free-form string, e.g. `string`). */
readonly type: string;
/** When true the prompt marks the parameter with `(required)`. */
readonly required: boolean;
/** Explanation of the parameter shown to the model. */
readonly description: string;
/** Allowed values; rendered in the prompt as `[a|b]`. */
readonly enum?: readonly string[];
}>;
}
/** Everything the Planner needs for one run; the prompt is built from this alone. */
export interface AiPlanInput {
/** The mission being planned; title, concept, objective and iterations go into the user prompt. */
readonly mission: Mission;
/** Content of every MissionInputRef, pre-fetched by the caller (Runner). */
readonly resolvedInputs: readonly ResolvedInput[];
/** Tools the policy has whitelisted for AI proposals in this run. */
readonly availableTools: readonly AvailableTool[];
}
/**
 * One step in the Planner's output. The Runner turns each step into a
 * Proposal by calling `executeTool(toolName, params, aiActor)`; the
 * policy routes it to `propose`, and the Proposal carries the rationale.
 */
export interface PlannedStep {
/** Short step title; the parser defaults it to `''` when the model omits it. */
readonly summary: string;
/** Name of the tool to call; the parser rejects names outside the allowed set. */
readonly toolName: string;
/** Arguments for the tool call; the parser defaults this to `{}` when absent. */
readonly params: Record<string, unknown>;
/** Why the step is proposed; required by the parser because the user sees it. */
readonly rationale: string;
}
/** The Planner's result for one iteration. */
export interface AiPlanOutput {
/** The steps (normally 1..N) the AI wants to take this iteration. May be empty (no-op run). */
readonly steps: readonly PlannedStep[];
/** The AI's one-line summary of the plan, stored on `MissionIteration.summary`. */
readonly summary: string;
}

View file

@ -0,0 +1,62 @@
/**
* aiPlanTask — the LLM task for the Mission Planner.
*
* Takes a Mission plus pre-resolved inputs + available tools, asks the
* configured LLM backend for a structured plan, parses it, and returns
* typed steps the Runner turns into Proposals.
*
* Routing:
* - `minTier: 'browser'` — the Planner runs entirely on the device by
* default. Users can override to mana-server / cloud in settings for
* more capable reasoning on long missions.
* - `contentClass: 'personal'` — the prompt contains the user's notes and
* goals. If any linked input is from a strictly-sensitive module
* (journal, dreams, finance), the Runner is responsible for narrowing
* to `'sensitive'` on the request so cloud is refused.
*
* Error path: the parser returns a structured `ParseResult`. If parsing
* fails, the task still returns with `steps: []` and a summary
* explaining why, so the Runner can record a failed iteration without
* throwing through the whole mission queue.
*/
import type { LlmBackend, LlmTask } from '@mana/shared-llm';
import { buildPlannerPrompt } from '$lib/data/ai/missions/planner/prompt';
import { parsePlannerResponse } from '$lib/data/ai/missions/planner/parser';
import type { AiPlanInput, AiPlanOutput } from '$lib/data/ai/missions/planner/types';
export type { AiPlanInput, AiPlanOutput } from '$lib/data/ai/missions/planner/types';
/**
 * LLM task definition for the Mission Planner.
 *
 * `runLlm` builds the prompt pair, requests a non-streaming completion,
 * and parses it against the names in `input.availableTools`. A parse
 * failure is not thrown: the task resolves with `steps: []` and a summary
 * carrying the parser's reason.
 */
export const aiPlanTask: LlmTask<AiPlanInput, AiPlanOutput> = {
	name: 'ai.plan',
	minTier: 'browser',
	contentClass: 'personal',
	requires: { streaming: false },
	displayLabel: 'AI Mission Planner',

	async runLlm(input: AiPlanInput, backend: LlmBackend): Promise<AiPlanOutput> {
		const prompt = buildPlannerPrompt(input);
		const completion = await backend.generate({
			taskName: 'ai.plan',
			contentClass: 'personal',
			messages: [
				{ role: 'system', content: prompt.system },
				{ role: 'user', content: prompt.user },
			],
			temperature: 0.3,
			maxTokens: 1024,
		});

		// Steps may only reference tools that were offered in this run.
		const allowedTools = new Set<string>();
		for (const tool of input.availableTools) {
			allowedTools.add(tool.name);
		}

		const outcome = parsePlannerResponse(completion.content, allowedTools);
		if (outcome.ok) {
			return outcome.value;
		}
		return {
			steps: [],
			summary: `Plan konnte nicht erzeugt werden: ${outcome.reason}`,
		};
	},
};