feat(shared-ai): runPlannerLoop + compact system prompt for function calling

Introduces the new planner pipeline that both the webapp runner and the mana-ai tick will swap onto in the next commits. Additive for now — the legacy buildPlannerPrompt + parsePlannerResponse stay exported so callers can migrate one at a time; they get removed once the last consumer is gone.

- planner/loop.ts — runPlannerLoop orchestrates a multi-turn chat against a caller-supplied LlmClient. Tool-calls from the LLM are handed to an onToolCall callback and their results fed back as tool-messages. Parallel tool-calls in one turn execute sequentially to keep the message log linear for debugging. Stops on assistant stop, empty tool_calls, or a hard max-rounds ceiling (default 5).
- planner/system-prompt.ts — new buildSystemPrompt. ~40-line German system frame, no tool listing (the SDK-level tools field carries the schemas now), no JSON format example, no "please return JSON" plea. User frame renders mission + linked inputs + last 3 iteration summaries, same as before.
- Five test cases covering the loop: immediate stop, single tool call with result feedback, parallel calls execute in order, tool failures propagate as tool-messages the LLM can react to, and maxRounds ceiling fires with the right stopReason.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 14:26:42 +02:00 · 2026-04-20 15:31:01 +02:00 · 2026-04-20 15:31:01 +02:00 · 4daca8970b
commit 4daca8970b
parent 2cf89ce26a
5 changed files with 537 additions and 1 deletions
--- a/packages/shared-ai/src/planner/loop.ts
+++ b/packages/shared-ai/src/planner/loop.ts
@ -0,0 +1,179 @@
+/**
+ * Multi-turn tool-calling loop shared between the webapp runner and the
+ * server-side mana-ai tick. Replaces the text-JSON planner pipeline:
+ * we hand the LLM a tool catalog, it emits native tool_calls, we
+ * execute them and feed the results back as tool-messages until the
+ * LLM has nothing more to call (or we hit the round budget).
+ *
+ * Environment-specific concerns (HTTP transport, auth, actor
+ * attribution) live in the caller-provided ``LlmClient`` and
+ * ``onToolCall`` callback. The loop itself stays pure.
+ */
+
+import type { ToolSchema, ToolSpec } from '../tools/function-schema';
+import { toolsToFunctionSchemas } from '../tools/function-schema';
+
+// ─── Chat-message contract ──────────────────────────────────────────
+
+/**
+ * One tool invocation requested by the LLM in an assistant turn.
+ * ``runPlannerLoop`` hands each request to ``onToolCall`` and echoes
+ * ``id`` back as ``toolCallId`` on the resulting tool-message.
+ */
+export interface ToolCallRequest {
+	/** Identifier of this call; copied onto the matching tool-message. */
+	readonly id: string;
+	/** Name of the tool to invoke — presumably an entry of the tool
+	 *  catalog sent with the request; confirm against the caller. */
+	readonly name: string;
+	/** Argument object for the tool; values are untyped at this layer. */
+	readonly arguments: Record<string, unknown>;
+}
+
+/**
+ * Outcome of one tool execution as returned by ``onToolCall``.
+ * ``runPlannerLoop`` serialises it to JSON and feeds it back to the
+ * LLM as the content of a tool-message.
+ */
+export interface ToolResult {
+	/** Whether the tool call succeeded. */
+	readonly success: boolean;
+	/** Optional payload; only included in the tool-message when it is
+	 *  not ``undefined``. */
+	readonly data?: unknown;
+	/** Text describing the outcome; always included in the tool-message. */
+	readonly message: string;
+}
+
+/** Author of a ``ChatMessage``. ``runPlannerLoop`` seeds the history
+ *  with one ``system`` and one ``user`` message and then appends
+ *  ``assistant`` and ``tool`` messages. */
+export type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
+
+/**
+ * One entry of the chat history exchanged with the ``LlmClient``.
+ */
+export interface ChatMessage {
+	/** Who authored the message. */
+	readonly role: ChatRole;
+	/** Message text. For tool-messages written by ``runPlannerLoop``
+	 *  this is the JSON-serialised ``ToolResult``; for assistant turns
+	 *  it is the response content, which may be ``null``. */
+	readonly content?: string | null;
+	/** Tool calls requested in an assistant turn. ``runPlannerLoop``
+	 *  leaves this ``undefined`` when the LLM requested none. */
+	readonly toolCalls?: readonly ToolCallRequest[];
+	/** On tool-messages: the ``id`` of the ``ToolCallRequest`` this
+	 *  message answers. */
+	readonly toolCallId?: string;
+}
+
+// ─── LLM client contract ────────────────────────────────────────────
+
+/**
+ * Payload ``runPlannerLoop`` passes to ``LlmClient.complete`` once per
+ * round.
+ */
+export interface LlmCompletionRequest {
+	/** Chat history accumulated so far, oldest message first. */
+	readonly messages: readonly ChatMessage[];
+	/** Tool specs offered to the LLM; the loop derives them from the
+	 *  input's tool schemas via ``toolsToFunctionSchemas``. */
+	readonly tools: readonly ToolSpec[];
+	/** Model identifier, forwarded unchanged from ``PlannerLoopInput``. */
+	readonly model: string;
+	/** Sampling temperature, forwarded unchanged from
+	 *  ``PlannerLoopInput``; may be ``undefined``. */
+	readonly temperature?: number;
+}
+
+/** Why the LLM ended its turn. ``runPlannerLoop`` only inspects this
+ *  when the response carries no tool calls: ``'stop'`` maps to the
+ *  ``assistant-stop`` stop reason, every other value to
+ *  ``no-tool-calls``. */
+export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
+
+/**
+ * Result of one ``LlmClient.complete`` call.
+ */
+export interface LlmCompletionResponse {
+	/** Assistant text, or ``null`` when the LLM produced none. */
+	readonly content: string | null;
+	/** Tool calls requested in this turn. An empty array ends
+	 *  ``runPlannerLoop``. */
+	readonly toolCalls: readonly ToolCallRequest[];
+	/** Reason the LLM stopped generating. */
+	readonly finishReason: LlmFinishReason;
+}
+
+/**
+ * Caller-supplied LLM transport. ``runPlannerLoop`` calls ``complete``
+ * once per round and does not catch rejections from it.
+ */
+export interface LlmClient {
+	/** Send the chat history plus tool specs and resolve with the
+	 *  assistant's next turn. */
+	complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse>;
+}
+
+// ─── Loop input / result ────────────────────────────────────────────
+
+/**
+ * Everything ``runPlannerLoop`` needs to start a conversation.
+ */
+export interface PlannerLoopInput {
+	/** Content of the initial ``system`` message. */
+	readonly systemPrompt: string;
+	/** Content of the initial ``user`` message. */
+	readonly userPrompt: string;
+	/** Tool catalog; converted with ``toolsToFunctionSchemas`` before
+	 *  being sent to the LLM. */
+	readonly tools: readonly ToolSchema[];
+	/** Model identifier forwarded to every ``LlmClient.complete`` call. */
+	readonly model: string;
+	/** Sampling temperature forwarded to every ``LlmClient.complete``
+	 *  call. */
+	readonly temperature?: number;
+	/** Hard ceiling on planner rounds. Each round = one LLM call plus
+	 *  whatever tool executions its output triggered. Defaults to 5. */
+	readonly maxRounds?: number;
+}
+
+/**
+ * Record of one tool call the loop executed, paired with its result.
+ */
+export interface ExecutedCall {
+	/** 1-based number of the round in which the call was executed. */
+	readonly round: number;
+	/** The request as emitted by the LLM. */
+	readonly call: ToolCallRequest;
+	/** What ``onToolCall`` returned for that request. */
+	readonly result: ToolResult;
+}
+
+/**
+ * Why ``runPlannerLoop`` returned.
+ *
+ * - ``assistant-stop`` — the LLM requested no tools and finished with
+ *   ``'stop'``.
+ * - ``no-tool-calls`` — the LLM requested no tools but finished with
+ *   any other reason.
+ * - ``max-rounds`` — the round budget ran out.
+ * - ``llm-error`` — never assigned by ``runPlannerLoop`` in this file;
+ *   presumably reserved for callers that wrap the loop (TODO confirm).
+ */
+export type LoopStopReason = 'assistant-stop' | 'max-rounds' | 'no-tool-calls' | 'llm-error';
+
+/**
+ * What ``runPlannerLoop`` resolves with.
+ */
+export interface PlannerLoopResult {
+	/** Number of LLM calls that were made. */
+	readonly rounds: number;
+	/** Every tool call that was executed, in execution order. */
+	readonly executedCalls: readonly ExecutedCall[];
+	/** Final assistant text when the LLM stopped instead of calling a
+	 *  tool. ``null`` when the last turn was a tool-call burst that we
+	 *  cut off via round budget. */
+	readonly summary: string | null;
+	/** Why the loop ended. */
+	readonly stopReason: LoopStopReason;
+	/** Complete chat history for debug-log capture (system + user +
+	 *  every assistant/tool turn). Never synced — contains decrypted
+	 *  user content. */
+	readonly messages: readonly ChatMessage[];
+}
+
+// ─── The loop ───────────────────────────────────────────────────────
+
+/** Round budget used when ``PlannerLoopInput.maxRounds`` is unset. */
+const DEFAULT_MAX_ROUNDS = 5;
+
+/** Serialise a tool result into the JSON body of a tool-message.
+ *  ``data`` is only emitted when the tool actually returned some. */
+function encodeToolResult(result: ToolResult): string {
+	const payload: { success: boolean; message: string; data?: unknown } = {
+		success: result.success,
+		message: result.message,
+	};
+	if (result.data !== undefined) {
+		payload.data = result.data;
+	}
+	return JSON.stringify(payload);
+}
+
+/**
+ * Drive a multi-turn tool-calling conversation against ``llm``.
+ *
+ * The history starts with the system and user prompt. Every round
+ * asks the LLM for its next turn, records that turn, and — if it
+ * requested tools — runs them one after another through
+ * ``onToolCall``, appending each result as a tool-message. The loop
+ * ends as soon as a turn requests no tools, or once the round budget
+ * (``input.maxRounds``, default 5) is used up.
+ *
+ * Rejections from ``llm.complete`` or ``onToolCall`` are not caught
+ * here and reject the returned promise.
+ *
+ * @returns Round count, executed calls, final assistant text (if any),
+ *   the stop reason and the full chat history.
+ */
+export async function runPlannerLoop(opts: {
+	readonly llm: LlmClient;
+	readonly input: PlannerLoopInput;
+	/** Execute a tool call and return the result that should be fed back
+	 *  to the LLM as a tool-message. Must not throw — convert errors to
+	 *  ``{ success: false, message }``. The loop injects the result
+	 *  verbatim so the LLM can reason over failures (e.g. "vault locked
+	 *  → ask user to unlock"). */
+	readonly onToolCall: (call: ToolCallRequest) => Promise<ToolResult>;
+}): Promise<PlannerLoopResult> {
+	const { llm, input, onToolCall } = opts;
+	const roundBudget = input.maxRounds ?? DEFAULT_MAX_ROUNDS;
+	const toolCatalog = toolsToFunctionSchemas(input.tools);
+
+	const history: ChatMessage[] = [
+		{ role: 'system', content: input.systemPrompt },
+		{ role: 'user', content: input.userPrompt },
+	];
+	const executed: ExecutedCall[] = [];
+
+	// Single place that shapes the result, shared by every exit path.
+	const finish = (
+		rounds: number,
+		stopReason: LoopStopReason,
+		summary: string | null
+	): PlannerLoopResult => ({
+		rounds,
+		executedCalls: executed,
+		summary,
+		stopReason,
+		messages: history,
+	});
+
+	let round = 0;
+	while (round < roundBudget) {
+		round += 1;
+
+		const reply = await llm.complete({
+			messages: history,
+			tools: toolCatalog,
+			model: input.model,
+			temperature: input.temperature,
+		});
+		const requested = reply.toolCalls;
+		const wantsTools = requested.length > 0;
+
+		// The assistant turn goes into the history first: on the next
+		// round the LLM has to see its own tool_calls right before the
+		// tool-messages that answer them.
+		history.push({
+			role: 'assistant',
+			content: reply.content,
+			toolCalls: wantsTools ? requested : undefined,
+		});
+
+		// Nothing left to call — the conversation is over.
+		if (!wantsTools) {
+			const reason: LoopStopReason =
+				reply.finishReason === 'stop' ? 'assistant-stop' : 'no-tool-calls';
+			return finish(round, reason, reply.content);
+		}
+
+		// Deliberately one at a time: running pure-read tools in
+		// parallel would be valid, but a strictly ordered log is much
+		// easier to follow when somebody debugs a failed run.
+		for (const call of requested) {
+			const result = await onToolCall(call);
+			executed.push({ round, call, result });
+			history.push({
+				role: 'tool',
+				toolCallId: call.id,
+				content: encodeToolResult(result),
+			});
+		}
+	}
+
+	// Budget exhausted while the LLM was still asking for tools — the
+	// outer consumer can mark the iteration as incomplete.
+	return finish(round, 'max-rounds', null);
+}