feat(shared-ai): runPlannerLoop + compact system prompt for function calling

Introduces the new planner pipeline that both the webapp runner and the mana-ai tick will switch to in the next commits. Additive for now — the legacy buildPlannerPrompt + parsePlannerResponse stay exported so callers can migrate one at a time; they will be removed once the last consumer is gone.

- planner/loop.ts — runPlannerLoop orchestrates a multi-turn chat against a caller-supplied LlmClient. Tool-calls from the LLM are handed to an onToolCall callback and their results fed back as tool-messages. Parallel tool-calls in one turn execute sequentially to keep the message log linear for debugging. Stops on assistant stop, empty tool_calls, or a hard max-rounds ceiling (default 5).
- planner/system-prompt.ts — new buildSystemPrompt. ~40-line German system frame, no tool listing (the SDK-level tools field carries the schemas now), no JSON format example, no "please return JSON" plea. User frame renders mission + linked inputs + last 3 iteration summaries, same as before.
- Five test cases covering the loop: immediate stop, single tool call with result feedback, parallel calls execute in order, tool failures propagate as tool-messages the LLM can react to, and maxRounds ceiling fires with the right stopReason.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-22 02:06:42 +02:00 · 2026-04-20 15:31:01 +02:00 · 2026-04-20 15:31:01 +02:00 · 4daca8970b
commit 4daca8970b
parent 2cf89ce26a
5 changed files with 537 additions and 1 deletions
--- a/packages/shared-ai/src/planner/loop.test.ts
+++ b/packages/shared-ai/src/planner/loop.test.ts
@ -0,0 +1,200 @@
+import { describe, expect, it, vi } from 'vitest';
+import {
+	runPlannerLoop,
+	type ChatMessage,
+	type LlmClient,
+	type LlmCompletionResponse,
+	type ToolCallRequest,
+	type ToolResult,
+} from './loop';
+import type { ToolSchema } from '../tools/schemas';
+
+/**
+ * Scripted stand-in for an LLM client. Every ``enqueue*`` helper appends
+ * one canned response to a FIFO and ``complete`` hands them out in that
+ * order. Once the FIFO is empty ``complete`` throws, so a loop that asks
+ * for more turns than the test planned fails loudly instead of hanging.
+ */
+class MockLlm implements LlmClient {
+	private pending: LlmCompletionResponse[] = [];
+	public calls: Array<{ messages: readonly ChatMessage[]; toolNames: string[] }> = [];
+
+	/** Plans one turn in which the model requests the given tool calls. */
+	enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
+		// Ids combine the FIFO length at enqueue time with the call's index in the turn.
+		const turnIndex = this.pending.length;
+		const toolCalls = calls.map(({ name, args }, position) => ({
+			id: `call_${turnIndex}_${position}`,
+			name,
+			arguments: args,
+		}));
+		this.pending.push({ content: null, toolCalls, finishReason: 'tool_calls' });
+		return this;
+	}
+
+	/** Plans one terminal turn: no tool calls, optional assistant text. */
+	enqueueStop(content: string | null = null): this {
+		const stopTurn: LlmCompletionResponse = { content, toolCalls: [], finishReason: 'stop' };
+		this.pending.push(stopTurn);
+		return this;
+	}
+
+	/**
+	 * Records what the caller sent, then returns the next planned response.
+	 * Throws when nothing is left in the FIFO.
+	 */
+	async complete(req: {
+		messages: readonly ChatMessage[];
+		tools: readonly unknown[];
+	}): Promise<LlmCompletionResponse> {
+		// Copy the messages now: the loop keeps appending to the same array,
+		// and assertions need the state as it was when this call was made.
+		const seenMessages = req.messages.slice();
+		const seenToolNames = (req.tools as Array<{ function: { name: string } }>).map(
+			(tool) => tool.function.name
+		);
+		this.calls.push({ messages: seenMessages, toolNames: seenToolNames });
+
+		const planned = this.pending.shift();
+		if (planned === undefined) {
+			throw new Error('MockLlm: no more responses enqueued');
+		}
+		return planned;
+	}
+}
+
+// Fixture schemas handed to every loop run below: one tool without
+// parameters and one tool with a single required string parameter.
+const listThingsTool: ToolSchema = {
+	name: 'list_things',
+	module: 'test',
+	description: 'list things',
+	defaultPolicy: 'auto',
+	parameters: [],
+};
+
+const createThingTool: ToolSchema = {
+	name: 'create_thing',
+	module: 'test',
+	description: 'create a thing',
+	defaultPolicy: 'propose',
+	parameters: [{ name: 'title', type: 'string', description: 'title', required: true }],
+};
+
+const tools: ToolSchema[] = [listThingsTool, createThingTool];
+
+describe('runPlannerLoop', () => {
+	it('stops immediately when the LLM emits no tool_calls', async () => {
+		// A single scripted turn: the model answers without requesting any tool.
+		const toolSpy = vi.fn();
+		const scriptedLlm = new MockLlm().enqueueStop('done');
+
+		const outcome = await runPlannerLoop({
+			llm: scriptedLlm,
+			input: {
+				systemPrompt: 's',
+				userPrompt: 'u',
+				tools,
+				model: 'test/model',
+			},
+			onToolCall: toolSpy,
+		});
+
+		// The tool callback stays untouched and the run ends after one round.
+		expect(toolSpy).not.toHaveBeenCalled();
+		expect(outcome.stopReason).toBe('assistant-stop');
+		expect(outcome.summary).toBe('done');
+		expect(outcome.executedCalls).toHaveLength(0);
+		expect(outcome.rounds).toBe(1);
+	});
+
+	it('executes a single tool call and feeds the result back', async () => {
+		// Tool handler that always succeeds with a two-item payload.
+		const onToolCall = vi.fn(async (_call: ToolCallRequest): Promise<ToolResult> => {
+			return { success: true, data: ['a', 'b'], message: '2 things' };
+		});
+
+		// Turn 1 requests list_things, turn 2 ends the conversation.
+		const llm = new MockLlm();
+		llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
+		llm.enqueueStop('all done');
+
+		const { rounds, executedCalls, summary, stopReason } = await runPlannerLoop({
+			llm,
+			input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
+			onToolCall,
+		});
+
+		expect(rounds).toBe(2);
+		expect(executedCalls).toHaveLength(1);
+		expect(executedCalls[0].call.name).toBe('list_things');
+		expect(summary).toBe('all done');
+		expect(stopReason).toBe('assistant-stop');
+
+		// The second request must already carry the tool result:
+		// system + user + assistant + tool.
+		const secondRequestMessages = llm.calls[1].messages;
+		expect(secondRequestMessages).toHaveLength(4);
+		const toolMessage = secondRequestMessages[3];
+		expect(toolMessage.role).toBe('tool');
+		expect(toolMessage.content).toContain('2 things');
+	});
+
+	it('executes parallel tool calls sequentially', async () => {
+		// One turn carrying three tool calls, followed by a stop turn.
+		const llm = new MockLlm()
+			.enqueueToolCalls([
+				{ name: 'create_thing', args: { title: 'a' } },
+				{ name: 'create_thing', args: { title: 'b' } },
+				{ name: 'create_thing', args: { title: 'c' } },
+			])
+			.enqueueStop();
+
+		const executedInOrder: string[] = [];
+		// Number of handler invocations currently running, and the peak seen.
+		let inFlight = 0;
+		let maxInFlight = 0;
+		const onToolCall = async (call: ToolCallRequest): Promise<ToolResult> => {
+			inFlight += 1;
+			maxInFlight = Math.max(maxInFlight, inFlight);
+			executedInOrder.push(call.arguments.title as string);
+			// Yield once so the handler is genuinely asynchronous. Without an
+			// await, start order alone would look identical whether the loop
+			// awaits each call or fires them all at once, so the test could
+			// not tell sequential from concurrent dispatch.
+			await Promise.resolve();
+			inFlight -= 1;
+			return { success: true, message: 'ok' };
+		};
+
+		const result = await runPlannerLoop({
+			llm,
+			input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
+			onToolCall,
+		});
+
+		expect(executedInOrder).toEqual(['a', 'b', 'c']);
+		// Never more than one handler running at a time.
+		expect(maxInFlight).toBe(1);
+		expect(result.executedCalls).toHaveLength(3);
+	});
+
+	it('propagates tool failures as tool-messages (LLM can react)', async () => {
+		// Every tool invocation reports a failure.
+		const onToolCall = async (): Promise<ToolResult> => {
+			return { success: false, message: 'db locked' };
+		};
+
+		// Turn 1 requests a tool, turn 2 acknowledges and stops.
+		const llm = new MockLlm();
+		llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
+		llm.enqueueStop('ack');
+
+		const outcome = await runPlannerLoop({
+			llm,
+			input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
+			onToolCall,
+		});
+
+		// The failure is recorded on the result…
+		expect(outcome.executedCalls[0].result.success).toBe(false);
+
+		// …and was visible to the LLM in the fourth message of its second request.
+		const failureMessage = llm.calls[1].messages[3];
+		expect(failureMessage.content).toContain('"success":false');
+		expect(failureMessage.content).toContain('db locked');
+	});
+
+	it('honours the maxRounds ceiling', async () => {
+		// Script ten tool-call turns — more than the cap of three set below.
+		const llm = new MockLlm();
+		let turnsToSeed = 10;
+		while (turnsToSeed > 0) {
+			llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
+			turnsToSeed -= 1;
+		}
+
+		const onToolCall = async (): Promise<ToolResult> => {
+			return { success: true, message: 'ok' };
+		};
+
+		const { rounds, stopReason, executedCalls } = await runPlannerLoop({
+			llm,
+			input: {
+				systemPrompt: 's',
+				userPrompt: 'u',
+				tools,
+				model: 'm',
+				maxRounds: 3,
+			},
+			onToolCall,
+		});
+
+		// The loop gives up at the cap, having run one tool call per round.
+		expect(stopReason).toBe('max-rounds');
+		expect(rounds).toBe(3);
+		expect(executedCalls).toHaveLength(3);
+	});
+});