From 5b7564b3a49d98faa42e7a16e5f1793ce07a858c Mon Sep 17 00:00:00 2001
From: Till JS
Date: Mon, 20 Apr 2026 18:05:46 +0200
Subject: [PATCH] test(ai): promote MockLlmClient to a shared @mana/shared-ai export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The runPlannerLoop test file and the webapp's mission-runner test each
carried their own inline scripted LLM mock: the same interface, but the
two copies had drifted slightly apart. Consolidate them into
packages/shared-ai/src/planner/mock-llm.ts and re-export MockLlmClient
from the package root so any consumer can drive the planner loop
deterministically.

Both existing test files now use the shared client. 5 + 3 tests pass,
and all 44 tests in shared-ai stay green.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../src/lib/data/ai/missions/runner.test.ts | 33 +++-------
 packages/shared-ai/src/index.ts             |  1 +
 packages/shared-ai/src/planner/index.ts     |  2 +
 packages/shared-ai/src/planner/loop.test.ts | 64 ++-----------------
 packages/shared-ai/src/planner/mock-llm.ts  | 50 +++++++++++++++
 5 files changed, 70 insertions(+), 80 deletions(-)
 create mode 100644 packages/shared-ai/src/planner/mock-llm.ts

diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
index 67fdd62f0..bd2b1b919 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.test.ts
@@ -12,7 +12,7 @@ import { registerTools } from '../../tools/registry';
 import { createMission, getMission } from './store';
 import { runMission } from './runner';
 import { MISSIONS_TABLE } from './types';
-import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai';
+import { MockLlmClient } from '@mana/shared-ai';
 
 let executed: { name: string; params: Record<string, unknown> }[] = [];
 
@@ -34,33 +34,20 @@ beforeEach(async () => {
   await db.table(MISSIONS_TABLE).clear();
 });
 
-/** Minimal LlmClient for runner tests — scripts one or more assistant
- * turns via enqueueToolCalls / enqueueStop. */
+/** Builder for concise scripted LLM turns. Wraps the shared
+ * MockLlmClient from @mana/shared-ai so tests read top-down. */
 function mockLlm(
   turns: Array<
     | { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
     | { kind: 'stop'; content?: string }
   >
-): LlmClient {
-  let i = 0;
-  return {
-    async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
-      const turn = turns[i++];
-      if (!turn) throw new Error('MockLlm exhausted');
-      if (turn.kind === 'stop') {
-        return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' };
-      }
-      return {
-        content: null,
-        toolCalls: turn.calls.map((c, n) => ({
-          id: `call_${i}_${n}`,
-          name: c.name,
-          arguments: c.args,
-        })),
-        finishReason: 'tool_calls',
-      };
-    },
-  };
+) {
+  const m = new MockLlmClient();
+  for (const t of turns) {
+    if (t.kind === 'stop') m.enqueueStop(t.content ?? null);
+    else m.enqueueToolCalls(t.calls);
+  }
+  return m;
 }
 
 describe('runMission', () => {
diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts
index 06dd5e2ad..3c236f157 100644
--- a/packages/shared-ai/src/index.ts
+++ b/packages/shared-ai/src/index.ts
@@ -82,6 +82,7 @@ export type {
 export {
   buildPlannerPrompt,
   buildSystemPrompt,
+  MockLlmClient,
   parsePlannerResponse,
   runPlannerLoop,
 } from './planner';
diff --git a/packages/shared-ai/src/planner/index.ts b/packages/shared-ai/src/planner/index.ts
index 5e0924cd3..a7cfd8127 100644
--- a/packages/shared-ai/src/planner/index.ts
+++ b/packages/shared-ai/src/planner/index.ts
@@ -10,6 +10,8 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
 export { buildSystemPrompt } from './system-prompt';
 export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
 export { runPlannerLoop } from './loop';
+export { MockLlmClient } from './mock-llm';
+export type { MockLlmCall } from './mock-llm';
 export type {
   ChatMessage,
   ChatRole,
diff --git a/packages/shared-ai/src/planner/loop.test.ts b/packages/shared-ai/src/planner/loop.test.ts
index 0cbd45d08..ce8fe8f6f 100644
--- a/packages/shared-ai/src/planner/loop.test.ts
+++ b/packages/shared-ai/src/planner/loop.test.ts
@@ -1,58 +1,8 @@
 import { describe, expect, it, vi } from 'vitest';
-import {
-  runPlannerLoop,
-  type ChatMessage,
-  type LlmClient,
-  type LlmCompletionResponse,
-  type ToolCallRequest,
-  type ToolResult,
-} from './loop';
+import { runPlannerLoop, type ToolCallRequest, type ToolResult } from './loop';
+import { MockLlmClient } from './mock-llm';
 import type { ToolSchema } from '../tools/schemas';
 
-/**
- * Scriptable mock LLM — each ``enqueue*`` call pushes one planned
- * response onto a FIFO. The loop pulls responses in order. If the loop
- * asks for more turns than we enqueued, the test fails loudly rather
- * than hanging.
- */
-class MockLlm implements LlmClient {
-  private queue: LlmCompletionResponse[] = [];
-  public calls: Array<{ messages: readonly ChatMessage[]; toolNames: string[] }> = [];
-
-  enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
-    this.queue.push({
-      content: null,
-      toolCalls: calls.map((c, i) => ({
-        id: `call_${this.queue.length}_${i}`,
-        name: c.name,
-        arguments: c.args,
-      })),
-      finishReason: 'tool_calls',
-    });
-    return this;
-  }
-
-  enqueueStop(content: string | null = null): this {
-    this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
-    return this;
-  }
-
-  async complete(req: {
-    messages: readonly ChatMessage[];
-    tools: readonly unknown[];
-  }): Promise<LlmCompletionResponse> {
-    // Snapshot at call time — the loop mutates the same array after,
-    // and we want to assert the state the LLM actually saw.
-    this.calls.push({
-      messages: [...req.messages],
-      toolNames: (req.tools as Array<{ function: { name: string } }>).map((t) => t.function.name),
-    });
-    const next = this.queue.shift();
-    if (!next) throw new Error('MockLlm: no more responses enqueued');
-    return next;
-  }
-}
-
 const tools: ToolSchema[] = [
   {
     name: 'list_things',
@@ -72,7 +22,7 @@ describe('runPlannerLoop', () => {
   it('stops immediately when the LLM emits no tool_calls', async () => {
-    const llm = new MockLlm().enqueueStop('done');
+    const llm = new MockLlmClient().enqueueStop('done');
     const onToolCall = vi.fn();
     const result = await runPlannerLoop({
       llm,
@@ -92,7 +42,7 @@
   });
 
   it('executes a single tool call and feeds the result back', async () => {
-    const llm = new MockLlm()
+    const llm = new MockLlmClient()
       .enqueueToolCalls([{ name: 'list_things', args: {} }])
       .enqueueStop('all done');
@@ -124,7 +74,7 @@
   });
 
   it('executes parallel tool calls sequentially', async () => {
-    const llm = new MockLlm()
+    const llm = new MockLlmClient()
       .enqueueToolCalls([
         { name: 'create_thing', args: { title: 'a' } },
         { name: 'create_thing', args: { title: 'b' } },
@@ -149,7 +99,7 @@
   });
 
   it('propagates tool failures as tool-messages (LLM can react)', async () => {
-    const llm = new MockLlm()
+    const llm = new MockLlmClient()
       .enqueueToolCalls([{ name: 'list_things', args: {} }])
       .enqueueStop('ack');
@@ -171,7 +121,7 @@
   });
 
   it('honours the maxRounds ceiling', async () => {
-    const llm = new MockLlm();
+    const llm = new MockLlmClient();
     // Seed enough tool-call turns to exceed the cap
     for (let i = 0; i < 10; i++) {
       llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
diff --git a/packages/shared-ai/src/planner/mock-llm.ts b/packages/shared-ai/src/planner/mock-llm.ts
new file mode 100644
index 000000000..c718c0087
--- /dev/null
+++ b/packages/shared-ai/src/planner/mock-llm.ts
@@ -0,0 +1,50 @@
+/**
+ * Scriptable MockLlmClient — drives runPlannerLoop in tests without
+ * hitting a real LLM. Each ``enqueue*`` call queues one assistant
+ * turn; the loop consumes them FIFO. Unscripted turns throw loudly
+ * so tests fail fast instead of hanging.
+ */
+
+import type { ChatMessage, LlmClient, LlmCompletionRequest, LlmCompletionResponse } from './loop';
+
+export interface MockLlmCall {
+  messages: readonly ChatMessage[];
+  toolNames: string[];
+}
+
+export class MockLlmClient implements LlmClient {
+  private queue: LlmCompletionResponse[] = [];
+  /** Snapshots of each inbound call. Use to assert what the LLM saw
+   * on each round (messages + tool schemas). */
+  public readonly calls: MockLlmCall[] = [];
+
+  enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
+    this.queue.push({
+      content: null,
+      toolCalls: calls.map((c, i) => ({
+        id: `call_${this.queue.length}_${i}`,
+        name: c.name,
+        arguments: c.args,
+      })),
+      finishReason: 'tool_calls',
+    });
+    return this;
+  }
+
+  enqueueStop(content: string | null = null): this {
+    this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
+    return this;
+  }
+
+  async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
+    // Snapshot at call time — the loop mutates the array after, and
+    // tests want to assert the state the LLM actually saw.
+    this.calls.push({
+      messages: [...req.messages],
+      toolNames: req.tools.map((t) => t.function.name),
+    });
+    const next = this.queue.shift();
+    if (!next) throw new Error('MockLlmClient: no more responses enqueued');
+    return next;
+  }
+}
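
A minimal usage sketch for a new consumer follows. It assumes
runPlannerLoop accepts the option names the tests above exercise (llm,
tools, onToolCall, maxRounds); the './fixtures' module, its `tools`
ToolSchema[] and its canned `okResult` ToolResult are hypothetical
stand-ins, not part of this patch.

  import { expect, it } from 'vitest';
  import { MockLlmClient, runPlannerLoop } from '@mana/shared-ai';
  // Hypothetical fixtures: a ToolSchema[] shaped like the `tools` array
  // in loop.test.ts, plus a canned ToolResult for onToolCall to return.
  import { okResult, tools } from './fixtures';

  it('scripts one tool round, then a stop', async () => {
    const llm = new MockLlmClient()
      .enqueueToolCalls([{ name: 'list_things', args: {} }]) // round 1
      .enqueueStop('done');                                  // round 2

    await runPlannerLoop({ llm, tools, maxRounds: 5, onToolCall: async () => okResult });

    // One snapshot per round; round 2 saw the fed-back tool result.
    expect(llm.calls).toHaveLength(2);
    expect(llm.calls[0].toolNames).toContain('list_things');
    expect(llm.calls[1].messages.length).toBeGreaterThan(llm.calls[0].messages.length);
  });

Because enqueueStop ends the script, the loop terminates before
maxRounds; an exhausted queue throws instead of hanging, so a
mis-scripted test fails immediately.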