test(ai): promote MockLlmClient to a shared @mana/shared-ai export

The runPlannerLoop test file and the webapp's mission-runner test each
had its own inline scripted LLM mock — same interface, slightly
diverged. This consolidates them into
packages/shared-ai/src/planner/mock-llm.ts and re-exports the client
from the package root so any consumer can drive the loop
deterministically.

Both existing test files now use the shared client. 5 + 3 tests pass,
44 total in shared-ai still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-20 18:05:46 +02:00
parent e10c2436a6
commit 5b7564b3a4
5 changed files with 70 additions and 80 deletions

View file

@ -12,7 +12,7 @@ import { registerTools } from '../../tools/registry';
import { createMission, getMission } from './store'; import { createMission, getMission } from './store';
import { runMission } from './runner'; import { runMission } from './runner';
import { MISSIONS_TABLE } from './types'; import { MISSIONS_TABLE } from './types';
import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai'; import { MockLlmClient } from '@mana/shared-ai';
let executed: { name: string; params: Record<string, unknown> }[] = []; let executed: { name: string; params: Record<string, unknown> }[] = [];
@ -34,33 +34,20 @@ beforeEach(async () => {
await db.table(MISSIONS_TABLE).clear(); await db.table(MISSIONS_TABLE).clear();
}); });
/** Minimal LlmClient for runner tests scripts one or more assistant /** Builder for concise scripted LLM turns. Wraps the shared
* turns via enqueueToolCalls / enqueueStop. */ * MockLlmClient from @mana/shared-ai so tests read top-down. */
function mockLlm( function mockLlm(
turns: Array< turns: Array<
| { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> } | { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
| { kind: 'stop'; content?: string } | { kind: 'stop'; content?: string }
> >
): LlmClient { ) {
let i = 0; const m = new MockLlmClient();
return { for (const t of turns) {
async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> { if (t.kind === 'stop') m.enqueueStop(t.content ?? null);
const turn = turns[i++]; else m.enqueueToolCalls(t.calls);
if (!turn) throw new Error('MockLlm exhausted'); }
if (turn.kind === 'stop') { return m;
return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' };
}
return {
content: null,
toolCalls: turn.calls.map((c, n) => ({
id: `call_${i}_${n}`,
name: c.name,
arguments: c.args,
})),
finishReason: 'tool_calls',
};
},
};
} }
describe('runMission', () => { describe('runMission', () => {

View file

@ -82,6 +82,7 @@ export type {
export { export {
buildPlannerPrompt, buildPlannerPrompt,
buildSystemPrompt, buildSystemPrompt,
MockLlmClient,
parsePlannerResponse, parsePlannerResponse,
runPlannerLoop, runPlannerLoop,
} from './planner'; } from './planner';

View file

@ -10,6 +10,8 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
export { buildSystemPrompt } from './system-prompt'; export { buildSystemPrompt } from './system-prompt';
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt'; export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
export { runPlannerLoop } from './loop'; export { runPlannerLoop } from './loop';
export { MockLlmClient } from './mock-llm';
export type { MockLlmTurn } from './mock-llm';
export type { export type {
ChatMessage, ChatMessage,
ChatRole, ChatRole,

View file

@ -1,58 +1,8 @@
import { describe, expect, it, vi } from 'vitest'; import { describe, expect, it, vi } from 'vitest';
import { import { runPlannerLoop, type ToolCallRequest, type ToolResult } from './loop';
runPlannerLoop, import { MockLlmClient } from './mock-llm';
type ChatMessage,
type LlmClient,
type LlmCompletionResponse,
type ToolCallRequest,
type ToolResult,
} from './loop';
import type { ToolSchema } from '../tools/schemas'; import type { ToolSchema } from '../tools/schemas';
/**
* Scriptable mock LLM each ``enqueue*`` call pushes one planned
* response onto a FIFO. The loop pulls responses in order. If the loop
* asks for more turns than we enqueued, the test fails loudly rather
* than hanging.
*/
class MockLlm implements LlmClient {
private queue: LlmCompletionResponse[] = [];
public calls: Array<{ messages: readonly ChatMessage[]; toolNames: string[] }> = [];
enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
this.queue.push({
content: null,
toolCalls: calls.map((c, i) => ({
id: `call_${this.queue.length}_${i}`,
name: c.name,
arguments: c.args,
})),
finishReason: 'tool_calls',
});
return this;
}
enqueueStop(content: string | null = null): this {
this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
return this;
}
async complete(req: {
messages: readonly ChatMessage[];
tools: readonly unknown[];
}): Promise<LlmCompletionResponse> {
// Snapshot at call time — the loop mutates the same array after,
// and we want to assert the state the LLM actually saw.
this.calls.push({
messages: [...req.messages],
toolNames: (req.tools as Array<{ function: { name: string } }>).map((t) => t.function.name),
});
const next = this.queue.shift();
if (!next) throw new Error('MockLlm: no more responses enqueued');
return next;
}
}
const tools: ToolSchema[] = [ const tools: ToolSchema[] = [
{ {
name: 'list_things', name: 'list_things',
@ -72,7 +22,7 @@ const tools: ToolSchema[] = [
describe('runPlannerLoop', () => { describe('runPlannerLoop', () => {
it('stops immediately when the LLM emits no tool_calls', async () => { it('stops immediately when the LLM emits no tool_calls', async () => {
const llm = new MockLlm().enqueueStop('done'); const llm = new MockLlmClient().enqueueStop('done');
const onToolCall = vi.fn(); const onToolCall = vi.fn();
const result = await runPlannerLoop({ const result = await runPlannerLoop({
llm, llm,
@ -92,7 +42,7 @@ describe('runPlannerLoop', () => {
}); });
it('executes a single tool call and feeds the result back', async () => { it('executes a single tool call and feeds the result back', async () => {
const llm = new MockLlm() const llm = new MockLlmClient()
.enqueueToolCalls([{ name: 'list_things', args: {} }]) .enqueueToolCalls([{ name: 'list_things', args: {} }])
.enqueueStop('all done'); .enqueueStop('all done');
@ -124,7 +74,7 @@ describe('runPlannerLoop', () => {
}); });
it('executes parallel tool calls sequentially', async () => { it('executes parallel tool calls sequentially', async () => {
const llm = new MockLlm() const llm = new MockLlmClient()
.enqueueToolCalls([ .enqueueToolCalls([
{ name: 'create_thing', args: { title: 'a' } }, { name: 'create_thing', args: { title: 'a' } },
{ name: 'create_thing', args: { title: 'b' } }, { name: 'create_thing', args: { title: 'b' } },
@ -149,7 +99,7 @@ describe('runPlannerLoop', () => {
}); });
it('propagates tool failures as tool-messages (LLM can react)', async () => { it('propagates tool failures as tool-messages (LLM can react)', async () => {
const llm = new MockLlm() const llm = new MockLlmClient()
.enqueueToolCalls([{ name: 'list_things', args: {} }]) .enqueueToolCalls([{ name: 'list_things', args: {} }])
.enqueueStop('ack'); .enqueueStop('ack');
@ -171,7 +121,7 @@ describe('runPlannerLoop', () => {
}); });
it('honours the maxRounds ceiling', async () => { it('honours the maxRounds ceiling', async () => {
const llm = new MockLlm(); const llm = new MockLlmClient();
// Seed enough tool-call turns to exceed the cap // Seed enough tool-call turns to exceed the cap
for (let i = 0; i < 10; i++) { for (let i = 0; i < 10; i++) {
llm.enqueueToolCalls([{ name: 'list_things', args: {} }]); llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);

View file

@ -0,0 +1,50 @@
/**
* Scriptable MockLlmClient drives runPlannerLoop in tests without
* hitting a real LLM. Each ``enqueue*`` call queues one assistant
* turn; the loop consumes them FIFO. Unscripted turns throw loudly
* so tests fail fast instead of hanging.
*/
import type { ChatMessage, LlmClient, LlmCompletionRequest, LlmCompletionResponse } from './loop';
/**
 * Snapshot of one `complete()` invocation: the conversation the mock
 * was shown plus the names of the tools it was offered that round.
 */
export interface MockLlmTurn {
  messages: readonly ChatMessage[];
  toolNames: string[];
}
/**
 * Scriptable LlmClient for tests: queue assistant turns up front via
 * the `enqueue*` builders, then let the planner loop consume them in
 * FIFO order. Asking for an unscripted turn throws immediately so a
 * test fails fast instead of hanging.
 */
export class MockLlmClient implements LlmClient {
  /** Pending scripted responses, consumed front-to-back. */
  private queue: LlmCompletionResponse[] = [];

  /**
   * One entry per `complete()` invocation, captured before the loop
   * can mutate its message array — assert against these to verify
   * exactly what the "LLM" was shown on each round.
   */
  public readonly calls: MockLlmTurn[] = [];

  /** Queue one assistant turn that requests the given tool calls. */
  enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
    // Stable per enqueued response; keeps generated ids deterministic.
    const turn = this.queue.length;
    this.queue.push({
      content: null,
      toolCalls: calls.map((call, slot) => ({
        id: `call_${turn}_${slot}`,
        name: call.name,
        arguments: call.args,
      })),
      finishReason: 'tool_calls',
    });
    return this;
  }

  /** Queue a plain assistant reply that ends the loop. */
  enqueueStop(content: string | null = null): this {
    this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
    return this;
  }

  /** Record what was asked, then hand back the next scripted turn. */
  async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
    // Snapshot at call time — the caller mutates the same array later,
    // and tests want the state the LLM actually saw.
    this.calls.push({
      messages: req.messages.slice(),
      toolNames: req.tools.map((tool) => tool.function.name),
    });
    const response = this.queue.shift();
    if (response === undefined) {
      throw new Error('MockLlmClient: no more responses enqueued');
    }
    return response;
  }
}