mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
test(ai): promote MockLlmClient to a shared @mana/shared-ai export
The runPlannerLoop test file and the webapp's mission-runner test each had their own inline scripted LLM mock: same interface, but the implementations had diverged slightly. This commit consolidates them into packages/shared-ai/src/planner/mock-llm.ts and re-exports the mock from the package root so any consumer can drive the loop deterministically. Both existing test files now use the shared client. 5 + 3 tests pass (planner loop + mission runner), and all 44 tests in shared-ai are still green. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e10c2436a6
commit
5b7564b3a4
5 changed files with 70 additions and 80 deletions
|
|
@ -12,7 +12,7 @@ import { registerTools } from '../../tools/registry';
|
||||||
import { createMission, getMission } from './store';
|
import { createMission, getMission } from './store';
|
||||||
import { runMission } from './runner';
|
import { runMission } from './runner';
|
||||||
import { MISSIONS_TABLE } from './types';
|
import { MISSIONS_TABLE } from './types';
|
||||||
import type { LlmClient, LlmCompletionRequest, LlmCompletionResponse } from '@mana/shared-ai';
|
import { MockLlmClient } from '@mana/shared-ai';
|
||||||
|
|
||||||
let executed: { name: string; params: Record<string, unknown> }[] = [];
|
let executed: { name: string; params: Record<string, unknown> }[] = [];
|
||||||
|
|
||||||
|
|
@ -34,33 +34,20 @@ beforeEach(async () => {
|
||||||
await db.table(MISSIONS_TABLE).clear();
|
await db.table(MISSIONS_TABLE).clear();
|
||||||
});
|
});
|
||||||
|
|
||||||
/** Minimal LlmClient for runner tests — scripts one or more assistant
|
/** Builder for concise scripted LLM turns. Wraps the shared
|
||||||
* turns via enqueueToolCalls / enqueueStop. */
|
* MockLlmClient from @mana/shared-ai so tests read top-down. */
|
||||||
function mockLlm(
|
function mockLlm(
|
||||||
turns: Array<
|
turns: Array<
|
||||||
| { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
|
| { kind: 'tool_calls'; calls: Array<{ name: string; args: Record<string, unknown> }> }
|
||||||
| { kind: 'stop'; content?: string }
|
| { kind: 'stop'; content?: string }
|
||||||
>
|
>
|
||||||
): LlmClient {
|
) {
|
||||||
let i = 0;
|
const m = new MockLlmClient();
|
||||||
return {
|
for (const t of turns) {
|
||||||
async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
|
if (t.kind === 'stop') m.enqueueStop(t.content ?? null);
|
||||||
const turn = turns[i++];
|
else m.enqueueToolCalls(t.calls);
|
||||||
if (!turn) throw new Error('MockLlm exhausted');
|
}
|
||||||
if (turn.kind === 'stop') {
|
return m;
|
||||||
return { content: turn.content ?? null, toolCalls: [], finishReason: 'stop' };
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
content: null,
|
|
||||||
toolCalls: turn.calls.map((c, n) => ({
|
|
||||||
id: `call_${i}_${n}`,
|
|
||||||
name: c.name,
|
|
||||||
arguments: c.args,
|
|
||||||
})),
|
|
||||||
finishReason: 'tool_calls',
|
|
||||||
};
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('runMission', () => {
|
describe('runMission', () => {
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,7 @@ export type {
|
||||||
export {
|
export {
|
||||||
buildPlannerPrompt,
|
buildPlannerPrompt,
|
||||||
buildSystemPrompt,
|
buildSystemPrompt,
|
||||||
|
MockLlmClient,
|
||||||
parsePlannerResponse,
|
parsePlannerResponse,
|
||||||
runPlannerLoop,
|
runPlannerLoop,
|
||||||
} from './planner';
|
} from './planner';
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
|
||||||
export { buildSystemPrompt } from './system-prompt';
|
export { buildSystemPrompt } from './system-prompt';
|
||||||
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
|
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
|
||||||
export { runPlannerLoop } from './loop';
|
export { runPlannerLoop } from './loop';
|
||||||
|
export { MockLlmClient } from './mock-llm';
|
||||||
|
export type { MockLlmTurn } from './mock-llm';
|
||||||
export type {
|
export type {
|
||||||
ChatMessage,
|
ChatMessage,
|
||||||
ChatRole,
|
ChatRole,
|
||||||
|
|
|
||||||
|
|
@ -1,58 +1,8 @@
|
||||||
import { describe, expect, it, vi } from 'vitest';
|
import { describe, expect, it, vi } from 'vitest';
|
||||||
import {
|
import { runPlannerLoop, type ToolCallRequest, type ToolResult } from './loop';
|
||||||
runPlannerLoop,
|
import { MockLlmClient } from './mock-llm';
|
||||||
type ChatMessage,
|
|
||||||
type LlmClient,
|
|
||||||
type LlmCompletionResponse,
|
|
||||||
type ToolCallRequest,
|
|
||||||
type ToolResult,
|
|
||||||
} from './loop';
|
|
||||||
import type { ToolSchema } from '../tools/schemas';
|
import type { ToolSchema } from '../tools/schemas';
|
||||||
|
|
||||||
/**
|
|
||||||
* Scriptable mock LLM — each ``enqueue*`` call pushes one planned
|
|
||||||
* response onto a FIFO. The loop pulls responses in order. If the loop
|
|
||||||
* asks for more turns than we enqueued, the test fails loudly rather
|
|
||||||
* than hanging.
|
|
||||||
*/
|
|
||||||
class MockLlm implements LlmClient {
|
|
||||||
private queue: LlmCompletionResponse[] = [];
|
|
||||||
public calls: Array<{ messages: readonly ChatMessage[]; toolNames: string[] }> = [];
|
|
||||||
|
|
||||||
enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
|
|
||||||
this.queue.push({
|
|
||||||
content: null,
|
|
||||||
toolCalls: calls.map((c, i) => ({
|
|
||||||
id: `call_${this.queue.length}_${i}`,
|
|
||||||
name: c.name,
|
|
||||||
arguments: c.args,
|
|
||||||
})),
|
|
||||||
finishReason: 'tool_calls',
|
|
||||||
});
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
enqueueStop(content: string | null = null): this {
|
|
||||||
this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
async complete(req: {
|
|
||||||
messages: readonly ChatMessage[];
|
|
||||||
tools: readonly unknown[];
|
|
||||||
}): Promise<LlmCompletionResponse> {
|
|
||||||
// Snapshot at call time — the loop mutates the same array after,
|
|
||||||
// and we want to assert the state the LLM actually saw.
|
|
||||||
this.calls.push({
|
|
||||||
messages: [...req.messages],
|
|
||||||
toolNames: (req.tools as Array<{ function: { name: string } }>).map((t) => t.function.name),
|
|
||||||
});
|
|
||||||
const next = this.queue.shift();
|
|
||||||
if (!next) throw new Error('MockLlm: no more responses enqueued');
|
|
||||||
return next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const tools: ToolSchema[] = [
|
const tools: ToolSchema[] = [
|
||||||
{
|
{
|
||||||
name: 'list_things',
|
name: 'list_things',
|
||||||
|
|
@ -72,7 +22,7 @@ const tools: ToolSchema[] = [
|
||||||
|
|
||||||
describe('runPlannerLoop', () => {
|
describe('runPlannerLoop', () => {
|
||||||
it('stops immediately when the LLM emits no tool_calls', async () => {
|
it('stops immediately when the LLM emits no tool_calls', async () => {
|
||||||
const llm = new MockLlm().enqueueStop('done');
|
const llm = new MockLlmClient().enqueueStop('done');
|
||||||
const onToolCall = vi.fn();
|
const onToolCall = vi.fn();
|
||||||
const result = await runPlannerLoop({
|
const result = await runPlannerLoop({
|
||||||
llm,
|
llm,
|
||||||
|
|
@ -92,7 +42,7 @@ describe('runPlannerLoop', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('executes a single tool call and feeds the result back', async () => {
|
it('executes a single tool call and feeds the result back', async () => {
|
||||||
const llm = new MockLlm()
|
const llm = new MockLlmClient()
|
||||||
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
||||||
.enqueueStop('all done');
|
.enqueueStop('all done');
|
||||||
|
|
||||||
|
|
@ -124,7 +74,7 @@ describe('runPlannerLoop', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('executes parallel tool calls sequentially', async () => {
|
it('executes parallel tool calls sequentially', async () => {
|
||||||
const llm = new MockLlm()
|
const llm = new MockLlmClient()
|
||||||
.enqueueToolCalls([
|
.enqueueToolCalls([
|
||||||
{ name: 'create_thing', args: { title: 'a' } },
|
{ name: 'create_thing', args: { title: 'a' } },
|
||||||
{ name: 'create_thing', args: { title: 'b' } },
|
{ name: 'create_thing', args: { title: 'b' } },
|
||||||
|
|
@ -149,7 +99,7 @@ describe('runPlannerLoop', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('propagates tool failures as tool-messages (LLM can react)', async () => {
|
it('propagates tool failures as tool-messages (LLM can react)', async () => {
|
||||||
const llm = new MockLlm()
|
const llm = new MockLlmClient()
|
||||||
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
||||||
.enqueueStop('ack');
|
.enqueueStop('ack');
|
||||||
|
|
||||||
|
|
@ -171,7 +121,7 @@ describe('runPlannerLoop', () => {
|
||||||
});
|
});
|
||||||
|
|
||||||
it('honours the maxRounds ceiling', async () => {
|
it('honours the maxRounds ceiling', async () => {
|
||||||
const llm = new MockLlm();
|
const llm = new MockLlmClient();
|
||||||
// Seed enough tool-call turns to exceed the cap
|
// Seed enough tool-call turns to exceed the cap
|
||||||
for (let i = 0; i < 10; i++) {
|
for (let i = 0; i < 10; i++) {
|
||||||
llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
|
llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
|
||||||
|
|
|
||||||
50
packages/shared-ai/src/planner/mock-llm.ts
Normal file
50
packages/shared-ai/src/planner/mock-llm.ts
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
/**
|
||||||
|
* Scriptable MockLlmClient — drives runPlannerLoop in tests without
|
||||||
|
* hitting a real LLM. Each ``enqueue*`` call queues one assistant
|
||||||
|
* turn; the loop consumes them FIFO. Unscripted turns throw loudly
|
||||||
|
* so tests fail fast instead of hanging.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { ChatMessage, LlmClient, LlmCompletionRequest, LlmCompletionResponse } from './loop';
|
||||||
|
|
||||||
|
/**
 * One recorded inbound request to the mock: the conversation and the
 * tool names that were passed to `complete()` on that round.
 */
export interface MockLlmTurn {
  /** Messages handed to the mock for this round. */
  messages: ReadonlyArray<ChatMessage>;
  /** Names of the tools offered to the mock for this round. */
  toolNames: Array<string>;
}
|
||||||
|
|
||||||
|
/**
 * Scriptable LLM client for tests. Each `enqueue*` call queues exactly
 * one assistant turn; `complete()` hands the turns out in FIFO order and
 * records what it was asked in `calls`.
 */
export class MockLlmClient implements LlmClient {
  /** Scripted responses that have not been handed out yet, oldest first. */
  private queue: LlmCompletionResponse[] = [];
  /**
   * Number of turns enqueued over the lifetime of this client. Unlike
   * `queue.length` it never shrinks when `complete()` consumes a turn, so
   * ids derived from it stay unique even when a test enqueues further
   * turns after some have already been consumed.
   */
  private enqueuedTurns = 0;
  /** Snapshots of each inbound call. Use to assert what the LLM saw
   *  on each round (messages + tool names). */
  public readonly calls: MockLlmTurn[] = [];

  /**
   * Queue one assistant turn that requests the given tool calls.
   *
   * Ids have the form `call_<turn>_<index>`, where `<turn>` is the
   * zero-based position of this turn among all turns ever enqueued and
   * `<index>` is the position of the call within the turn.
   *
   * @param calls Tool invocations the scripted assistant should request.
   * @returns This client, for chaining.
   */
  enqueueToolCalls(calls: Array<{ name: string; args: Record<string, unknown> }>): this {
    const turn = this.enqueuedTurns++;
    this.queue.push({
      content: null,
      toolCalls: calls.map((c, i) => ({
        id: `call_${turn}_${i}`,
        name: c.name,
        arguments: c.args,
      })),
      finishReason: 'tool_calls',
    });
    return this;
  }

  /**
   * Queue one final assistant turn with no tool calls.
   *
   * @param content Text of the final message, or `null` for none.
   * @returns This client, for chaining.
   */
  enqueueStop(content: string | null = null): this {
    // Stop turns count towards the turn index too, so a later tool-call
    // turn keeps the same id it would get from an all-up-front script.
    this.enqueuedTurns++;
    this.queue.push({ content, toolCalls: [], finishReason: 'stop' });
    return this;
  }

  /**
   * Record the request and return the next scripted response.
   *
   * @param req Request as issued by the code under test.
   * @returns The oldest response still in the queue.
   * @throws Error when no scripted response is left.
   */
  async complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
    // Snapshot at call-time — the loop mutates the array after, and
    // tests want to assert the state the LLM actually saw.
    this.calls.push({
      messages: [...req.messages],
      toolNames: req.tools.map((t) => t.function.name),
    });
    const next = this.queue.shift();
    if (!next) throw new Error('MockLlmClient: no more responses enqueued');
    return next;
  }
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue