mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-17 17:49:39 +02:00
Closes the loop on M2: when the compactor fires, the LLM needs to know
it's now seeing a <compact-summary> instead of raw turns so it
doesn't waste a turn asking about lost details or re-executing tools
whose responses are gone.
shared-ai:
- LoopState grows `compactionsDone: number` (cap-1 by current loop
policy, but shape kept as count for future multi-compact cycles).
- runPlannerLoop populates it on each reminder-channel call. New
loop test asserts [0, 1] sequence: round 1 before compaction,
round 2 after.
mana-ai:
- New producer `compactedReminder` — fires severity=info when
compactionsDone >= 1, wrapped in a German one-liner ("frag nicht
nach verlorenen Details").
- Injected FIRST in buildReminderChannel so the LLM frames the rest
of the round with "I'm looking at a summary" context. Metric
surface stays `{producer='compacted', severity='info'}`.
4 new reminder tests (3 pure producer + 1 composition-ordering) +
1 loop-wiring test. 77 shared-ai tests and 20 in reminders.test.ts — all green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
733 lines
20 KiB
TypeScript
733 lines
20 KiB
TypeScript
import { describe, expect, it, vi } from 'vitest';

import { runPlannerLoop, type ToolCallRequest, type ToolResult } from './loop';
import { MockLlmClient } from './mock-llm';

import type { ToolSchema } from '../tools/schemas';

const tools: ToolSchema[] = [
|
|
{
|
|
name: 'list_things',
|
|
module: 'test',
|
|
description: 'list things',
|
|
defaultPolicy: 'auto',
|
|
parameters: [],
|
|
},
|
|
{
|
|
name: 'create_thing',
|
|
module: 'test',
|
|
description: 'create a thing',
|
|
defaultPolicy: 'propose',
|
|
parameters: [{ name: 'title', type: 'string', description: 'title', required: true }],
|
|
},
|
|
];
|
|
|
|
describe('runPlannerLoop', () => {
|
|
it('stops immediately when the LLM emits no tool_calls', async () => {
|
|
const llm = new MockLlmClient().enqueueStop('done');
|
|
const onToolCall = vi.fn();
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'test/model',
|
|
},
|
|
onToolCall,
|
|
});
|
|
expect(result.rounds).toBe(1);
|
|
expect(result.executedCalls).toHaveLength(0);
|
|
expect(result.summary).toBe('done');
|
|
expect(result.stopReason).toBe('assistant-stop');
|
|
expect(onToolCall).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it('executes a single tool call and feeds the result back', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
|
.enqueueStop('all done');
|
|
|
|
const onToolCall = vi.fn(
|
|
async (_call: ToolCallRequest): Promise<ToolResult> => ({
|
|
success: true,
|
|
data: ['a', 'b'],
|
|
message: '2 things',
|
|
})
|
|
);
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
|
|
onToolCall,
|
|
});
|
|
|
|
expect(result.rounds).toBe(2);
|
|
expect(result.executedCalls).toHaveLength(1);
|
|
expect(result.executedCalls[0].call.name).toBe('list_things');
|
|
expect(result.summary).toBe('all done');
|
|
expect(result.stopReason).toBe('assistant-stop');
|
|
|
|
// Second LLM call must have seen the tool result in its messages.
|
|
expect(llm.calls[1].messages).toHaveLength(4); // system + user + assistant + tool
|
|
const toolMsg = llm.calls[1].messages[3];
|
|
expect(toolMsg.role).toBe('tool');
|
|
expect(toolMsg.content).toContain('2 things');
|
|
});
|
|
|
|
it('executes parallel tool calls sequentially', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([
|
|
{ name: 'create_thing', args: { title: 'a' } },
|
|
{ name: 'create_thing', args: { title: 'b' } },
|
|
{ name: 'create_thing', args: { title: 'c' } },
|
|
])
|
|
.enqueueStop();
|
|
|
|
const executedInOrder: string[] = [];
|
|
const onToolCall = async (call: ToolCallRequest): Promise<ToolResult> => {
|
|
executedInOrder.push(call.arguments.title as string);
|
|
return { success: true, message: 'ok' };
|
|
};
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
|
|
onToolCall,
|
|
});
|
|
|
|
expect(executedInOrder).toEqual(['a', 'b', 'c']);
|
|
expect(result.executedCalls).toHaveLength(3);
|
|
});
|
|
|
|
it('propagates tool failures as tool-messages (LLM can react)', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
|
.enqueueStop('ack');
|
|
|
|
const onToolCall = async (): Promise<ToolResult> => ({
|
|
success: false,
|
|
message: 'db locked',
|
|
});
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
|
|
onToolCall,
|
|
});
|
|
|
|
const toolMsg = llm.calls[1].messages[3];
|
|
expect(toolMsg.content).toContain('db locked');
|
|
expect(toolMsg.content).toContain('"success":false');
|
|
expect(result.executedCalls[0].result.success).toBe(false);
|
|
});
|
|
|
|
it('honours the maxRounds ceiling', async () => {
|
|
const llm = new MockLlmClient();
|
|
// Seed enough tool-call turns to exceed the cap
|
|
for (let i = 0; i < 10; i++) {
|
|
llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
|
|
}
|
|
const onToolCall = async (): Promise<ToolResult> => ({
|
|
success: true,
|
|
message: 'ok',
|
|
});
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
maxRounds: 3,
|
|
},
|
|
onToolCall,
|
|
});
|
|
|
|
expect(result.rounds).toBe(3);
|
|
expect(result.stopReason).toBe('max-rounds');
|
|
expect(result.executedCalls).toHaveLength(3);
|
|
});
|
|
});
|
|
|
|
describe('runPlannerLoop — parallel reads', () => {
|
|
it('runs a batch of parallel-safe tools via Promise.all', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([
|
|
{ name: 'list_things', args: { i: 1 } },
|
|
{ name: 'list_things', args: { i: 2 } },
|
|
{ name: 'list_things', args: { i: 3 } },
|
|
])
|
|
.enqueueStop();
|
|
|
|
let concurrent = 0;
|
|
let peakConcurrent = 0;
|
|
let completed = 0;
|
|
const onToolCall = async (_call: ToolCallRequest): Promise<ToolResult> => {
|
|
concurrent++;
|
|
peakConcurrent = Math.max(peakConcurrent, concurrent);
|
|
await new Promise((r) => setTimeout(r, 10));
|
|
concurrent--;
|
|
completed++;
|
|
return { success: true, message: `done-${completed}` };
|
|
};
|
|
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
isParallelSafe: (name) => name === 'list_things',
|
|
},
|
|
onToolCall,
|
|
});
|
|
|
|
// All three ran concurrently — peak should be 3, not 1.
|
|
expect(peakConcurrent).toBe(3);
|
|
});
|
|
|
|
it('preserves source order in messages despite parallel completion', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([
|
|
{ name: 'list_things', args: { i: 'a' } },
|
|
{ name: 'list_things', args: { i: 'b' } },
|
|
{ name: 'list_things', args: { i: 'c' } },
|
|
])
|
|
.enqueueStop();
|
|
|
|
// Reverse completion order: first call finishes last.
|
|
const delays: Record<string, number> = { a: 30, b: 10, c: 1 };
|
|
const onToolCall = async (call: ToolCallRequest): Promise<ToolResult> => {
|
|
const i = call.arguments.i as string;
|
|
await new Promise((r) => setTimeout(r, delays[i]));
|
|
return { success: true, message: `item-${i}` };
|
|
};
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
isParallelSafe: () => true,
|
|
},
|
|
onToolCall,
|
|
});
|
|
|
|
// executedCalls follows source order
|
|
expect(result.executedCalls.map((ec) => ec.call.arguments.i)).toEqual(['a', 'b', 'c']);
|
|
|
|
// Tool messages on the NEXT LLM call are in source order too
|
|
const toolMsgs = llm.calls[1].messages.filter((m) => m.role === 'tool');
|
|
expect(toolMsgs.map((m) => m.content)).toEqual([
|
|
expect.stringContaining('item-a'),
|
|
expect.stringContaining('item-b'),
|
|
expect.stringContaining('item-c'),
|
|
]);
|
|
});
|
|
|
|
it('falls back to sequential when any call is not parallel-safe', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([
|
|
{ name: 'list_things', args: {} },
|
|
{ name: 'create_thing', args: { title: 'x' } }, // unsafe
|
|
{ name: 'list_things', args: {} },
|
|
])
|
|
.enqueueStop();
|
|
|
|
let concurrent = 0;
|
|
let peakConcurrent = 0;
|
|
const onToolCall = async (): Promise<ToolResult> => {
|
|
concurrent++;
|
|
peakConcurrent = Math.max(peakConcurrent, concurrent);
|
|
await new Promise((r) => setTimeout(r, 5));
|
|
concurrent--;
|
|
return { success: true, message: 'ok' };
|
|
};
|
|
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
isParallelSafe: (name) => name === 'list_things',
|
|
},
|
|
onToolCall,
|
|
});
|
|
|
|
// Mixed batch ran sequentially — peak concurrency stayed at 1.
|
|
expect(peakConcurrent).toBe(1);
|
|
});
|
|
|
|
it('batches more than PARALLEL_TOOL_BATCH_SIZE calls', async () => {
|
|
const N = 15; // > 10-call ceiling
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls(Array.from({ length: N }, (_, i) => ({ name: 'list_things', args: { i } })))
|
|
.enqueueStop();
|
|
|
|
let concurrent = 0;
|
|
let peakConcurrent = 0;
|
|
const onToolCall = async (): Promise<ToolResult> => {
|
|
concurrent++;
|
|
peakConcurrent = Math.max(peakConcurrent, concurrent);
|
|
await new Promise((r) => setTimeout(r, 15));
|
|
concurrent--;
|
|
return { success: true, message: 'ok' };
|
|
};
|
|
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
isParallelSafe: () => true,
|
|
},
|
|
onToolCall,
|
|
});
|
|
|
|
// Capped at the batch size — the 11th onwards had to wait.
|
|
expect(peakConcurrent).toBeLessThanOrEqual(10);
|
|
// All still executed, all in source order.
|
|
expect(result.executedCalls).toHaveLength(N);
|
|
expect(result.executedCalls.map((ec) => ec.call.arguments.i)).toEqual(
|
|
Array.from({ length: N }, (_, i) => i)
|
|
);
|
|
});
|
|
|
|
it('stays sequential when isParallelSafe is not provided (pre-M1 default)', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([
|
|
{ name: 'list_things', args: {} },
|
|
{ name: 'list_things', args: {} },
|
|
])
|
|
.enqueueStop();
|
|
|
|
let concurrent = 0;
|
|
let peakConcurrent = 0;
|
|
const onToolCall = async (): Promise<ToolResult> => {
|
|
concurrent++;
|
|
peakConcurrent = Math.max(peakConcurrent, concurrent);
|
|
await new Promise((r) => setTimeout(r, 5));
|
|
concurrent--;
|
|
return { success: true, message: 'ok' };
|
|
};
|
|
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: { systemPrompt: 's', userPrompt: 'u', tools, model: 'm' },
|
|
onToolCall,
|
|
});
|
|
|
|
expect(peakConcurrent).toBe(1);
|
|
});
|
|
});
|
|
|
|
describe('runPlannerLoop — compactor', () => {
|
|
it('does not compact below the threshold', async () => {
|
|
const llm = new MockLlmClient();
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: null,
|
|
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
|
finishReason: 'tool_calls',
|
|
usage: { promptTokens: 500, completionTokens: 0, totalTokens: 500 }, // 50%
|
|
});
|
|
llm.enqueueStop('done');
|
|
|
|
const compactSpy = vi.fn();
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
compactor: {
|
|
maxContextTokens: 1000,
|
|
compact: async (m) => {
|
|
compactSpy();
|
|
return { messages: m, compactedTurns: 0 };
|
|
},
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
expect(compactSpy).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it('fires when usage crosses the threshold and replaces messages', async () => {
|
|
const llm = new MockLlmClient();
|
|
// Round 1: tool call that reports 92% of the 1000-token budget
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: null,
|
|
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
|
finishReason: 'tool_calls',
|
|
usage: { promptTokens: 920, completionTokens: 0, totalTokens: 920 },
|
|
});
|
|
// Round 2: after compaction fires, the LLM stops
|
|
llm.enqueueStop('done');
|
|
|
|
let compactorInput: readonly { role: string; content?: string | null }[] = [];
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's-prompt',
|
|
userPrompt: 'u-prompt',
|
|
tools,
|
|
model: 'm',
|
|
compactor: {
|
|
maxContextTokens: 1000,
|
|
compact: async (m) => {
|
|
compactorInput = m;
|
|
return {
|
|
messages: [
|
|
{ role: 'system', content: 's-prompt' },
|
|
{ role: 'user', content: 'u-prompt' },
|
|
{ role: 'assistant', content: '<compact-summary>FOLDED</compact-summary>' },
|
|
],
|
|
compactedTurns: 2,
|
|
};
|
|
},
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
// The compactor received the full post-round-1 history
|
|
expect(compactorInput.length).toBeGreaterThan(2);
|
|
// The round-2 LLM request saw the compacted history, not the raw one
|
|
const round2Seen = llm.calls[1].messages;
|
|
expect(round2Seen).toHaveLength(3);
|
|
expect(round2Seen[2].content).toContain('FOLDED');
|
|
});
|
|
|
|
it('fires at most once per run', async () => {
|
|
const llm = new MockLlmClient();
|
|
for (let i = 0; i < 4; i++) {
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: null,
|
|
toolCalls: [{ id: `c${i}`, name: 'list_things', arguments: {} }],
|
|
finishReason: 'tool_calls',
|
|
usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, // always over threshold
|
|
});
|
|
}
|
|
llm.enqueueStop('done');
|
|
|
|
let compactCallCount = 0;
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
maxRounds: 10,
|
|
compactor: {
|
|
maxContextTokens: 1000,
|
|
compact: async () => {
|
|
compactCallCount++;
|
|
return {
|
|
messages: [
|
|
{ role: 'system', content: 's' },
|
|
{ role: 'user', content: 'u' },
|
|
{ role: 'assistant', content: '<compact>' },
|
|
],
|
|
compactedTurns: 2,
|
|
};
|
|
},
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
expect(compactCallCount).toBe(1);
|
|
});
|
|
|
|
it('bails out silently when maxContextTokens is 0', async () => {
|
|
const llm = new MockLlmClient();
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: 'done',
|
|
toolCalls: [],
|
|
finishReason: 'stop',
|
|
usage: { promptTokens: 9_999, completionTokens: 0, totalTokens: 9_999 },
|
|
});
|
|
|
|
const compactSpy = vi.fn();
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
compactor: {
|
|
maxContextTokens: 0, // disabled
|
|
compact: async (m) => {
|
|
compactSpy();
|
|
return { messages: m, compactedTurns: 0 };
|
|
},
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
expect(compactSpy).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it('surfaces compactionsDone in LoopState for reminder producers', async () => {
|
|
const llm = new MockLlmClient();
|
|
// Round 1: over threshold
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: null,
|
|
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
|
finishReason: 'tool_calls',
|
|
usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 },
|
|
});
|
|
// Round 2: stop so we end cleanly
|
|
llm.enqueueStop('done');
|
|
|
|
const compactionsDoneSeen: number[] = [];
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
compactor: {
|
|
maxContextTokens: 1000,
|
|
compact: async () => ({
|
|
messages: [
|
|
{ role: 'system', content: 's' },
|
|
{ role: 'user', content: 'u' },
|
|
{ role: 'assistant', content: '<compact>' },
|
|
],
|
|
compactedTurns: 2,
|
|
}),
|
|
},
|
|
reminderChannel: (state) => {
|
|
compactionsDoneSeen.push(state.compactionsDone);
|
|
return [];
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
// Round 1 channel call: before compaction fires, so 0
|
|
// Round 2 channel call: after compaction, so 1
|
|
expect(compactionsDoneSeen).toEqual([0, 1]);
|
|
});
|
|
|
|
it('skips when the compactor returns 0 compacted turns', async () => {
|
|
const llm = new MockLlmClient();
|
|
(llm as unknown as { queue: unknown[] }).queue.push({
|
|
content: null,
|
|
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
|
finishReason: 'tool_calls',
|
|
usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 },
|
|
});
|
|
llm.enqueueStop('done');
|
|
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
compactor: {
|
|
maxContextTokens: 1000,
|
|
compact: async (m) => ({ messages: m, compactedTurns: 0 }),
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
// Round 2 should have seen the ORIGINAL history (untouched by the
|
|
// no-op compactor) — just system + user + assistant + tool
|
|
const round2Seen = llm.calls[1].messages;
|
|
expect(round2Seen).toHaveLength(4);
|
|
});
|
|
});
|
|
|
|
describe('runPlannerLoop — reminderChannel', () => {
|
|
it('injects reminders as transient system messages on the LLM call', async () => {
|
|
const llm = new MockLlmClient().enqueueStop('done');
|
|
const result = await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
reminderChannel: () => ['budget 80%', 'mission overdue'],
|
|
},
|
|
onToolCall: vi.fn(),
|
|
});
|
|
|
|
// The request messages the mock saw must include the reminders
|
|
// AFTER the user turn, each wrapped in <reminder> tags.
|
|
const seenByLlm = llm.calls[0].messages;
|
|
expect(seenByLlm).toHaveLength(4); // system + user + 2 reminders
|
|
expect(seenByLlm[0].role).toBe('system');
|
|
expect(seenByLlm[0].content).toBe('s');
|
|
expect(seenByLlm[1].role).toBe('user');
|
|
expect(seenByLlm[2].role).toBe('system');
|
|
expect(seenByLlm[2].content).toBe('<reminder>budget 80%</reminder>');
|
|
expect(seenByLlm[3].role).toBe('system');
|
|
expect(seenByLlm[3].content).toBe('<reminder>mission overdue</reminder>');
|
|
|
|
// And the persisted history must NOT contain them.
|
|
expect(result.messages.find((m) => m.content?.includes('<reminder>'))).toBeUndefined();
|
|
});
|
|
|
|
it('is called per round with fresh state — round 2 does not see round 1 reminders', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
|
.enqueueStop('done');
|
|
|
|
const channelCalls: Array<{ round: number; reminders: string[] }> = [];
|
|
const channel = vi.fn((state) => {
|
|
const reminders = [`round-${state.round}`];
|
|
channelCalls.push({ round: state.round, reminders });
|
|
return reminders;
|
|
});
|
|
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
reminderChannel: channel,
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
expect(channel).toHaveBeenCalledTimes(2);
|
|
expect(channelCalls).toEqual([
|
|
{ round: 1, reminders: ['round-1'] },
|
|
{ round: 2, reminders: ['round-2'] },
|
|
]);
|
|
|
|
// Round 2's request must have ONLY round-2's reminder, not round-1's.
|
|
const round2Seen = llm.calls[1].messages;
|
|
const reminders = round2Seen.filter((m) => m.content?.includes('<reminder>'));
|
|
expect(reminders).toHaveLength(1);
|
|
expect(reminders[0].content).toBe('<reminder>round-2</reminder>');
|
|
});
|
|
|
|
it('exposes recentCalls as a sliding window, oldest-first', async () => {
|
|
// 7 rounds, each with one tool call, so by round 7 we have 6 prior
|
|
// results — the window must cap at LOOP_STATE_RECENT_CALLS_WINDOW = 5.
|
|
const llm = new MockLlmClient();
|
|
for (let i = 0; i < 7; i++) {
|
|
llm.enqueueToolCalls([{ name: 'list_things', args: { i } }]);
|
|
}
|
|
llm.enqueueStop();
|
|
|
|
const windowsSeen: Array<Array<{ i: unknown; ok: boolean }>> = [];
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
maxRounds: 10,
|
|
reminderChannel: (state) => {
|
|
windowsSeen.push(
|
|
state.recentCalls.map((ec) => ({
|
|
i: ec.call.arguments.i,
|
|
ok: ec.result.success,
|
|
}))
|
|
);
|
|
return [];
|
|
},
|
|
},
|
|
onToolCall: async (call) => ({
|
|
success: true,
|
|
message: `ok-${call.arguments.i}`,
|
|
}),
|
|
});
|
|
|
|
// Round 1 → window empty
|
|
expect(windowsSeen[0]).toEqual([]);
|
|
// Round 2 → one prior call
|
|
expect(windowsSeen[1]).toEqual([{ i: 0, ok: true }]);
|
|
// Round 6 → five prior calls, oldest-first
|
|
expect(windowsSeen[5]).toEqual([
|
|
{ i: 0, ok: true },
|
|
{ i: 1, ok: true },
|
|
{ i: 2, ok: true },
|
|
{ i: 3, ok: true },
|
|
{ i: 4, ok: true },
|
|
]);
|
|
// Round 7 → window slides; i=0 drops off, i=5 is newest
|
|
expect(windowsSeen[6]).toEqual([
|
|
{ i: 1, ok: true },
|
|
{ i: 2, ok: true },
|
|
{ i: 3, ok: true },
|
|
{ i: 4, ok: true },
|
|
{ i: 5, ok: true },
|
|
]);
|
|
});
|
|
|
|
it('surfaces loop state — toolCallCount and lastCall — to the channel', async () => {
|
|
const llm = new MockLlmClient()
|
|
.enqueueToolCalls([{ name: 'list_things', args: {} }])
|
|
.enqueueToolCalls([{ name: 'create_thing', args: { title: 'x' } }])
|
|
.enqueueStop('done');
|
|
|
|
const snapshots: Array<{ round: number; toolCallCount: number; lastName?: string }> = [];
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
reminderChannel: (state) => {
|
|
snapshots.push({
|
|
round: state.round,
|
|
toolCallCount: state.toolCallCount,
|
|
lastName: state.lastCall?.call.name,
|
|
});
|
|
return [];
|
|
},
|
|
},
|
|
onToolCall: async () => ({ success: true, message: 'ok' }),
|
|
});
|
|
|
|
expect(snapshots).toEqual([
|
|
{ round: 1, toolCallCount: 0, lastName: undefined },
|
|
{ round: 2, toolCallCount: 1, lastName: 'list_things' },
|
|
{ round: 3, toolCallCount: 2, lastName: 'create_thing' },
|
|
]);
|
|
});
|
|
|
|
it('empty reminders array leaves the request unchanged', async () => {
|
|
const llm = new MockLlmClient().enqueueStop('done');
|
|
await runPlannerLoop({
|
|
llm,
|
|
input: {
|
|
systemPrompt: 's',
|
|
userPrompt: 'u',
|
|
tools,
|
|
model: 'm',
|
|
reminderChannel: () => [],
|
|
},
|
|
onToolCall: vi.fn(),
|
|
});
|
|
|
|
const seenByLlm = llm.calls[0].messages;
|
|
expect(seenByLlm).toHaveLength(2); // just system + user
|
|
});
|
|
});
|