mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:41:09 +02:00
feat(shared-ai): wire compactor into runPlannerLoop (M2.2)
PlannerLoopInput grows an optional compactor:
compactor?: {
maxContextTokens: number;
threshold?: number; // default 0.92, matches Claude Code wU2
compact: (messages) => Promise<{ messages, compactedTurns }>;
}
Before each LLM call the loop checks whether promptTokens + completionTokens
has reached threshold × maxContextTokens. If so AND we haven't
compacted this run yet, the callback runs, its returned messages
REPLACE the live history, and compactionsDone flips to 1 so a
runaway tool can't re-trigger.
Design choices:
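- Replaces the history at most ONCE per loop run (a callback that
reports 0 compacted turns is not counted and may be asked again
on a later round). If the fresh (compacted) history hits the
threshold again in the same run, the LLM round budget will be
exhausted first; better to terminate than to recursively compact
a summary.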
- No reminder emitted automatically — the caller can wire
that via reminderChannel by reading compactionsDone from
LoopState (next PR; compactionsDone isn't exposed yet to
keep the state surface small).
- compactor callback is injectable, not hardcoded to
compactHistory() from compact.ts. Lets mana-ai route the
compactor LLM call to a cheaper model (Haiku) without
changing the loop.
- Zero maxContextTokens → skip silently (same contract as
shouldCompact()).
Also cleaned up the isParallelSafe non-null-assertion warning by
hoisting the predicate to a local with proper narrowing.
5 new loop tests: below-threshold no-op, single-fire replacement,
once-per-run idempotency, zero-cap bail, no-op when compactor
returns 0 turns. 76 shared-ai tests total, green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
aab1e3045b
commit
3d8214a147
2 changed files with 236 additions and 2 deletions
|
|
@ -328,6 +328,190 @@ describe('runPlannerLoop — parallel reads', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('runPlannerLoop — compactor', () => {
|
||||
it('does not compact below the threshold', async () => {
|
||||
const llm = new MockLlmClient();
|
||||
(llm as unknown as { queue: unknown[] }).queue.push({
|
||||
content: null,
|
||||
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
||||
finishReason: 'tool_calls',
|
||||
usage: { promptTokens: 500, completionTokens: 0, totalTokens: 500 }, // 50%
|
||||
});
|
||||
llm.enqueueStop('done');
|
||||
|
||||
const compactSpy = vi.fn();
|
||||
await runPlannerLoop({
|
||||
llm,
|
||||
input: {
|
||||
systemPrompt: 's',
|
||||
userPrompt: 'u',
|
||||
tools,
|
||||
model: 'm',
|
||||
compactor: {
|
||||
maxContextTokens: 1000,
|
||||
compact: async (m) => {
|
||||
compactSpy();
|
||||
return { messages: m, compactedTurns: 0 };
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async () => ({ success: true, message: 'ok' }),
|
||||
});
|
||||
|
||||
expect(compactSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('fires when usage crosses the threshold and replaces messages', async () => {
|
||||
const llm = new MockLlmClient();
|
||||
// Round 1: tool call that reports 92% of the 1000-token budget
|
||||
(llm as unknown as { queue: unknown[] }).queue.push({
|
||||
content: null,
|
||||
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
||||
finishReason: 'tool_calls',
|
||||
usage: { promptTokens: 920, completionTokens: 0, totalTokens: 920 },
|
||||
});
|
||||
// Round 2: after compaction fires, the LLM stops
|
||||
llm.enqueueStop('done');
|
||||
|
||||
let compactorInput: readonly { role: string; content?: string | null }[] = [];
|
||||
await runPlannerLoop({
|
||||
llm,
|
||||
input: {
|
||||
systemPrompt: 's-prompt',
|
||||
userPrompt: 'u-prompt',
|
||||
tools,
|
||||
model: 'm',
|
||||
compactor: {
|
||||
maxContextTokens: 1000,
|
||||
compact: async (m) => {
|
||||
compactorInput = m;
|
||||
return {
|
||||
messages: [
|
||||
{ role: 'system', content: 's-prompt' },
|
||||
{ role: 'user', content: 'u-prompt' },
|
||||
{ role: 'assistant', content: '<compact-summary>FOLDED</compact-summary>' },
|
||||
],
|
||||
compactedTurns: 2,
|
||||
};
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async () => ({ success: true, message: 'ok' }),
|
||||
});
|
||||
|
||||
// The compactor received the full post-round-1 history
|
||||
expect(compactorInput.length).toBeGreaterThan(2);
|
||||
// The round-2 LLM request saw the compacted history, not the raw one
|
||||
const round2Seen = llm.calls[1].messages;
|
||||
expect(round2Seen).toHaveLength(3);
|
||||
expect(round2Seen[2].content).toContain('FOLDED');
|
||||
});
|
||||
|
||||
it('fires at most once per run', async () => {
|
||||
const llm = new MockLlmClient();
|
||||
for (let i = 0; i < 4; i++) {
|
||||
(llm as unknown as { queue: unknown[] }).queue.push({
|
||||
content: null,
|
||||
toolCalls: [{ id: `c${i}`, name: 'list_things', arguments: {} }],
|
||||
finishReason: 'tool_calls',
|
||||
usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, // always over threshold
|
||||
});
|
||||
}
|
||||
llm.enqueueStop('done');
|
||||
|
||||
let compactCallCount = 0;
|
||||
await runPlannerLoop({
|
||||
llm,
|
||||
input: {
|
||||
systemPrompt: 's',
|
||||
userPrompt: 'u',
|
||||
tools,
|
||||
model: 'm',
|
||||
maxRounds: 10,
|
||||
compactor: {
|
||||
maxContextTokens: 1000,
|
||||
compact: async () => {
|
||||
compactCallCount++;
|
||||
return {
|
||||
messages: [
|
||||
{ role: 'system', content: 's' },
|
||||
{ role: 'user', content: 'u' },
|
||||
{ role: 'assistant', content: '<compact>' },
|
||||
],
|
||||
compactedTurns: 2,
|
||||
};
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async () => ({ success: true, message: 'ok' }),
|
||||
});
|
||||
|
||||
expect(compactCallCount).toBe(1);
|
||||
});
|
||||
|
||||
it('bails out silently when maxContextTokens is 0', async () => {
|
||||
const llm = new MockLlmClient();
|
||||
(llm as unknown as { queue: unknown[] }).queue.push({
|
||||
content: 'done',
|
||||
toolCalls: [],
|
||||
finishReason: 'stop',
|
||||
usage: { promptTokens: 9_999, completionTokens: 0, totalTokens: 9_999 },
|
||||
});
|
||||
|
||||
const compactSpy = vi.fn();
|
||||
await runPlannerLoop({
|
||||
llm,
|
||||
input: {
|
||||
systemPrompt: 's',
|
||||
userPrompt: 'u',
|
||||
tools,
|
||||
model: 'm',
|
||||
compactor: {
|
||||
maxContextTokens: 0, // disabled
|
||||
compact: async (m) => {
|
||||
compactSpy();
|
||||
return { messages: m, compactedTurns: 0 };
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async () => ({ success: true, message: 'ok' }),
|
||||
});
|
||||
|
||||
expect(compactSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('skips when the compactor returns 0 compacted turns', async () => {
|
||||
const llm = new MockLlmClient();
|
||||
(llm as unknown as { queue: unknown[] }).queue.push({
|
||||
content: null,
|
||||
toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
|
||||
finishReason: 'tool_calls',
|
||||
usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 },
|
||||
});
|
||||
llm.enqueueStop('done');
|
||||
|
||||
await runPlannerLoop({
|
||||
llm,
|
||||
input: {
|
||||
systemPrompt: 's',
|
||||
userPrompt: 'u',
|
||||
tools,
|
||||
model: 'm',
|
||||
compactor: {
|
||||
maxContextTokens: 1000,
|
||||
compact: async (m) => ({ messages: m, compactedTurns: 0 }),
|
||||
},
|
||||
},
|
||||
onToolCall: async () => ({ success: true, message: 'ok' }),
|
||||
});
|
||||
|
||||
// Round 2 should have seen the ORIGINAL history (untouched by the
|
||||
// no-op compactor) — just system + user + assistant + tool
|
||||
const round2Seen = llm.calls[1].messages;
|
||||
expect(round2Seen).toHaveLength(4);
|
||||
});
|
||||
});
|
||||
|
||||
describe('runPlannerLoop — reminderChannel', () => {
|
||||
it('injects reminders as transient system messages on the LLM call', async () => {
|
||||
const llm = new MockLlmClient().enqueueStop('done');
|
||||
|
|
|
|||
|
|
@ -144,6 +144,35 @@ export interface PlannerLoopInput {
|
|||
* constant-time lookups are expected (registry hit, name-prefix check).
|
||||
*/
|
||||
readonly isParallelSafe?: (toolName: string) => boolean;
|
||||
/**
|
||||
* Context-window compactor wiring (Claude-Code `wU2` pattern).
|
||||
*
|
||||
* When set AND usage crosses the threshold, the loop replaces the
|
||||
* middle of the message history with a compact summary before the
|
||||
* next LLM call. The compact summary is persisted in the returned
|
||||
* `messages` — unlike reminders, this IS part of the canonical
|
||||
* history because raw turns got dropped.
|
||||
*
|
||||
* Contract:
|
||||
* - `maxContextTokens`: provider ceiling; compactor skips when this is 0 or less
|
||||
* (matches `shouldCompact()`'s safe-bail behaviour).
|
||||
* - `compact`: async callback that performs the compaction. Pass
|
||||
* `compactHistory` from this package or an adapter that uses a
|
||||
* cheaper model (e.g. Haiku) for the compactor's LLM call.
|
||||
* - `threshold`: optional override, default 0.92.
|
||||
*
|
||||
* Compaction fires at MOST once per loop run — once a round has been
|
||||
* compacted, we don't re-trigger until the next run, even if the
|
||||
* fresh history hits the threshold again (defence-in-depth against
|
||||
* a runaway tool that keeps bloating turns).
|
||||
*/
|
||||
readonly compactor?: {
|
||||
readonly maxContextTokens: number;
|
||||
readonly threshold?: number;
|
||||
readonly compact: (
|
||||
messages: readonly ChatMessage[]
|
||||
) => Promise<{ readonly messages: readonly ChatMessage[]; readonly compactedTurns: number }>;
|
||||
};
|
||||
}
|
||||
|
||||
/** Max concurrent tool executions per round. Mirrors Claude Code's gW5
|
||||
|
|
@ -206,10 +235,30 @@ export async function runPlannerLoop(opts: {
|
|||
let rounds = 0;
|
||||
let promptTokens = 0;
|
||||
let completionTokens = 0;
|
||||
let compactionsDone = 0;
|
||||
|
||||
while (rounds < maxRounds) {
|
||||
rounds++;
|
||||
|
||||
// Context-window compactor (Claude-Code `wU2`): check BEFORE the
|
||||
// next LLM call whether the previous round's usage crossed the
|
||||
// threshold; if so, replace the middle of `messages` with a
|
||||
// compact summary. Fire at most once per loop run so a runaway
|
||||
// tool can't keep re-triggering.
|
||||
if (input.compactor && compactionsDone === 0) {
|
||||
const total = promptTokens + completionTokens;
|
||||
const cap = input.compactor.maxContextTokens;
|
||||
const threshold = input.compactor.threshold ?? 0.92;
|
||||
if (cap > 0 && total > 0 && total / cap >= threshold) {
|
||||
const compactResult = await input.compactor.compact(messages);
|
||||
if (compactResult.compactedTurns > 0) {
|
||||
messages.length = 0;
|
||||
for (const m of compactResult.messages) messages.push(m);
|
||||
compactionsDone++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Per-round reminder injection: ask the channel for transient
|
||||
// hints, wrap each in <reminder> tags, and prepend them as system
|
||||
// messages to THIS request only. Nothing gets pushed to `messages`
|
||||
|
|
@ -277,10 +326,11 @@ export async function runPlannerLoop(opts: {
|
|||
// In both modes we append to `messages` in the LLM's original
|
||||
// call order, not completion order, so the debug-log stays linear.
|
||||
const calls = response.toolCalls;
|
||||
const parallelSafePredicate = input.isParallelSafe;
|
||||
const allParallelSafe =
|
||||
!!input.isParallelSafe &&
|
||||
!!parallelSafePredicate &&
|
||||
calls.length > 1 &&
|
||||
calls.every((c) => input.isParallelSafe!(c.name));
|
||||
calls.every((c) => parallelSafePredicate(c.name));
|
||||
|
||||
if (allParallelSafe) {
|
||||
for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue