From 3d8214a1476498be5a91cab81cdc41947ce221a2 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:25:35 +0200 Subject: [PATCH] feat(shared-ai): wire compactor into runPlannerLoop (M2.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PlannerLoopInput grows an optional compactor: compactor?: { maxContextTokens: number; threshold?: number; // default 0.92, matches Claude Code wU2 compact: (messages) => Promise<{ messages, compactedTurns }>; } Before each LLM call the loop checks whether promptTokens+completionTokens has crossed threshold × maxContextTokens. If yes AND we haven't compacted this run yet, the callback runs, its returned messages REPLACE the live history, and compactionsDone flips to 1 so a runaway tool can't re-trigger (the guard latches only when the compactor actually folded turns; a no-op result with compactedTurns = 0 leaves the history untouched and the check eligible to fire again on a later round). Design choices: - Fires at most ONCE per loop run. If the fresh (compacted) history hits the threshold again in the same run, the LLM round budget will hit first; better to terminate than to recursively compact a summary. - No reminder emitted automatically — the caller can wire that via reminderChannel by reading compactionsDone from LoopState (next PR; compactionsDone isn't exposed yet to keep the state surface small). - compactor callback is injectable, not hardcoded to compactHistory() from compact.ts. Lets mana-ai route the compactor LLM call to a cheaper model (Haiku) without changing the loop. - Zero maxContextTokens → skip silently (same contract as shouldCompact()). Also cleaned up the isParallelSafe non-null-assertion warning by hoisting the predicate to a local with proper narrowing. 5 new loop tests: below-threshold no-op, single-fire replacement, once-per-run idempotency, zero-cap bail, no-op when compactor returns 0 turns. 76 shared-ai tests total, green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/shared-ai/src/planner/loop.test.ts | 184 ++++++++++++++++++++ packages/shared-ai/src/planner/loop.ts | 54 +++++- 2 files changed, 236 insertions(+), 2 deletions(-) diff --git a/packages/shared-ai/src/planner/loop.test.ts b/packages/shared-ai/src/planner/loop.test.ts index 82263a0e9..1acd349f0 100644 --- a/packages/shared-ai/src/planner/loop.test.ts +++ b/packages/shared-ai/src/planner/loop.test.ts @@ -328,6 +328,190 @@ describe('runPlannerLoop — parallel reads', () => { }); }); +describe('runPlannerLoop — compactor', () => { + it('does not compact below the threshold', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 500, completionTokens: 0, totalTokens: 500 }, // 50% + }); + llm.enqueueStop('done'); + + const compactSpy = vi.fn(); + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => { + compactSpy(); + return { messages: m, compactedTurns: 0 }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactSpy).not.toHaveBeenCalled(); + }); + + it('fires when usage crosses the threshold and replaces messages', async () => { + const llm = new MockLlmClient(); + // Round 1: tool call that reports 92% of the 1000-token budget + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 920, completionTokens: 0, totalTokens: 920 }, + }); + // Round 2: after compaction fires, the LLM stops + llm.enqueueStop('done'); + + let compactorInput: readonly { role: string; content?: string | null }[] = []; + await runPlannerLoop({ + llm, + input: { 
+ systemPrompt: 's-prompt', + userPrompt: 'u-prompt', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => { + compactorInput = m; + return { + messages: [ + { role: 'system', content: 's-prompt' }, + { role: 'user', content: 'u-prompt' }, + { role: 'assistant', content: 'FOLDED' }, + ], + compactedTurns: 2, + }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + // The compactor received the full post-round-1 history + expect(compactorInput.length).toBeGreaterThan(2); + // The round-2 LLM request saw the compacted history, not the raw one + const round2Seen = llm.calls[1].messages; + expect(round2Seen).toHaveLength(3); + expect(round2Seen[2].content).toContain('FOLDED'); + }); + + it('fires at most once per run', async () => { + const llm = new MockLlmClient(); + for (let i = 0; i < 4; i++) { + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: `c${i}`, name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, // always over threshold + }); + } + llm.enqueueStop('done'); + + let compactCallCount = 0; + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + maxRounds: 10, + compactor: { + maxContextTokens: 1000, + compact: async () => { + compactCallCount++; + return { + messages: [ + { role: 'system', content: 's' }, + { role: 'user', content: 'u' }, + { role: 'assistant', content: '' }, + ], + compactedTurns: 2, + }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactCallCount).toBe(1); + }); + + it('bails out silently when maxContextTokens is 0', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: 'done', + toolCalls: [], + finishReason: 'stop', + usage: { promptTokens: 9_999, completionTokens: 0, totalTokens: 
9_999 }, + }); + + const compactSpy = vi.fn(); + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 0, // disabled + compact: async (m) => { + compactSpy(); + return { messages: m, compactedTurns: 0 }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactSpy).not.toHaveBeenCalled(); + }); + + it('skips when the compactor returns 0 compacted turns', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, + }); + llm.enqueueStop('done'); + + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => ({ messages: m, compactedTurns: 0 }), + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + // Round 2 should have seen the ORIGINAL history (untouched by the + // no-op compactor) — just system + user + assistant + tool + const round2Seen = llm.calls[1].messages; + expect(round2Seen).toHaveLength(4); + }); +}); + describe('runPlannerLoop — reminderChannel', () => { it('injects reminders as transient system messages on the LLM call', async () => { const llm = new MockLlmClient().enqueueStop('done'); diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts index 747cb247f..706a16689 100644 --- a/packages/shared-ai/src/planner/loop.ts +++ b/packages/shared-ai/src/planner/loop.ts @@ -144,6 +144,35 @@ export interface PlannerLoopInput { * constant-time lookups are expected (registry hit, name-prefix check). */ readonly isParallelSafe?: (toolName: string) => boolean; + /** + * Context-window compactor wiring (Claude-Code `wU2` pattern). 
+ * + * When set AND usage crosses the threshold, the loop replaces the + * middle of the message history with a compact summary before the + * next LLM call. The compact summary is persisted in the returned + * `messages` — unlike reminders, this IS part of the canonical + * history because raw turns got dropped. + * + * Contract: + * - `maxContextTokens`: provider ceiling; compactor skips when unset + * (matches `shouldCompact()`'s safe-bail behaviour). + * - `compact`: async callback that performs the compaction. Pass + * `compactHistory` from this package or an adapter that uses a + * cheaper model (e.g. Haiku) for the compactor's LLM call. + * - `threshold`: optional override, default 0.92. + * + * Compaction fires at MOST once per loop run — once a round has been + * compacted, we don't re-trigger until the next run, even if the + * fresh history hits the threshold again (defence-in-depth against + * a runaway tool that keeps bloating turns). + */ + readonly compactor?: { + readonly maxContextTokens: number; + readonly threshold?: number; + readonly compact: ( + messages: readonly ChatMessage[] + ) => Promise<{ readonly messages: readonly ChatMessage[]; readonly compactedTurns: number }>; + }; } /** Max concurrent tool executions per round. Mirrors Claude Code's gW5 @@ -206,10 +235,30 @@ export async function runPlannerLoop(opts: { let rounds = 0; let promptTokens = 0; let completionTokens = 0; + let compactionsDone = 0; while (rounds < maxRounds) { rounds++; + // Context-window compactor (Claude-Code `wU2`): check BEFORE the + // next LLM call whether the previous round's usage crossed the + // threshold; if so, replace the middle of `messages` with a + // compact summary. Fire at most once per loop run so a runaway + // tool can't keep re-triggering. + if (input.compactor && compactionsDone === 0) { + const total = promptTokens + completionTokens; + const cap = input.compactor.maxContextTokens; + const threshold = input.compactor.threshold ?? 
0.92; + if (cap > 0 && total > 0 && total / cap >= threshold) { + const compactResult = await input.compactor.compact(messages); + if (compactResult.compactedTurns > 0) { + messages.length = 0; + for (const m of compactResult.messages) messages.push(m); + compactionsDone++; + } + } + } + // Per-round reminder injection: ask the channel for transient // hints, wrap each in tags, and prepend them as system // messages to THIS request only. Nothing gets pushed to `messages` @@ -277,10 +326,11 @@ export async function runPlannerLoop(opts: { // In both modes we append to `messages` in the LLM's original // call order, not completion order, so the debug-log stays linear. const calls = response.toolCalls; + const parallelSafePredicate = input.isParallelSafe; const allParallelSafe = - !!input.isParallelSafe && + !!parallelSafePredicate && calls.length > 1 && - calls.every((c) => input.isParallelSafe!(c.name)); + calls.every((c) => parallelSafePredicate(c.name)); if (allParallelSafe) { for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {