From 3d8214a1476498be5a91cab81cdc41947ce221a2 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:25:35 +0200 Subject: [PATCH] feat(shared-ai): wire compactor into runPlannerLoop (M2.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PlannerLoopInput grows an optional compactor: compactor?: { maxContextTokens: number; threshold?: number; // default 0.92, matches Claude Code wU2 compact: (messages) => Promise<{ messages, compactedTurns }>; } Before each LLM call the loop checks whether promptTokens+completionTokens has crossed threshold × maxContextTokens. If yes AND we haven't compacted this run yet, the callback runs, its returned messages REPLACE the live history, and compactionsDone flips to 1 so a runaway tool can't re-trigger (the guard latches only when the compactor actually folded turns; a no-op result with compactedTurns = 0 leaves the history untouched and the check eligible to fire again on a later round). Design choices: - Fires at most ONCE per loop run. If the fresh (compacted) history hits the threshold again in the same run, the LLM round budget will hit first; better to terminate than to recursively compact a summary. - No reminder emitted automatically — the caller can wire that via reminderChannel by reading compactionsDone from LoopState (next PR; compactionsDone isn't exposed yet to keep the state surface small). - compactor callback is injectable, not hardcoded to compactHistory() from compact.ts. Lets mana-ai route the compactor LLM call to a cheaper model (Haiku) without changing the loop. - Zero maxContextTokens → skip silently (same contract as shouldCompact()). Also cleaned up the isParallelSafe non-null-assertion warning by hoisting the predicate to a local with proper narrowing. 5 new loop tests: below-threshold no-op, single-fire replacement, once-per-run idempotency, zero-cap bail, no-op when compactor returns 0 turns. 76 shared-ai tests total, green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/shared-ai/src/planner/loop.test.ts | 184 ++++++++++++++++++++ packages/shared-ai/src/planner/loop.ts | 54 +++++- 2 files changed, 236 insertions(+), 2 deletions(-) diff --git a/packages/shared-ai/src/planner/loop.test.ts b/packages/shared-ai/src/planner/loop.test.ts index 82263a0e9..1acd349f0 100644 --- a/packages/shared-ai/src/planner/loop.test.ts +++ b/packages/shared-ai/src/planner/loop.test.ts @@ -328,6 +328,190 @@ describe('runPlannerLoop — parallel reads', () => { }); }); +describe('runPlannerLoop — compactor', () => { + it('does not compact below the threshold', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 500, completionTokens: 0, totalTokens: 500 }, // 50% + }); + llm.enqueueStop('done'); + + const compactSpy = vi.fn(); + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => { + compactSpy(); + return { messages: m, compactedTurns: 0 }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactSpy).not.toHaveBeenCalled(); + }); + + it('fires when usage crosses the threshold and replaces messages', async () => { + const llm = new MockLlmClient(); + // Round 1: tool call that reports 92% of the 1000-token budget + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 920, completionTokens: 0, totalTokens: 920 }, + }); + // Round 2: after compaction fires, the LLM stops + llm.enqueueStop('done'); + + let compactorInput: readonly { role: string; content?: string | null }[] = []; + await runPlannerLoop({ + llm, + input: { 
+ systemPrompt: 's-prompt', + userPrompt: 'u-prompt', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => { + compactorInput = m; + return { + messages: [ + { role: 'system', content: 's-prompt' }, + { role: 'user', content: 'u-prompt' }, + { role: 'assistant', content: 'FOLDED' }, + ], + compactedTurns: 2, + }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + // The compactor received the full post-round-1 history + expect(compactorInput.length).toBeGreaterThan(2); + // The round-2 LLM request saw the compacted history, not the raw one + const round2Seen = llm.calls[1].messages; + expect(round2Seen).toHaveLength(3); + expect(round2Seen[2].content).toContain('FOLDED'); + }); + + it('fires at most once per run', async () => { + const llm = new MockLlmClient(); + for (let i = 0; i < 4; i++) { + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: `c${i}`, name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, // always over threshold + }); + } + llm.enqueueStop('done'); + + let compactCallCount = 0; + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + maxRounds: 10, + compactor: { + maxContextTokens: 1000, + compact: async () => { + compactCallCount++; + return { + messages: [ + { role: 'system', content: 's' }, + { role: 'user', content: 'u' }, + { role: 'assistant', content: '' }, + ], + compactedTurns: 2, + }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactCallCount).toBe(1); + }); + + it('bails out silently when maxContextTokens is 0', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: 'done', + toolCalls: [], + finishReason: 'stop', + usage: { promptTokens: 9_999, completionTokens: 0, totalTokens: 
9_999 }, + }); + + const compactSpy = vi.fn(); + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 0, // disabled + compact: async (m) => { + compactSpy(); + return { messages: m, compactedTurns: 0 }; + }, + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + expect(compactSpy).not.toHaveBeenCalled(); + }); + + it('skips when the compactor returns 0 compacted turns', async () => { + const llm = new MockLlmClient(); + (llm as unknown as { queue: unknown[] }).queue.push({ + content: null, + toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }], + finishReason: 'tool_calls', + usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, + }); + llm.enqueueStop('done'); + + await runPlannerLoop({ + llm, + input: { + systemPrompt: 's', + userPrompt: 'u', + tools, + model: 'm', + compactor: { + maxContextTokens: 1000, + compact: async (m) => ({ messages: m, compactedTurns: 0 }), + }, + }, + onToolCall: async () => ({ success: true, message: 'ok' }), + }); + + // Round 2 should have seen the ORIGINAL history (untouched by the + // no-op compactor) — just system + user + assistant + tool + const round2Seen = llm.calls[1].messages; + expect(round2Seen).toHaveLength(4); + }); +}); + describe('runPlannerLoop — reminderChannel', () => { it('injects reminders as transient system messages on the LLM call', async () => { const llm = new MockLlmClient().enqueueStop('done'); diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts index 747cb247f..706a16689 100644 --- a/packages/shared-ai/src/planner/loop.ts +++ b/packages/shared-ai/src/planner/loop.ts @@ -144,6 +144,35 @@ export interface PlannerLoopInput { * constant-time lookups are expected (registry hit, name-prefix check). */ readonly isParallelSafe?: (toolName: string) => boolean; + /** + * Context-window compactor wiring (Claude-Code `wU2` pattern). 
+ * + * When set AND usage crosses the threshold, the loop replaces the + * middle of the message history with a compact summary before the + * next LLM call. The compact summary is persisted in the returned + * `messages` — unlike reminders, this IS part of the canonical + * history because raw turns got dropped. + * + * Contract: + * - `maxContextTokens`: provider ceiling; compactor skips when unset + * (matches `shouldCompact()`'s safe-bail behaviour). + * - `compact`: async callback that performs the compaction. Pass + * `compactHistory` from this package or an adapter that uses a + * cheaper model (e.g. Haiku) for the compactor's LLM call. + * - `threshold`: optional override, default 0.92. + * + * Compaction fires at MOST once per loop run — once a round has been + * compacted, we don't re-trigger until the next run, even if the + * fresh history hits the threshold again (defence-in-depth against + * a runaway tool that keeps bloating turns). + */ + readonly compactor?: { + readonly maxContextTokens: number; + readonly threshold?: number; + readonly compact: ( + messages: readonly ChatMessage[] + ) => Promise<{ readonly messages: readonly ChatMessage[]; readonly compactedTurns: number }>; + }; } /** Max concurrent tool executions per round. Mirrors Claude Code's gW5 @@ -206,10 +235,30 @@ export async function runPlannerLoop(opts: { let rounds = 0; let promptTokens = 0; let completionTokens = 0; + let compactionsDone = 0; while (rounds < maxRounds) { rounds++; + // Context-window compactor (Claude-Code `wU2`): check BEFORE the + // next LLM call whether the previous round's usage crossed the + // threshold; if so, replace the middle of `messages` with a + // compact summary. Fire at most once per loop run so a runaway + // tool can't keep re-triggering. + if (input.compactor && compactionsDone === 0) { + const total = promptTokens + completionTokens; + const cap = input.compactor.maxContextTokens; + const threshold = input.compactor.threshold ?? 
0.92; + if (cap > 0 && total > 0 && total / cap >= threshold) { + const compactResult = await input.compactor.compact(messages); + if (compactResult.compactedTurns > 0) { + messages.length = 0; + for (const m of compactResult.messages) messages.push(m); + compactionsDone++; + } + } + } + // Per-round reminder injection: ask the channel for transient // hints, wrap each in tags, and prepend them as system // messages to THIS request only. Nothing gets pushed to `messages` @@ -277,10 +326,11 @@ export async function runPlannerLoop(opts: { // In both modes we append to `messages` in the LLM's original // call order, not completion order, so the debug-log stays linear. const calls = response.toolCalls; + const parallelSafePredicate = input.isParallelSafe; const allParallelSafe = - !!input.isParallelSafe && + !!parallelSafePredicate && calls.length > 1 && - calls.every((c) => input.isParallelSafe!(c.name)); + calls.every((c) => parallelSafePredicate(c.name)); if (allParallelSafe) { for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {