diff --git a/packages/shared-ai/src/planner/loop.test.ts b/packages/shared-ai/src/planner/loop.test.ts
index 82263a0e9..1acd349f0 100644
--- a/packages/shared-ai/src/planner/loop.test.ts
+++ b/packages/shared-ai/src/planner/loop.test.ts
@@ -328,6 +328,190 @@ describe('runPlannerLoop — parallel reads', () => {
});
});

+describe('runPlannerLoop — compactor', () => {
+ it('does not compact below the threshold', async () => {
+ const llm = new MockLlmClient();
+ (llm as unknown as { queue: unknown[] }).queue.push({
+ content: null,
+ toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
+ finishReason: 'tool_calls',
+ usage: { promptTokens: 500, completionTokens: 0, totalTokens: 500 }, // 50%
+ });
+ llm.enqueueStop('done');
+
+ const compactSpy = vi.fn();
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's',
+ userPrompt: 'u',
+ tools,
+ model: 'm',
+ compactor: {
+ maxContextTokens: 1000,
+ compact: async (m) => {
+ compactSpy();
+ return { messages: m, compactedTurns: 0 };
+ },
+ },
+ },
+ onToolCall: async () => ({ success: true, message: 'ok' }),
+ });
+
+ expect(compactSpy).not.toHaveBeenCalled();
+ });
+
+ it('fires when usage crosses the threshold and replaces messages', async () => {
+ const llm = new MockLlmClient();
+ // Round 1: tool call that reports 92% of the 1000-token budget
+ (llm as unknown as { queue: unknown[] }).queue.push({
+ content: null,
+ toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
+ finishReason: 'tool_calls',
+ usage: { promptTokens: 920, completionTokens: 0, totalTokens: 920 },
+ });
+ // Round 2: after compaction fires, the LLM stops
+ llm.enqueueStop('done');
+
+ let compactorInput: readonly { role: string; content?: string | null }[] = [];
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's-prompt',
+ userPrompt: 'u-prompt',
+ tools,
+ model: 'm',
+ compactor: {
+ maxContextTokens: 1000,
+ compact: async (m) => {
+ compactorInput = m;
+ return {
+ messages: [
+ { role: 'system', content: 's-prompt' },
+ { role: 'user', content: 'u-prompt' },
+ { role: 'assistant', content: 'FOLDED' },
+ ],
+ compactedTurns: 2,
+ };
+ },
+ },
+ },
+ onToolCall: async () => ({ success: true, message: 'ok' }),
+ });
+
+ // The compactor received the full post-round-1 history
+ expect(compactorInput.length).toBeGreaterThan(2);
+ // The round-2 LLM request saw the compacted history, not the raw one
+ const round2Seen = llm.calls[1].messages;
+ expect(round2Seen).toHaveLength(3);
+ expect(round2Seen[2].content).toContain('FOLDED');
+ });
+
+ it('fires at most once per run', async () => {
+ const llm = new MockLlmClient();
+ for (let i = 0; i < 4; i++) {
+ (llm as unknown as { queue: unknown[] }).queue.push({
+ content: null,
+ toolCalls: [{ id: `c${i}`, name: 'list_things', arguments: {} }],
+ finishReason: 'tool_calls',
+ usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 }, // always over threshold
+ });
+ }
+ llm.enqueueStop('done');
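+    // Every queued response reports 950/1000 tokens, so each pre-call
+    // check after the first compaction is still over threshold; only
+    // the once-per-run guard keeps the compact count at 1.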
+
+ let compactCallCount = 0;
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's',
+ userPrompt: 'u',
+ tools,
+ model: 'm',
+ maxRounds: 10,
+ compactor: {
+ maxContextTokens: 1000,
+ compact: async () => {
+ compactCallCount++;
+ return {
+ messages: [
+ { role: 'system', content: 's' },
+ { role: 'user', content: 'u' },
+ { role: 'assistant', content: '' },
+ ],
+ compactedTurns: 2,
+ };
+ },
+ },
+ },
+ onToolCall: async () => ({ success: true, message: 'ok' }),
+ });
+
+ expect(compactCallCount).toBe(1);
+ });
+
+ it('bails out silently when maxContextTokens is 0', async () => {
+ const llm = new MockLlmClient();
+ (llm as unknown as { queue: unknown[] }).queue.push({
+ content: 'done',
+ toolCalls: [],
+ finishReason: 'stop',
+ usage: { promptTokens: 9_999, completionTokens: 0, totalTokens: 9_999 },
+ });
+
+ const compactSpy = vi.fn();
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's',
+ userPrompt: 'u',
+ tools,
+ model: 'm',
+ compactor: {
+ maxContextTokens: 0, // disabled
+ compact: async (m) => {
+ compactSpy();
+ return { messages: m, compactedTurns: 0 };
+ },
+ },
+ },
+ onToolCall: async () => ({ success: true, message: 'ok' }),
+ });
+
+ expect(compactSpy).not.toHaveBeenCalled();
+ });
+
+ it('skips when the compactor returns 0 compacted turns', async () => {
+ const llm = new MockLlmClient();
+ (llm as unknown as { queue: unknown[] }).queue.push({
+ content: null,
+ toolCalls: [{ id: 'c1', name: 'list_things', arguments: {} }],
+ finishReason: 'tool_calls',
+ usage: { promptTokens: 950, completionTokens: 0, totalTokens: 950 },
+ });
+ llm.enqueueStop('done');
+
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's',
+ userPrompt: 'u',
+ tools,
+ model: 'm',
+ compactor: {
+ maxContextTokens: 1000,
+ compact: async (m) => ({ messages: m, compactedTurns: 0 }),
+ },
+ },
+ onToolCall: async () => ({ success: true, message: 'ok' }),
+ });
+
+ // Round 2 should have seen the ORIGINAL history (untouched by the
+ // no-op compactor) — just system + user + assistant + tool
+ const round2Seen = llm.calls[1].messages;
+ expect(round2Seen).toHaveLength(4);
+ });
+});
+
describe('runPlannerLoop — reminderChannel', () => {
it('injects reminders as transient system messages on the LLM call', async () => {
const llm = new MockLlmClient().enqueueStop('done');
diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts
index 747cb247f..706a16689 100644
--- a/packages/shared-ai/src/planner/loop.ts
+++ b/packages/shared-ai/src/planner/loop.ts
@@ -144,6 +144,35 @@ export interface PlannerLoopInput {
* constant-time lookups are expected (registry hit, name-prefix check).
*/
readonly isParallelSafe?: (toolName: string) => boolean;
+ /**
+ * Context-window compactor wiring (Claude-Code `wU2` pattern).
+ *
+ * When set AND usage crosses the threshold, the loop replaces the
+ * middle of the message history with a compact summary before the
+ * next LLM call. The compact summary is persisted in the returned
+ * `messages` — unlike reminders, this IS part of the canonical
+ * history because raw turns got dropped.
+ *
+ * Contract:
+   * - `maxContextTokens`: provider ceiling; the compactor skips when
+   *   this is 0 or negative (matches `shouldCompact()`'s safe-bail
+   *   behaviour).
+ * - `compact`: async callback that performs the compaction. Pass
+ * `compactHistory` from this package or an adapter that uses a
+ * cheaper model (e.g. Haiku) for the compactor's LLM call.
+ * - `threshold`: optional override, default 0.92.
+ *
+   * Compaction fires at MOST once per loop run: once a round has been
+   * compacted (the callback returned `compactedTurns > 0`), we don't
+   * re-trigger until the next run, even if the fresh history hits the
+   * threshold again (defence-in-depth against a runaway tool that
+   * keeps bloating turns). A no-op result (`compactedTurns === 0`) is
+   * ignored and leaves the history untouched.
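+   *
+   * Minimal wiring sketch (the `compactHistory` call shape below is
+   * illustrative; check its actual signature in this package, and
+   * 200_000 is only an example ceiling):
+   *
+   * ```ts
+   * await runPlannerLoop({
+   *   llm,
+   *   input: {
+   *     systemPrompt, userPrompt, tools, model,
+   *     compactor: {
+   *       maxContextTokens: 200_000, // example provider ceiling
+   *       compact: (messages) => compactHistory(messages),
+   *     },
+   *   },
+   *   onToolCall,
+   * });
+   * ```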
+ */
+ readonly compactor?: {
+ readonly maxContextTokens: number;
+ readonly threshold?: number;
+ readonly compact: (
+ messages: readonly ChatMessage[]
+ ) => Promise<{ readonly messages: readonly ChatMessage[]; readonly compactedTurns: number }>;
+ };
}

/** Max concurrent tool executions per round. Mirrors Claude Code's gW5
@@ -206,10 +235,30 @@ export async function runPlannerLoop(opts: {
let rounds = 0;
let promptTokens = 0;
let completionTokens = 0;
+  let compactionsDone = 0;

while (rounds < maxRounds) {
rounds++;

+ // Context-window compactor (Claude-Code `wU2`): check BEFORE the
+ // next LLM call whether the previous round's usage crossed the
+ // threshold; if so, replace the middle of `messages` with a
+    // compact summary. A successful compaction (compactedTurns > 0)
+    // fires at most once per loop run so a runaway tool can't keep
+    // re-triggering; a no-op result is ignored, so the check may run
+    // again next round.
+ if (input.compactor && compactionsDone === 0) {
+ const total = promptTokens + completionTokens;
+ const cap = input.compactor.maxContextTokens;
+ const threshold = input.compactor.threshold ?? 0.92;
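+      // e.g. with the default threshold, a 1000-token cap trips once
+      // the reported usage reaches 920 tokens (920 / 1000 >= 0.92).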
+ if (cap > 0 && total > 0 && total / cap >= threshold) {
+ const compactResult = await input.compactor.compact(messages);
+ if (compactResult.compactedTurns > 0) {
+ messages.length = 0;
+ for (const m of compactResult.messages) messages.push(m);
+ compactionsDone++;
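+          // Token counters are not reset here; the compactionsDone
+          // guard alone blocks a second compaction for this run.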
+ }
+ }
+ }
+
// Per-round reminder injection: ask the channel for transient
// hints, wrap each in tags, and prepend them as system
// messages to THIS request only. Nothing gets pushed to `messages`
@@ -277,10 +326,11 @@ export async function runPlannerLoop(opts: {
// In both modes we append to `messages` in the LLM's original
// call order, not completion order, so the debug-log stays linear.
const calls = response.toolCalls;
+ const parallelSafePredicate = input.isParallelSafe;
const allParallelSafe =
- !!input.isParallelSafe &&
+ !!parallelSafePredicate &&
calls.length > 1 &&
- calls.every((c) => input.isParallelSafe!(c.name));
+ calls.every((c) => parallelSafePredicate(c.name));
if (allParallelSafe) {
for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {