feat(shared-ai): compactHistory() — context-window compactor primitive (M2.1)

The Claude-Code wU2 pattern: when token usage hits ~92% of the provider's
context budget, fold all pre-tail turns into a single structured summary
(Goal / Decisions / Tools Called / Current Progress) so subsequent
rounds see a synopsis instead of the raw log.

This commit ships ONLY the primitive. Wiring it into runPlannerLoop
(auto-trigger before the next LLM call when shouldCompact() fires)
is M2.2 so the surface stays small and testable.

New exports from @mana/shared-ai:

  - shouldCompact(totalTokens, maxContextTokens, threshold?)
      → boolean; DEFAULT_COMPACT_THRESHOLD = 0.92, matching Claude Code.
      Bails safely when maxContextTokens is missing (local models often
      don't report usage).

  - compactHistory(messages, { llm, model, keepRecent?, temperature? })
      → { messages, summary, compactedTurns, usage? }
      Preserves: [0]=system, [1]=first user, [last N]=recent turns
      (default 4). Everything between gets sent through the compact
      agent with COMPACT_SYSTEM_PROMPT — a fixed 4-section Markdown
      schema. Temperature default 0.2 because we want summarisation,
      not creativity.

  - parseCompactSummary / renderCompactSummary — round-trip helpers.
      Parser is tolerant (missing sections → empty string) so a partial
      compaction still produces a usable summary.

The summary replaces the middle as a single role='assistant' message
wrapped in <compact-summary> tags. Assistant role (not system) because
some providers reject arbitrary system messages deep in history.

Tests: 17 new across the 4 exports (trigger logic, Markdown round-trip,
structural preservation of anchors + tail, usage passthrough, custom
keepRecent). All 71 shared-ai tests green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-23 15:21:10 +02:00
parent 13efae8cd2
commit 13361eb083
4 changed files with 475 additions and 0 deletions

View file

@ -85,10 +85,18 @@ export type {
// Planner surface, including the M2.1 context-window compactor
// primitives (shouldCompact / compactHistory and their helpers).
export {
  buildPlannerPrompt,
  buildSystemPrompt,
  compactHistory,
  COMPACT_SYSTEM_PROMPT,
  DEFAULT_COMPACT_KEEP_RECENT,
  DEFAULT_COMPACT_THRESHOLD,
  MockLlmClient,
  parseCompactSummary,
  parsePlannerResponse,
  renderCompactSummary,
  runPlannerLoop,
  shouldCompact,
} from './planner';
export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './planner';
export {
AI_PROPOSABLE_TOOL_NAMES,

View file

@ -0,0 +1,204 @@
import { describe, expect, it } from 'vitest';
import {
COMPACT_SYSTEM_PROMPT,
DEFAULT_COMPACT_KEEP_RECENT,
DEFAULT_COMPACT_THRESHOLD,
compactHistory,
parseCompactSummary,
renderCompactSummary,
shouldCompact,
} from './compact';
import { MockLlmClient } from './mock-llm';
import type { ChatMessage } from './loop';
// ─── shouldCompact ─────────────────────────────────────────────────
describe('shouldCompact', () => {
  // Shared context budget; only the usage numerator varies per case.
  const budget = 100_000;
  it('returns true at the 92% threshold', () => {
    expect(shouldCompact(92_000, budget)).toBe(true);
  });
  it('returns false below the threshold', () => {
    expect(shouldCompact(91_000, budget)).toBe(false);
  });
  it('returns false when maxContextTokens is missing', () => {
    // Local models often report no usage — both "absent" and "zero"
    // ceilings must bail.
    for (const ceiling of [undefined, 0]) {
      expect(shouldCompact(50_000, ceiling)).toBe(false);
    }
  });
  it('returns false on zero usage', () => {
    expect(shouldCompact(0, budget)).toBe(false);
  });
  it('respects a custom threshold', () => {
    expect(shouldCompact(50_000, budget, 0.5)).toBe(true);
    expect(shouldCompact(49_999, budget, 0.5)).toBe(false);
  });
  it('constant matches Claude Code (0.92)', () => {
    expect(DEFAULT_COMPACT_THRESHOLD).toBe(0.92);
  });
});
// ─── parseCompactSummary ───────────────────────────────────────────
describe('parseCompactSummary', () => {
  it('parses the canonical 4-section block', () => {
    const fixture = `## Goal
Alle offenen Tasks bis Freitag abschliessen.
## Decisions
- Prio: Release-Blocker zuerst
- Keine neuen Features
## Tools Called
- list_tasks(open) -> 12 Tasks
- complete_task(T-42) -> ok
## Current Progress
8 von 12 Tasks erledigt; naechste Aktion: T-19 in Angriff nehmen.`;
    const parsed = parseCompactSummary(fixture);
    expect(parsed.goal).toContain('Alle offenen Tasks');
    expect(parsed.decisions).toContain('Prio: Release-Blocker');
    expect(parsed.toolsCalled).toContain('list_tasks');
    expect(parsed.currentProgress).toContain('8 von 12');
  });
  it('tolerates missing sections', () => {
    const parsed = parseCompactSummary(`## Goal\nFoo bar.\n\n## Decisions\n(keine)`);
    expect(parsed.goal).toBe('Foo bar.');
    expect(parsed.decisions).toBe('(keine)');
    // Absent sections degrade to empty strings, never throw.
    expect(parsed.toolsCalled).toBe('');
    expect(parsed.currentProgress).toBe('');
  });
  it('is case-insensitive on headers', () => {
    const parsed = parseCompactSummary(`## GOAL\nX\n\n## decisions\nY`);
    expect(parsed.goal).toBe('X');
    expect(parsed.decisions).toBe('Y');
  });
  it('returns empty summary for unparseable input', () => {
    expect(parseCompactSummary('this is not a markdown block')).toEqual({
      goal: '',
      decisions: '',
      toolsCalled: '',
      currentProgress: '',
    });
  });
});
// ─── renderCompactSummary ──────────────────────────────────────────
describe('renderCompactSummary', () => {
  it('wraps the summary in <compact-summary> tags', () => {
    const rendered = renderCompactSummary({
      goal: 'G',
      decisions: 'D',
      toolsCalled: 'T',
      currentProgress: 'P',
    });
    expect(rendered.startsWith('<compact-summary>')).toBe(true);
    expect(rendered.endsWith('</compact-summary>')).toBe(true);
    expect(rendered).toContain('## Goal\nG');
    expect(rendered).toContain('## Decisions\nD');
  });
  it('fills empty sections with placeholders', () => {
    const blank = { goal: '', decisions: '', toolsCalled: '', currentProgress: '' };
    const rendered = renderCompactSummary(blank);
    // Empty goal/progress render as "unklar", empty lists as "(keine)".
    expect(rendered).toContain('unklar');
    expect(rendered).toContain('(keine)');
  });
});
// ─── compactHistory ────────────────────────────────────────────────
/** Build a synthetic history: 2 anchors, `middleLen` assistant+tool
 * pairs, then `keepRecent` tail assistants with predictable content. */
function buildHistory(middleLen: number, keepRecent: number): ChatMessage[] {
  const history: ChatMessage[] = [
    { role: 'system', content: 'Original system prompt' },
    { role: 'user', content: 'Original user task' },
  ];
  for (let i = 0; i < middleLen; i++) {
    history.push(
      { role: 'assistant', content: `middle-assistant-${i}` },
      { role: 'tool', toolCallId: `c${i}`, content: `middle-tool-${i}` }
    );
  }
  for (let i = 0; i < keepRecent; i++) {
    history.push({ role: 'assistant', content: `recent-${i}` });
  }
  return history;
}
describe('compactHistory', () => {
  it('returns history unchanged when there is nothing to compact', async () => {
    const llm = new MockLlmClient(); // no responses needed
    // 3 messages < system + user + keepRecent(4) + 1 → bail path.
    const msgs: ChatMessage[] = [
      { role: 'system', content: 's' },
      { role: 'user', content: 'u' },
      { role: 'assistant', content: 'only-turn' },
    ];
    const res = await compactHistory(msgs, { llm, model: 'm', keepRecent: 4 });
    expect(res.messages).toBe(msgs); // same reference — bailed fast
    expect(res.compactedTurns).toBe(0);
  });
  it('preserves system + first user + tail; replaces middle with compact-summary', async () => {
    const history = buildHistory(5, DEFAULT_COMPACT_KEEP_RECENT); // 2 + 10 + 4 = 16 msgs
    const llm = new MockLlmClient().enqueueStop(
      '## Goal\nX\n\n## Decisions\n-\n\n## Tools Called\n-\n\n## Current Progress\nhalfway'
    );
    const res = await compactHistory(history, { llm, model: 'compact-model' });
    expect(res.compactedTurns).toBe(10); // the 5 assistant+tool pairs
    expect(res.messages).toHaveLength(2 + 1 + DEFAULT_COMPACT_KEEP_RECENT); // system + user + summary + tail
    // Shape check
    expect(res.messages[0]).toEqual(history[0]); // system verbatim
    expect(res.messages[1]).toEqual(history[1]); // first user verbatim
    expect(res.messages[2].role).toBe('assistant');
    expect(res.messages[2].content).toContain('<compact-summary>');
    expect(res.messages[2].content).toContain('halfway');
    // Tail preserved in order
    for (let i = 0; i < DEFAULT_COMPACT_KEEP_RECENT; i++) {
      expect(res.messages[3 + i].content).toBe(`recent-${i}`);
    }
  });
  it('sends the compact system prompt to the LLM', async () => {
    const history = buildHistory(3, 4);
    const llm = new MockLlmClient().enqueueStop(
      '## Goal\n\n## Decisions\n\n## Tools Called\n\n## Current Progress\n'
    );
    await compactHistory(history, { llm, model: 'm' });
    // The compactor call must lead with its own system prompt, not the
    // conversation's original one.
    const seenByLlm = llm.calls[0].messages;
    expect(seenByLlm[0].role).toBe('system');
    expect(seenByLlm[0].content).toBe(COMPACT_SYSTEM_PROMPT);
  });
  it('returns summary + usage when the provider reports it', async () => {
    const history = buildHistory(3, 4);
    const llm = new MockLlmClient();
    // Direct queue manipulation to inject usage
    // (NOTE(review): presumably enqueueStop cannot attach usage, hence
    // the reach into the private queue — brittle if the mock changes.)
    (llm as unknown as { queue: unknown[] }).queue.push({
      content: '## Goal\nX\n\n## Decisions\n-\n\n## Tools Called\n-\n\n## Current Progress\nY',
      toolCalls: [],
      finishReason: 'stop',
      usage: { promptTokens: 100, completionTokens: 30, totalTokens: 130 },
    });
    const res = await compactHistory(history, { llm, model: 'm' });
    expect(res.summary.goal).toBe('X');
    expect(res.summary.currentProgress).toBe('Y');
    // compactHistory drops totalTokens — only prompt/completion pass through.
    expect(res.usage).toEqual({ promptTokens: 100, completionTokens: 30 });
  });
  it('respects a custom keepRecent value', async () => {
    const history = buildHistory(5, 6);
    const llm = new MockLlmClient().enqueueStop('## Goal\n\n## Decisions\n');
    const res = await compactHistory(history, { llm, model: 'm', keepRecent: 2 });
    // keepRecent=2 is smaller than the 6 we built — more aggressive compaction
    expect(res.messages).toHaveLength(2 + 1 + 2); // system + user + summary + 2 tail
    expect(res.messages[3].content).toBe('recent-4');
    expect(res.messages[4].content).toBe('recent-5');
  });
});

View file

@ -0,0 +1,253 @@
/**
 * Context-window compactor — the `wU2` pattern from Claude Code,
 * adapted for our `runPlannerLoop` messages shape.
 *
 * Why we need it: when a mission (or companion chat) spans many rounds
 * with chatty tool results, the `messages[]` list grows until the next
 * LLM call overflows the provider's context window. The naive failure
 * mode is a 400 from the provider; the subtler one is silent
 * quality-degradation as the LLM loses earlier turns.
 *
 * Claude Code handles this with a pre-emptive trigger at ~92 % of the
 * context budget: run the current history through a second LLM call
 * with a compact-prompt that forces a fixed schema — Goal, Decisions,
 * Tools Called, Current Progress — and splice that summary back into
 * the live loop so subsequent rounds see a short synopsis instead of
 * the raw turns.
 *
 * This module ships ONLY the pure primitive:
 * - `DEFAULT_COMPACT_THRESHOLD` — 0.92, matching Claude Code.
 * - `shouldCompact(totalTokens, maxContextTokens)` — boolean trigger.
 * - `compactHistory(messages, opts)` — async, calls the LLM with the
 *   compact-prompt and returns a fresh messages array with the
 *   pre-tail turns folded into one summary message.
 *
 * Wiring into `runPlannerLoop` (trigger + splice) is a follow-up PR;
 * keeping the primitive separate means we can unit-test compression
 * without mocking the loop state machine.
 */
import type { ChatMessage, LlmClient } from './loop';
/** Trigger threshold as a fraction of the context budget — Claude
 * Code's `wU2` fires at ~92 %, and we mirror that. */
export const DEFAULT_COMPACT_THRESHOLD = 0.92;
/** Number of tail turns the compactor always preserves VERBATIM so
 * the LLM's most recent in-progress reasoning stays intact for
 * coherence. */
export const DEFAULT_COMPACT_KEEP_RECENT = 4;
/**
 * Decide whether to compact by comparing token usage to a ceiling.
 *
 * An unknown or non-positive ceiling yields `false` so the caller can
 * skip silently when the provider reports no usage (common for local
 * models); zero or negative usage also yields `false`.
 */
export function shouldCompact(
  totalTokens: number,
  maxContextTokens: number | undefined,
  threshold: number = DEFAULT_COMPACT_THRESHOLD
): boolean {
  const ceilingKnown = maxContextTokens !== undefined && maxContextTokens > 0;
  if (!ceilingKnown || totalTokens <= 0) {
    return false;
  }
  const usageRatio = totalTokens / maxContextTokens;
  return usageRatio >= threshold;
}
/**
 * Structured shape the compactor prompt asks the LLM to produce. We
 * parse loosely — if any field is missing we fill with empty strings,
 * because a partial compaction is still better than no compaction.
 */
export interface CompactSummary {
  /** One-sentence restatement of the original objective. */
  readonly goal: string;
  /** Bullet list of decisions taken (direction, priority, scope). */
  readonly decisions: string;
  /** Bullet list: toolname(short args) -> short result, errors named. */
  readonly toolsCalled: string;
  /** Where the work stands now and the next concrete step. */
  readonly currentProgress: string;
}
/**
 * Fixed system prompt for the compact agent. It forces the reply into
 * the 4-section Markdown schema (## Goal / ## Decisions /
 * ## Tools Called / ## Current Progress) that `parseCompactSummary`
 * expects, forbids any text outside the block, and demands a German
 * answer even when tool responses are English (see its final rule).
 * Runtime string — do not reformat or translate.
 */
export const COMPACT_SYSTEM_PROMPT = `Du bist ein Compact-Agent. Komprimiere die nachfolgende Konversation nach festem Schema, damit sie in einen knappen Kontext passt.
Beantworte AUSSCHLIESSLICH mit einem Markdown-Block in exakt dieser Struktur:
## Goal
<Ein Satz. Was war das urspruengliche Ziel?>
## Decisions
<Stichpunkte. Welche Entscheidungen wurden getroffen (Richtung, Prioritaet, Scope)?>
## Tools Called
<Stichpunkte: toolname(arg-kurzform) -> Ergebnis-Kurzfassung. Fehler explizit nennen.>
## Current Progress
<Ein Satz. Wo steht die Arbeit JETZT? Was ist der naechste konkrete Schritt?>
Regeln:
- Keine Einleitung, keine Nachbemerkung. Nur der Markdown-Block.
- Keine erfundenen Fakten. Wenn du unsicher bist, schreib "unklar".
- Zitate und Begriffe 1:1 wenn sie fachlich sind (IDs, Feldnamen).
- Deutsche Antwort, auch wenn Tool-Responses englisch sind.`;
/**
 * Parse the compact-agent's raw Markdown response into a
 * `CompactSummary`. Deliberately tolerant: a section the model omitted
 * (or garbled) comes back as an empty string instead of failing the
 * whole compaction. Header matching is case-insensitive; a section
 * runs until the next `## ` header or the end of input.
 */
export function parseCompactSummary(raw: string): CompactSummary {
  const grab = (header: string): string => {
    const pattern = new RegExp(`##\\s+${header}\\s*\\n([\\s\\S]*?)(?=\\n##\\s|$)`, 'i');
    const hit = pattern.exec(raw);
    return hit?.[1]?.trim() ?? '';
  };
  return {
    goal: grab('Goal'),
    decisions: grab('Decisions'),
    toolsCalled: grab('Tools Called'),
    currentProgress: grab('Current Progress'),
  };
}
/**
 * Render a `CompactSummary` back into the single chat-message string
 * that gets spliced into the history, wrapped in <compact-summary>
 * tags. Empty goal/progress fall back to "unklar", empty lists to
 * "(keine)", mirroring the vocabulary the prompt itself mandates.
 */
export function renderCompactSummary(s: CompactSummary): string {
  const goal = s.goal || 'unklar';
  const decisions = s.decisions || '(keine)';
  const tools = s.toolsCalled || '(keine)';
  const progress = s.currentProgress || 'unklar';
  return `<compact-summary>\n## Goal\n${goal}\n\n## Decisions\n${decisions}\n\n## Tools Called\n${tools}\n\n## Current Progress\n${progress}\n</compact-summary>`;
}
export interface CompactHistoryOptions {
  /** LLM client used for the single compaction call. */
  readonly llm: LlmClient;
  /** Model identifier passed through to the client. */
  readonly model: string;
  /** How many most-recent turns to preserve verbatim. Default 4. */
  readonly keepRecent?: number;
  /** Compactor-LLM sampling temperature — we want summarisation,
   * not creativity. Default 0.2. */
  readonly temperature?: number;
}
export interface CompactHistoryResult {
  /** New history — or the original array unchanged when nothing was
   * compacted (same reference, no LLM call). */
  readonly messages: readonly ChatMessage[];
  /** Parsed 4-section summary; all fields empty on the bail path. */
  readonly summary: CompactSummary;
  /** Number of middle messages folded into the summary (0 on bail). */
  readonly compactedTurns: number;
  /** Token usage from the compactor call itself, when reported. */
  readonly usage?: { promptTokens: number; completionTokens: number };
}
/**
* Compact a message history:
* 1. Preserve the `system` prompt verbatim (always index 0).
* 2. Preserve the first `user` turn (the original objective).
* 3. Send everything in between + the turns up to `keepRecent` BEFORE
* the tail to the compact agent.
* 4. Preserve the last `keepRecent` turns verbatim.
*
* Returned messages:
* [ system, user, assistant(compact-summary), ...recentTurns ]
*
* Notes:
* - The compact-summary message is tagged role='assistant' because
* some providers reject arbitrary system messages deep in history.
* - If there's nothing to compact ( keepRecent+2 messages), the
* function returns the original messages unchanged no LLM call.
*/
export async function compactHistory(
messages: readonly ChatMessage[],
opts: CompactHistoryOptions
): Promise<CompactHistoryResult> {
const keepRecent = opts.keepRecent ?? DEFAULT_COMPACT_KEEP_RECENT;
// Find anchor points.
const firstSystem = messages.findIndex((m) => m.role === 'system');
const firstUser = messages.findIndex((m) => m.role === 'user');
// Bail if there's nothing to compact. Always need at least
// system + user + keepRecent + 1 compactable turn before it's worth it.
const minLength = (firstSystem >= 0 ? 1 : 0) + (firstUser >= 0 ? 1 : 0) + keepRecent + 1;
if (messages.length < minLength) {
return {
messages,
summary: { goal: '', decisions: '', toolsCalled: '', currentProgress: '' },
compactedTurns: 0,
};
}
const systemMsg = firstSystem >= 0 ? messages[firstSystem] : null;
const userMsg = firstUser >= 0 ? messages[firstUser] : null;
// Split: middle = everything between the 2 anchors and the tail;
// tail = last keepRecent turns.
const tailStart = messages.length - keepRecent;
const middle = messages.slice(
Math.max((firstUser >= 0 ? firstUser : firstSystem) + 1, 0),
tailStart
);
const tail = messages.slice(tailStart);
if (middle.length === 0) {
return {
messages,
summary: { goal: '', decisions: '', toolsCalled: '', currentProgress: '' },
compactedTurns: 0,
};
}
// Ask the compact agent to summarise the MIDDLE. We give it the
// original system+user as context so it can ground the summary
// against the original goal, but instruct it to only produce the
// Markdown block — not a continuation of the conversation.
const compactRequestMessages: ChatMessage[] = [
{ role: 'system', content: COMPACT_SYSTEM_PROMPT },
...(systemMsg
? [
{
...systemMsg,
content: `Urspruenglicher System-Prompt:\n${systemMsg.content ?? ''}`,
} as ChatMessage,
]
: []),
...(userMsg ? [userMsg] : []),
...middle,
{
role: 'user',
content:
'Komprimiere das obige in das Schema (## Goal / ## Decisions / ## Tools Called / ## Current Progress). Nur der Markdown-Block, keine Einleitung.',
},
];
const response = await opts.llm.complete({
messages: compactRequestMessages,
tools: [],
model: opts.model,
temperature: opts.temperature ?? 0.2,
});
const summary = parseCompactSummary(response.content ?? '');
const summaryMsg: ChatMessage = {
role: 'assistant',
content: renderCompactSummary(summary),
};
const compactedMessages: ChatMessage[] = [
...(systemMsg ? [systemMsg] : []),
...(userMsg ? [userMsg] : []),
summaryMsg,
...tail,
];
return {
messages: compactedMessages,
summary,
compactedTurns: middle.length,
usage: response.usage
? {
promptTokens: response.usage.promptTokens,
completionTokens: response.usage.completionTokens,
}
: undefined,
};
}

View file

@ -10,6 +10,16 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
export { buildSystemPrompt } from './system-prompt';
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
export { runPlannerLoop, LOOP_STATE_RECENT_CALLS_WINDOW, PARALLEL_TOOL_BATCH_SIZE } from './loop';
// Context-window compactor (M2.1): trigger check, compaction
// primitive, and summary round-trip helpers.
export {
  COMPACT_SYSTEM_PROMPT,
  DEFAULT_COMPACT_KEEP_RECENT,
  DEFAULT_COMPACT_THRESHOLD,
  compactHistory,
  parseCompactSummary,
  renderCompactSummary,
  shouldCompact,
} from './compact';
export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './compact';
export { MockLlmClient } from './mock-llm';
export type { MockLlmTurn } from './mock-llm';
export type {