feat(shared-ai): compactHistory() — context-window compactor primitive (M2.1)

The Claude-Code wU2 pattern: when token usage hits ~92% of the provider's
context budget, fold all pre-tail turns into a single structured summary
(Goal / Decisions / Tools Called / Current Progress) so subsequent
rounds see a synopsis instead of the raw log.

This commit ships ONLY the primitive. Wiring it into runPlannerLoop
(auto-trigger before the next LLM call when shouldCompact() fires)
is M2.2 so the surface stays small and testable.

New exports from @mana/shared-ai:

  - shouldCompact(totalTokens, maxContextTokens, threshold?)
      → boolean; DEFAULT_COMPACT_THRESHOLD = 0.92, matching Claude Code.
      Bails safely when maxContextTokens is missing (local models often
      don't report usage).

  - compactHistory(messages, { llm, model, keepRecent?, temperature? })
      → { messages, summary, compactedTurns, usage? }
      Preserves: [0]=system, [1]=first user, [last N]=recent turns
      (default 4). Everything between gets sent through the compact
      agent with COMPACT_SYSTEM_PROMPT — a fixed 4-section Markdown
      schema. Temperature default 0.2 because we want summarisation,
      not creativity.

  - parseCompactSummary / renderCompactSummary — round-trip helpers.
      Parser is tolerant (missing sections → empty string) so a partial
      compaction still produces a usable summary.

The summary replaces the middle as a single role='assistant' message
wrapped in <compact-summary> tags. Assistant role (not system) because
some providers reject arbitrary system messages deep in history.

Tests: 17 new across the 4 exports (trigger logic, Markdown round-trip,
structural preservation of anchors + tail, usage passthrough, custom
keepRecent). All 71 shared-ai tests green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-23 15:21:10 +02:00
parent 13efae8cd2
commit 13361eb083
4 changed files with 475 additions and 0 deletions

View file

@ -85,10 +85,18 @@ export type {
// Planner surface, including the M2.1 context-window compactor
// primitives (shouldCompact / compactHistory and their helpers).
export {
  buildPlannerPrompt,
  buildSystemPrompt,
  compactHistory,
  COMPACT_SYSTEM_PROMPT,
  DEFAULT_COMPACT_KEEP_RECENT,
  DEFAULT_COMPACT_THRESHOLD,
  MockLlmClient,
  parseCompactSummary,
  parsePlannerResponse,
  renderCompactSummary,
  runPlannerLoop,
  shouldCompact,
} from './planner';
export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './planner';
export {
AI_PROPOSABLE_TOOL_NAMES,

View file

@ -0,0 +1,204 @@
import { describe, expect, it } from 'vitest';
import {
COMPACT_SYSTEM_PROMPT,
DEFAULT_COMPACT_KEEP_RECENT,
DEFAULT_COMPACT_THRESHOLD,
compactHistory,
parseCompactSummary,
renderCompactSummary,
shouldCompact,
} from './compact';
import { MockLlmClient } from './mock-llm';
import type { ChatMessage } from './loop';
// ─── shouldCompact ─────────────────────────────────────────────────
describe('shouldCompact', () => {
  // Shared context budget; only the usage numerator varies per case.
  const budget = 100_000;
  it('returns true at the 92% threshold', () => {
    expect(shouldCompact(92_000, budget)).toBe(true);
  });
  it('returns false below the threshold', () => {
    expect(shouldCompact(91_000, budget)).toBe(false);
  });
  it('returns false when maxContextTokens is missing', () => {
    // Local models often report no usage — both "absent" and "zero"
    // ceilings must bail.
    for (const ceiling of [undefined, 0]) {
      expect(shouldCompact(50_000, ceiling)).toBe(false);
    }
  });
  it('returns false on zero usage', () => {
    expect(shouldCompact(0, budget)).toBe(false);
  });
  it('respects a custom threshold', () => {
    expect(shouldCompact(50_000, budget, 0.5)).toBe(true);
    expect(shouldCompact(49_999, budget, 0.5)).toBe(false);
  });
  it('constant matches Claude Code (0.92)', () => {
    expect(DEFAULT_COMPACT_THRESHOLD).toBe(0.92);
  });
});
// ─── parseCompactSummary ───────────────────────────────────────────
describe('parseCompactSummary', () => {
  it('parses the canonical 4-section block', () => {
    const fixture = `## Goal
Alle offenen Tasks bis Freitag abschliessen.
## Decisions
- Prio: Release-Blocker zuerst
- Keine neuen Features
## Tools Called
- list_tasks(open) -> 12 Tasks
- complete_task(T-42) -> ok
## Current Progress
8 von 12 Tasks erledigt; naechste Aktion: T-19 in Angriff nehmen.`;
    const parsed = parseCompactSummary(fixture);
    expect(parsed.goal).toContain('Alle offenen Tasks');
    expect(parsed.decisions).toContain('Prio: Release-Blocker');
    expect(parsed.toolsCalled).toContain('list_tasks');
    expect(parsed.currentProgress).toContain('8 von 12');
  });
  it('tolerates missing sections', () => {
    const parsed = parseCompactSummary(`## Goal\nFoo bar.\n\n## Decisions\n(keine)`);
    expect(parsed.goal).toBe('Foo bar.');
    expect(parsed.decisions).toBe('(keine)');
    // Absent sections degrade to empty strings, never throw.
    expect(parsed.toolsCalled).toBe('');
    expect(parsed.currentProgress).toBe('');
  });
  it('is case-insensitive on headers', () => {
    const parsed = parseCompactSummary(`## GOAL\nX\n\n## decisions\nY`);
    expect(parsed.goal).toBe('X');
    expect(parsed.decisions).toBe('Y');
  });
  it('returns empty summary for unparseable input', () => {
    expect(parseCompactSummary('this is not a markdown block')).toEqual({
      goal: '',
      decisions: '',
      toolsCalled: '',
      currentProgress: '',
    });
  });
});
// ─── renderCompactSummary ──────────────────────────────────────────
describe('renderCompactSummary', () => {
  it('wraps the summary in <compact-summary> tags', () => {
    const rendered = renderCompactSummary({
      goal: 'G',
      decisions: 'D',
      toolsCalled: 'T',
      currentProgress: 'P',
    });
    expect(rendered.startsWith('<compact-summary>')).toBe(true);
    expect(rendered.endsWith('</compact-summary>')).toBe(true);
    expect(rendered).toContain('## Goal\nG');
    expect(rendered).toContain('## Decisions\nD');
  });
  it('fills empty sections with placeholders', () => {
    const blank = { goal: '', decisions: '', toolsCalled: '', currentProgress: '' };
    const rendered = renderCompactSummary(blank);
    // Empty goal/progress render as "unklar", empty lists as "(keine)".
    expect(rendered).toContain('unklar');
    expect(rendered).toContain('(keine)');
  });
});
// ─── compactHistory ────────────────────────────────────────────────
/** Build a synthetic history: 2 anchors, `middleLen` assistant+tool
 * pairs, then `keepRecent` tail assistants with predictable content. */
function buildHistory(middleLen: number, keepRecent: number): ChatMessage[] {
  const history: ChatMessage[] = [
    { role: 'system', content: 'Original system prompt' },
    { role: 'user', content: 'Original user task' },
  ];
  for (let i = 0; i < middleLen; i++) {
    history.push(
      { role: 'assistant', content: `middle-assistant-${i}` },
      { role: 'tool', toolCallId: `c${i}`, content: `middle-tool-${i}` }
    );
  }
  for (let i = 0; i < keepRecent; i++) {
    history.push({ role: 'assistant', content: `recent-${i}` });
  }
  return history;
}
describe('compactHistory', () => {
  it('returns history unchanged when there is nothing to compact', async () => {
    const llm = new MockLlmClient(); // no responses needed
    // 3 messages < system + user + keepRecent(4) + 1 → bail path.
    const msgs: ChatMessage[] = [
      { role: 'system', content: 's' },
      { role: 'user', content: 'u' },
      { role: 'assistant', content: 'only-turn' },
    ];
    const res = await compactHistory(msgs, { llm, model: 'm', keepRecent: 4 });
    expect(res.messages).toBe(msgs); // same reference — bailed fast
    expect(res.compactedTurns).toBe(0);
  });
  it('preserves system + first user + tail; replaces middle with compact-summary', async () => {
    const history = buildHistory(5, DEFAULT_COMPACT_KEEP_RECENT); // 2 + 10 + 4 = 16 msgs
    const llm = new MockLlmClient().enqueueStop(
      '## Goal\nX\n\n## Decisions\n-\n\n## Tools Called\n-\n\n## Current Progress\nhalfway'
    );
    const res = await compactHistory(history, { llm, model: 'compact-model' });
    expect(res.compactedTurns).toBe(10); // the 5 assistant+tool pairs
    expect(res.messages).toHaveLength(2 + 1 + DEFAULT_COMPACT_KEEP_RECENT); // system + user + summary + tail
    // Shape check
    expect(res.messages[0]).toEqual(history[0]); // system verbatim
    expect(res.messages[1]).toEqual(history[1]); // first user verbatim
    expect(res.messages[2].role).toBe('assistant');
    expect(res.messages[2].content).toContain('<compact-summary>');
    expect(res.messages[2].content).toContain('halfway');
    // Tail preserved in order
    for (let i = 0; i < DEFAULT_COMPACT_KEEP_RECENT; i++) {
      expect(res.messages[3 + i].content).toBe(`recent-${i}`);
    }
  });
  it('sends the compact system prompt to the LLM', async () => {
    const history = buildHistory(3, 4);
    const llm = new MockLlmClient().enqueueStop(
      '## Goal\n\n## Decisions\n\n## Tools Called\n\n## Current Progress\n'
    );
    await compactHistory(history, { llm, model: 'm' });
    // The compactor call must lead with its own system prompt, not the
    // conversation's original one.
    const seenByLlm = llm.calls[0].messages;
    expect(seenByLlm[0].role).toBe('system');
    expect(seenByLlm[0].content).toBe(COMPACT_SYSTEM_PROMPT);
  });
  it('returns summary + usage when the provider reports it', async () => {
    const history = buildHistory(3, 4);
    const llm = new MockLlmClient();
    // Direct queue manipulation to inject usage
    // (NOTE(review): presumably enqueueStop cannot attach usage, hence
    // the reach into the private queue — brittle if the mock changes.)
    (llm as unknown as { queue: unknown[] }).queue.push({
      content: '## Goal\nX\n\n## Decisions\n-\n\n## Tools Called\n-\n\n## Current Progress\nY',
      toolCalls: [],
      finishReason: 'stop',
      usage: { promptTokens: 100, completionTokens: 30, totalTokens: 130 },
    });
    const res = await compactHistory(history, { llm, model: 'm' });
    expect(res.summary.goal).toBe('X');
    expect(res.summary.currentProgress).toBe('Y');
    // compactHistory drops totalTokens — only prompt/completion pass through.
    expect(res.usage).toEqual({ promptTokens: 100, completionTokens: 30 });
  });
  it('respects a custom keepRecent value', async () => {
    const history = buildHistory(5, 6);
    const llm = new MockLlmClient().enqueueStop('## Goal\n\n## Decisions\n');
    const res = await compactHistory(history, { llm, model: 'm', keepRecent: 2 });
    // keepRecent=2 is smaller than the 6 we built — more aggressive compaction
    expect(res.messages).toHaveLength(2 + 1 + 2); // system + user + summary + 2 tail
    expect(res.messages[3].content).toBe('recent-4');
    expect(res.messages[4].content).toBe('recent-5');
  });
});

View file

@ -0,0 +1,253 @@
/**
 * Context-window compactor — the `wU2` pattern from Claude Code,
 * adapted for our `runPlannerLoop` messages shape.
 *
 * Why we need it: when a mission (or companion chat) spans many rounds
 * with chatty tool results, the `messages[]` list grows until the next
 * LLM call overflows the provider's context window. The naive failure
 * mode is a 400 from the provider; the subtler one is silent
 * quality-degradation as the LLM loses earlier turns.
 *
 * Claude Code handles this with a pre-emptive trigger at ~92 % of the
 * context budget: run the current history through a second LLM call
 * with a compact-prompt that forces a fixed schema — Goal, Decisions,
 * Tools Called, Current Progress — and splice that summary back into
 * the live loop so subsequent rounds see a short synopsis instead of
 * the raw turns.
 *
 * This module ships ONLY the pure primitive:
 * - `DEFAULT_COMPACT_THRESHOLD` — 0.92, matching Claude Code.
 * - `shouldCompact(totalTokens, maxContextTokens)` — boolean trigger.
 * - `compactHistory(messages, opts)` — async, calls the LLM with the
 *   compact-prompt and returns a fresh messages array with the
 *   pre-tail turns folded into one summary message.
 *
 * Wiring into `runPlannerLoop` (trigger + splice) is a follow-up PR;
 * keeping the primitive separate means we can unit-test compression
 * without mocking the loop state machine.
 */
import type { ChatMessage, LlmClient } from './loop';
/** Trigger threshold as a fraction of the context budget — Claude
 * Code's `wU2` fires at ~92 %, and we mirror that. */
export const DEFAULT_COMPACT_THRESHOLD = 0.92;
/** Number of tail turns the compactor always preserves VERBATIM so
 * the LLM's most recent in-progress reasoning stays intact for
 * coherence. */
export const DEFAULT_COMPACT_KEEP_RECENT = 4;
/**
 * Decide whether to compact by comparing token usage to a ceiling.
 *
 * An unknown or non-positive ceiling yields `false` so the caller can
 * skip silently when the provider reports no usage (common for local
 * models); zero or negative usage also yields `false`.
 */
export function shouldCompact(
  totalTokens: number,
  maxContextTokens: number | undefined,
  threshold: number = DEFAULT_COMPACT_THRESHOLD
): boolean {
  const ceilingKnown = maxContextTokens !== undefined && maxContextTokens > 0;
  if (!ceilingKnown || totalTokens <= 0) {
    return false;
  }
  const usageRatio = totalTokens / maxContextTokens;
  return usageRatio >= threshold;
}
/**
 * Structured shape the compactor prompt asks the LLM to produce. We
 * parse loosely — if any field is missing we fill with empty strings,
 * because a partial compaction is still better than no compaction.
 */
export interface CompactSummary {
  /** One-sentence restatement of the original objective. */
  readonly goal: string;
  /** Bullet list of decisions taken (direction, priority, scope). */
  readonly decisions: string;
  /** Bullet list: toolname(short args) -> short result, errors named. */
  readonly toolsCalled: string;
  /** Where the work stands now and the next concrete step. */
  readonly currentProgress: string;
}
/**
 * Fixed system prompt for the compact agent. It forces the reply into
 * the 4-section Markdown schema (## Goal / ## Decisions /
 * ## Tools Called / ## Current Progress) that `parseCompactSummary`
 * expects, forbids any text outside the block, and demands a German
 * answer even when tool responses are English (see its final rule).
 * Runtime string — do not reformat or translate.
 */
export const COMPACT_SYSTEM_PROMPT = `Du bist ein Compact-Agent. Komprimiere die nachfolgende Konversation nach festem Schema, damit sie in einen knappen Kontext passt.
Beantworte AUSSCHLIESSLICH mit einem Markdown-Block in exakt dieser Struktur:
## Goal
<Ein Satz. Was war das urspruengliche Ziel?>
## Decisions
<Stichpunkte. Welche Entscheidungen wurden getroffen (Richtung, Prioritaet, Scope)?>
## Tools Called
<Stichpunkte: toolname(arg-kurzform) -> Ergebnis-Kurzfassung. Fehler explizit nennen.>
## Current Progress
<Ein Satz. Wo steht die Arbeit JETZT? Was ist der naechste konkrete Schritt?>
Regeln:
- Keine Einleitung, keine Nachbemerkung. Nur der Markdown-Block.
- Keine erfundenen Fakten. Wenn du unsicher bist, schreib "unklar".
- Zitate und Begriffe 1:1 wenn sie fachlich sind (IDs, Feldnamen).
- Deutsche Antwort, auch wenn Tool-Responses englisch sind.`;
/**
 * Parse the compact-agent's raw Markdown response into a
 * `CompactSummary`. Deliberately tolerant: a section the model omitted
 * (or garbled) comes back as an empty string instead of failing the
 * whole compaction. Header matching is case-insensitive; a section
 * runs until the next `## ` header or the end of input.
 */
export function parseCompactSummary(raw: string): CompactSummary {
  const grab = (header: string): string => {
    const pattern = new RegExp(`##\\s+${header}\\s*\\n([\\s\\S]*?)(?=\\n##\\s|$)`, 'i');
    const hit = pattern.exec(raw);
    return hit?.[1]?.trim() ?? '';
  };
  return {
    goal: grab('Goal'),
    decisions: grab('Decisions'),
    toolsCalled: grab('Tools Called'),
    currentProgress: grab('Current Progress'),
  };
}
/**
 * Render a `CompactSummary` back into the single chat-message string
 * that gets spliced into the history, wrapped in <compact-summary>
 * tags. Empty goal/progress fall back to "unklar", empty lists to
 * "(keine)", mirroring the vocabulary the prompt itself mandates.
 */
export function renderCompactSummary(s: CompactSummary): string {
  const goal = s.goal || 'unklar';
  const decisions = s.decisions || '(keine)';
  const tools = s.toolsCalled || '(keine)';
  const progress = s.currentProgress || 'unklar';
  return `<compact-summary>\n## Goal\n${goal}\n\n## Decisions\n${decisions}\n\n## Tools Called\n${tools}\n\n## Current Progress\n${progress}\n</compact-summary>`;
}
export interface CompactHistoryOptions {
  /** LLM client used for the single compaction call. */
  readonly llm: LlmClient;
  /** Model identifier passed through to the client. */
  readonly model: string;
  /** How many most-recent turns to preserve verbatim. Default 4. */
  readonly keepRecent?: number;
  /** Compactor-LLM sampling temperature — we want summarisation,
   * not creativity. Default 0.2. */
  readonly temperature?: number;
}
export interface CompactHistoryResult {
  /** New history — or the original array unchanged when nothing was
   * compacted (same reference, no LLM call). */
  readonly messages: readonly ChatMessage[];
  /** Parsed 4-section summary; all fields empty on the bail path. */
  readonly summary: CompactSummary;
  /** Number of middle messages folded into the summary (0 on bail). */
  readonly compactedTurns: number;
  /** Token usage from the compactor call itself, when reported. */
  readonly usage?: { promptTokens: number; completionTokens: number };
}
/**
* Compact a message history:
* 1. Preserve the `system` prompt verbatim (always index 0).
* 2. Preserve the first `user` turn (the original objective).
* 3. Send everything in between + the turns up to `keepRecent` BEFORE
* the tail to the compact agent.
* 4. Preserve the last `keepRecent` turns verbatim.
*
* Returned messages:
* [ system, user, assistant(compact-summary), ...recentTurns ]
*
* Notes:
* - The compact-summary message is tagged role='assistant' because
* some providers reject arbitrary system messages deep in history.
* - If there's nothing to compact ( keepRecent+2 messages), the
* function returns the original messages unchanged no LLM call.
*/
export async function compactHistory(
messages: readonly ChatMessage[],
opts: CompactHistoryOptions
): Promise<CompactHistoryResult> {
const keepRecent = opts.keepRecent ?? DEFAULT_COMPACT_KEEP_RECENT;
// Find anchor points.
const firstSystem = messages.findIndex((m) => m.role === 'system');
const firstUser = messages.findIndex((m) => m.role === 'user');
// Bail if there's nothing to compact. Always need at least
// system + user + keepRecent + 1 compactable turn before it's worth it.
const minLength = (firstSystem >= 0 ? 1 : 0) + (firstUser >= 0 ? 1 : 0) + keepRecent + 1;
if (messages.length < minLength) {
return {
messages,
summary: { goal: '', decisions: '', toolsCalled: '', currentProgress: '' },
compactedTurns: 0,
};
}
const systemMsg = firstSystem >= 0 ? messages[firstSystem] : null;
const userMsg = firstUser >= 0 ? messages[firstUser] : null;
// Split: middle = everything between the 2 anchors and the tail;
// tail = last keepRecent turns.
const tailStart = messages.length - keepRecent;
const middle = messages.slice(
Math.max((firstUser >= 0 ? firstUser : firstSystem) + 1, 0),
tailStart
);
const tail = messages.slice(tailStart);
if (middle.length === 0) {
return {
messages,
summary: { goal: '', decisions: '', toolsCalled: '', currentProgress: '' },
compactedTurns: 0,
};
}
// Ask the compact agent to summarise the MIDDLE. We give it the
// original system+user as context so it can ground the summary
// against the original goal, but instruct it to only produce the
// Markdown block — not a continuation of the conversation.
const compactRequestMessages: ChatMessage[] = [
{ role: 'system', content: COMPACT_SYSTEM_PROMPT },
...(systemMsg
? [
{
...systemMsg,
content: `Urspruenglicher System-Prompt:\n${systemMsg.content ?? ''}`,
} as ChatMessage,
]
: []),
...(userMsg ? [userMsg] : []),
...middle,
{
role: 'user',
content:
'Komprimiere das obige in das Schema (## Goal / ## Decisions / ## Tools Called / ## Current Progress). Nur der Markdown-Block, keine Einleitung.',
},
];
const response = await opts.llm.complete({
messages: compactRequestMessages,
tools: [],
model: opts.model,
temperature: opts.temperature ?? 0.2,
});
const summary = parseCompactSummary(response.content ?? '');
const summaryMsg: ChatMessage = {
role: 'assistant',
content: renderCompactSummary(summary),
};
const compactedMessages: ChatMessage[] = [
...(systemMsg ? [systemMsg] : []),
...(userMsg ? [userMsg] : []),
summaryMsg,
...tail,
];
return {
messages: compactedMessages,
summary,
compactedTurns: middle.length,
usage: response.usage
? {
promptTokens: response.usage.promptTokens,
completionTokens: response.usage.completionTokens,
}
: undefined,
};
}

View file

@ -10,6 +10,16 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
export { buildSystemPrompt } from './system-prompt';
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
export { runPlannerLoop, LOOP_STATE_RECENT_CALLS_WINDOW, PARALLEL_TOOL_BATCH_SIZE } from './loop';
// Context-window compactor (M2.1): trigger check, compaction
// primitive, and summary round-trip helpers.
export {
  COMPACT_SYSTEM_PROMPT,
  DEFAULT_COMPACT_KEEP_RECENT,
  DEFAULT_COMPACT_THRESHOLD,
  compactHistory,
  parseCompactSummary,
  renderCompactSummary,
  shouldCompact,
} from './compact';
export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './compact';
export { MockLlmClient } from './mock-llm';
export type { MockLlmTurn } from './mock-llm';
export type {