diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts index 8c52ebcff..31f322348 100644 --- a/packages/shared-ai/src/index.ts +++ b/packages/shared-ai/src/index.ts @@ -85,10 +85,18 @@ export type { export { buildPlannerPrompt, buildSystemPrompt, + compactHistory, + COMPACT_SYSTEM_PROMPT, + DEFAULT_COMPACT_KEEP_RECENT, + DEFAULT_COMPACT_THRESHOLD, MockLlmClient, + parseCompactSummary, parsePlannerResponse, + renderCompactSummary, runPlannerLoop, + shouldCompact, } from './planner'; +export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './planner'; export { AI_PROPOSABLE_TOOL_NAMES, diff --git a/packages/shared-ai/src/planner/compact.test.ts b/packages/shared-ai/src/planner/compact.test.ts new file mode 100644 index 000000000..cbe858ae4 --- /dev/null +++ b/packages/shared-ai/src/planner/compact.test.ts @@ -0,0 +1,204 @@ +import { describe, expect, it } from 'vitest'; +import { + COMPACT_SYSTEM_PROMPT, + DEFAULT_COMPACT_KEEP_RECENT, + DEFAULT_COMPACT_THRESHOLD, + compactHistory, + parseCompactSummary, + renderCompactSummary, + shouldCompact, +} from './compact'; +import { MockLlmClient } from './mock-llm'; +import type { ChatMessage } from './loop'; + +// ─── shouldCompact ───────────────────────────────────────────────── + +describe('shouldCompact', () => { + it('returns true at the 92% threshold', () => { + expect(shouldCompact(92_000, 100_000)).toBe(true); + }); + it('returns false below the threshold', () => { + expect(shouldCompact(91_000, 100_000)).toBe(false); + }); + it('returns false when maxContextTokens is missing', () => { + expect(shouldCompact(50_000, undefined)).toBe(false); + expect(shouldCompact(50_000, 0)).toBe(false); + }); + it('returns false on zero usage', () => { + expect(shouldCompact(0, 100_000)).toBe(false); + }); + it('respects a custom threshold', () => { + expect(shouldCompact(50_000, 100_000, 0.5)).toBe(true); + expect(shouldCompact(49_999, 100_000, 0.5)).toBe(false); + }); + 
it('constant matches Claude Code (0.92)', () => { + expect(DEFAULT_COMPACT_THRESHOLD).toBe(0.92); + }); +}); + +// ─── parseCompactSummary ─────────────────────────────────────────── + +describe('parseCompactSummary', () => { + it('parses the canonical 4-section block', () => { + const raw = `## Goal +Alle offenen Tasks bis Freitag abschliessen. + +## Decisions +- Prio: Release-Blocker zuerst +- Keine neuen Features + +## Tools Called +- list_tasks(open) -> 12 Tasks +- complete_task(T-42) -> ok + +## Current Progress +8 von 12 Tasks erledigt; naechste Aktion: T-19 in Angriff nehmen.`; + const s = parseCompactSummary(raw); + expect(s.goal).toContain('Alle offenen Tasks'); + expect(s.decisions).toContain('Prio: Release-Blocker'); + expect(s.toolsCalled).toContain('list_tasks'); + expect(s.currentProgress).toContain('8 von 12'); + }); + + it('tolerates missing sections', () => { + const raw = `## Goal\nFoo bar.\n\n## Decisions\n(keine)`; + const s = parseCompactSummary(raw); + expect(s.goal).toBe('Foo bar.'); + expect(s.decisions).toBe('(keine)'); + expect(s.toolsCalled).toBe(''); + expect(s.currentProgress).toBe(''); + }); + + it('is case-insensitive on headers', () => { + const s = parseCompactSummary(`## GOAL\nX\n\n## decisions\nY`); + expect(s.goal).toBe('X'); + expect(s.decisions).toBe('Y'); + }); + + it('returns empty summary for unparseable input', () => { + const s = parseCompactSummary('this is not a markdown block'); + expect(s).toEqual({ goal: '', decisions: '', toolsCalled: '', currentProgress: '' }); + }); +}); + +// ─── renderCompactSummary ────────────────────────────────────────── + +describe('renderCompactSummary', () => { + it('wraps the summary in tags', () => { + const out = renderCompactSummary({ + goal: 'G', + decisions: 'D', + toolsCalled: 'T', + currentProgress: 'P', + }); + expect(out.startsWith('')).toBe(true); + expect(out.endsWith('')).toBe(true); + expect(out).toContain('## Goal\nG'); + expect(out).toContain('## Decisions\nD'); + }); + + 
/**
 * Test fixture: builds a synthetic planner history.
 *
 * Layout: one system message, one user message, then `middleLen`
 * assistant/tool pairs (the part expected to be compacted), then
 * `keepRecent` assistant messages (the part expected to survive verbatim).
 * Both counts are expected to be non-negative integers.
 */
function buildHistory(middleLen: number, keepRecent: number): ChatMessage[] {
  const anchors: ChatMessage[] = [
    { role: 'system', content: 'Original system prompt' },
    { role: 'user', content: 'Original user task' },
  ];

  // Each middle round is an assistant turn followed by its tool result.
  const middleRounds: ChatMessage[] = Array.from({ length: middleLen }, (_, round) => round).flatMap(
    (round): ChatMessage[] => [
      { role: 'assistant', content: `middle-assistant-${round}` },
      { role: 'tool', toolCallId: `c${round}`, content: `middle-tool-${round}` },
    ]
  );

  const recentTurns: ChatMessage[] = Array.from(
    { length: keepRecent },
    (_, turn): ChatMessage => ({ role: 'assistant', content: `recent-${turn}` })
  );

  return [...anchors, ...middleRounds, ...recentTurns];
}
expect(res.messages[0]).toEqual(history[0]); // system verbatim + expect(res.messages[1]).toEqual(history[1]); // first user verbatim + expect(res.messages[2].role).toBe('assistant'); + expect(res.messages[2].content).toContain(''); + expect(res.messages[2].content).toContain('halfway'); + // Tail preserved in order + for (let i = 0; i < DEFAULT_COMPACT_KEEP_RECENT; i++) { + expect(res.messages[3 + i].content).toBe(`recent-${i}`); + } + }); + + it('sends the compact system prompt to the LLM', async () => { + const history = buildHistory(3, 4); + const llm = new MockLlmClient().enqueueStop( + '## Goal\n\n## Decisions\n\n## Tools Called\n\n## Current Progress\n' + ); + await compactHistory(history, { llm, model: 'm' }); + + const seenByLlm = llm.calls[0].messages; + expect(seenByLlm[0].role).toBe('system'); + expect(seenByLlm[0].content).toBe(COMPACT_SYSTEM_PROMPT); + }); + + it('returns summary + usage when the provider reports it', async () => { + const history = buildHistory(3, 4); + const llm = new MockLlmClient(); + // Direct queue manipulation to inject usage + (llm as unknown as { queue: unknown[] }).queue.push({ + content: '## Goal\nX\n\n## Decisions\n-\n\n## Tools Called\n-\n\n## Current Progress\nY', + toolCalls: [], + finishReason: 'stop', + usage: { promptTokens: 100, completionTokens: 30, totalTokens: 130 }, + }); + + const res = await compactHistory(history, { llm, model: 'm' }); + expect(res.summary.goal).toBe('X'); + expect(res.summary.currentProgress).toBe('Y'); + expect(res.usage).toEqual({ promptTokens: 100, completionTokens: 30 }); + }); + + it('respects a custom keepRecent value', async () => { + const history = buildHistory(5, 6); + const llm = new MockLlmClient().enqueueStop('## Goal\n\n## Decisions\n'); + + const res = await compactHistory(history, { llm, model: 'm', keepRecent: 2 }); + // keepRecent=2 is smaller than the 6 we built — more aggressive compaction + expect(res.messages).toHaveLength(2 + 1 + 2); // system + user + summary + 2 tail 
+ expect(res.messages[3].content).toBe('recent-4'); + expect(res.messages[4].content).toBe('recent-5'); + }); +}); diff --git a/packages/shared-ai/src/planner/compact.ts b/packages/shared-ai/src/planner/compact.ts new file mode 100644 index 000000000..57f4acdf4 --- /dev/null +++ b/packages/shared-ai/src/planner/compact.ts @@ -0,0 +1,253 @@ +/** + * Context-window compactor — the `wU2` pattern from Claude Code, + * adapted for our `runPlannerLoop` messages shape. + * + * Why we need it: when a mission (or companion chat) spans many rounds + * with chatty tool results, the `messages[]` list grows until the next + * LLM call overflows the provider's context window. The naive failure + * mode is a 400 from the provider; the subtler one is silent + * quality-degradation as the LLM loses earlier turns. + * + * Claude Code handles this with a pre-emptive trigger at ~92 % of the + * context budget: run the current history through a second LLM call + * with a compact-prompt that forces a fixed schema — Goal, Decisions, + * Tools Called, Current Progress — and splice that summary back into + * the live loop so subsequent rounds see a short synopsis instead of + * the raw turns. + * + * This module ships ONLY the pure primitive: + * - `DEFAULT_COMPACT_THRESHOLD` — 0.92, matching Claude Code. + * - `shouldCompact(totalTokens, maxContextTokens)` — boolean trigger. + * - `compactHistory(messages, opts)` — async, calls the LLM with the + * compact-prompt and returns a fresh messages array with the + * pre-tail turns folded into one summary message. + * + * Wiring into `runPlannerLoop` (trigger + splice) is a follow-up PR; + * keeping the primitive separate means we can unit-test compression + * without mocking the loop state machine. + */ + +import type { ChatMessage, LlmClient } from './loop'; + +/** Default trigger threshold: Claude Code's `wU2` fires at ~92 %. */ +export const DEFAULT_COMPACT_THRESHOLD = 0.92; + +/** How many recent turns to keep VERBATIM, tail-first. 
/**
 * Tell whether the current token usage has reached the compaction trigger.
 *
 * @param totalTokens      Tokens used so far.
 * @param maxContextTokens Context ceiling; `undefined`, `0`, negative or NaN
 *                         means "unknown" and never triggers.
 * @param threshold        Fraction of the ceiling at which to trigger
 *                         (inclusive). Defaults to `DEFAULT_COMPACT_THRESHOLD`.
 * @returns `true` only when both numbers are positive and
 *          `totalTokens / maxContextTokens >= threshold`.
 */
export function shouldCompact(
  totalTokens: number,
  maxContextTokens: number | undefined,
  threshold: number = DEFAULT_COMPACT_THRESHOLD
): boolean {
  // Missing ceiling collapses to 0, which the positivity check rejects
  // together with negative and NaN values.
  const ceiling = maxContextTokens ?? 0;
  const hasUsableInputs = ceiling > 0 && totalTokens > 0;
  return hasUsableInputs ? totalTokens / ceiling >= threshold : false;
}
/**
 * Parse the compact agent's Markdown reply into a `CompactSummary`.
 *
 * The parser is deliberately tolerant: a section that is missing or empty
 * yields an empty string instead of failing the whole compaction.
 *
 * Header matching rules:
 * - a header is a line of the form `## <Name>` (case-insensitive, optional
 *   leading/trailing spaces or tabs, LF or CRLF line endings);
 * - a section body runs up to the next line that starts with `##` followed
 *   by whitespace, or to the end of the input;
 * - only horizontal whitespace may follow the header text. This matters for
 *   empty sections: if the blank line after `## Goal` were consumed as part
 *   of the header, the following `## Decisions` line would be captured as
 *   the body of `Goal`.
 *
 * @param raw Raw text returned by the compact agent.
 * @returns The four sections, each trimmed; `''` where a section is absent.
 */
export function parseCompactSummary(raw: string): CompactSummary {
  const readSection = (header: string): string => {
    const pattern = new RegExp(
      // `m` makes `^` match at every line start; `(?![\s\S])` is the real
      // end of input (a plain `$` would also match at each line end).
      `^[ \\t]*##[ \\t]+${header}[ \\t]*(?:\\r?\\n|$)([\\s\\S]*?)(?=^[ \\t]*##\\s|(?![\\s\\S]))`,
      'im'
    );
    return pattern.exec(raw)?.[1]?.trim() ?? '';
  };

  return {
    goal: readSection('Goal'),
    decisions: readSection('Decisions'),
    toolsCalled: readSection('Tools Called'),
    currentProgress: readSection('Current Progress'),
  };
}

/**
 * Render a `CompactSummary` as one chat-message-ready string.
 *
 * The four sections are emitted in fixed order, separated by blank lines
 * and wrapped in `<compact-summary>` tags. Empty sections get a placeholder
 * (`unklar` for Goal / Current Progress, `(keine)` for Decisions / Tools
 * Called) so the schema stays complete.
 *
 * NOTE(review): the wrapper tag name is inferred from the test description
 * ("wraps the summary in tags") and the `compactHistory` docs — confirm it
 * matches what downstream consumers look for.
 *
 * @param s Summary to render.
 * @returns The tagged Markdown block.
 */
export function renderCompactSummary(s: CompactSummary): string {
  return [
    '<compact-summary>',
    `## Goal\n${s.goal || 'unklar'}`,
    '',
    `## Decisions\n${s.decisions || '(keine)'}`,
    '',
    `## Tools Called\n${s.toolsCalled || '(keine)'}`,
    '',
    `## Current Progress\n${s.currentProgress || 'unklar'}`,
    '</compact-summary>',
  ].join('\n');
}
/**
 * Fold the older part of a chat history into a single summary message.
 *
 * Result layout:
 *   [ system, first user, assistant(compact summary), ...recent tail ]
 *
 * - The first `system` and the first `user` message are carried over
 *   verbatim.
 * - Everything after the first `user` message (or after the `system`
 *   message when there is no user message) and before the tail is sent to
 *   `opts.llm` under `COMPACT_SYSTEM_PROMPT` and replaced by one message
 *   holding the rendered summary.
 * - The last `keepRecent` messages are kept verbatim. If that boundary
 *   would make the tail begin with `tool` messages, the boundary is moved
 *   back until it sits on a non-`tool` message, so tool results stay
 *   attached to the message that precedes them instead of dangling behind
 *   the summary.
 *
 * Notes:
 * - The summary message uses role `assistant` because some providers reject
 *   arbitrary system messages deep in history.
 * - When the history is too short, or nothing lies between the anchors and
 *   the tail, the input array is returned as-is (same reference) and no LLM
 *   call is made.
 * - A rejection from `opts.llm.complete` is not caught here.
 *
 * @param messages Full history to compact; never mutated.
 * @param opts     Compactor client, model, and optional `keepRecent`
 *                 (default `DEFAULT_COMPACT_KEEP_RECENT`) and `temperature`
 *                 (default 0.2).
 * @returns The compacted messages, the parsed summary, the number of
 *          messages folded into it, and the compactor call's token usage
 *          when the provider reported it.
 */
export async function compactHistory(
  messages: readonly ChatMessage[],
  opts: CompactHistoryOptions
): Promise<CompactHistoryResult> {
  // Whole, non-negative count: it is used for index arithmetic below.
  const keepRecent = Math.max(0, Math.trunc(opts.keepRecent ?? DEFAULT_COMPACT_KEEP_RECENT));

  // Shared "nothing to compact" result; hands back the caller's own array.
  const unchanged = (): CompactHistoryResult => ({
    messages,
    summary: { goal: '', decisions: '', toolsCalled: '', currentProgress: '' },
    compactedTurns: 0,
  });

  // Anchor points that always survive verbatim.
  const firstSystem = messages.findIndex((m) => m.role === 'system');
  const firstUser = messages.findIndex((m) => m.role === 'user');
  const systemMsg = firstSystem >= 0 ? messages[firstSystem] : undefined;
  const userMsg = firstUser >= 0 ? messages[firstUser] : undefined;

  // Need the anchors, the tail, and at least one message worth folding.
  const anchorCount = (systemMsg ? 1 : 0) + (userMsg ? 1 : 0);
  if (messages.length < anchorCount + keepRecent + 1) {
    return unchanged();
  }

  // With neither anchor present both indices are -1, so this is 0.
  const middleStart = (firstUser >= 0 ? firstUser : firstSystem) + 1;

  // Do not let the tail open with tool results whose preceding message
  // would otherwise be folded into the summary.
  let tailStart = messages.length - keepRecent;
  while (tailStart > middleStart && messages[tailStart]?.role === 'tool') {
    tailStart -= 1;
  }

  const middle = messages.slice(middleStart, tailStart);
  if (middle.length === 0) {
    return unchanged();
  }
  const tail = messages.slice(tailStart);

  // The original system prompt is passed along (labelled) so the summary
  // can be grounded against the original goal. The assertion is kept from
  // the original code; presumably the spread-with-override is not directly
  // assignable to `ChatMessage` — confirm against its declaration.
  const groundingSystem: ChatMessage[] = systemMsg
    ? [
        {
          ...systemMsg,
          content: `Urspruenglicher System-Prompt:\n${systemMsg.content ?? ''}`,
        } as ChatMessage,
      ]
    : [];

  const compactRequestMessages: ChatMessage[] = [
    { role: 'system', content: COMPACT_SYSTEM_PROMPT },
    ...groundingSystem,
    ...(userMsg ? [userMsg] : []),
    ...middle,
    {
      role: 'user',
      content:
        'Komprimiere das obige in das Schema (## Goal / ## Decisions / ## Tools Called / ## Current Progress). Nur der Markdown-Block, keine Einleitung.',
    },
  ];

  const response = await opts.llm.complete({
    messages: compactRequestMessages,
    tools: [],
    model: opts.model,
    temperature: opts.temperature ?? 0.2,
  });

  const summary = parseCompactSummary(response.content ?? '');
  const summaryMsg: ChatMessage = {
    role: 'assistant',
    content: renderCompactSummary(summary),
  };

  const compactedMessages: ChatMessage[] = [
    ...(systemMsg ? [systemMsg] : []),
    ...(userMsg ? [userMsg] : []),
    summaryMsg,
    ...tail,
  ];

  return {
    messages: compactedMessages,
    summary,
    compactedTurns: middle.length,
    // Only prompt/completion counts are exposed; other usage fields are dropped.
    usage: response.usage
      ? {
          promptTokens: response.usage.promptTokens,
          completionTokens: response.usage.completionTokens,
        }
      : undefined,
  };
}