From 703ef69ca9a41d2ff0b89a04b75069a20a2abe13 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:33:18 +0200 Subject: [PATCH] feat(webapp): wire context-window compactor into Companion + Mission runner (M2.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symmetrical to 83a4606a9 which wired the compactor into mana-ai. Both webapp consumers of runPlannerLoop (Companion chat engine, Mission runner) now pass a compactor that folds the middle of messages into a compact-summary when cumulative token usage hits 92% of maxContextTokens. COMPACT_MAX_CTX is a module constant — gemini-2.5-flash's 1M-token ceiling — not env-wired. Vite builds for the browser and PUBLIC_* flags are the wrong tool for a value that only matters to the loop runtime; changing the model means changing the constant alongside the model reference anyway. Uses the same LlmClient + model as the planner's own calls. A cheaper compactor-tier model (Haiku) is the optional M2.5 follow-up and does not require changing this wiring — only the compactHistory `opts.model` gets swapped. Type-check clean (svelte-check 0 errors 0 warnings across 7389 files). All 31 companion + mission tests green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../web/src/lib/data/ai/missions/runner.ts | 21 ++++++++++++++++++ .../web/src/lib/modules/companion/engine.ts | 22 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts index 75fc5fc1a..4bfee83b3 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts @@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types'; import { AI_TOOL_CATALOG_BY_NAME, buildSystemPrompt, + compactHistory, runPlannerLoop, runPrePlanGuardrails, runPreExecuteGuardrails, @@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue * the shared-ai default; re-declared here for clarity. */ const MAX_PLANNER_ROUNDS = 5; +/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's + * 1M-token budget. Missions can accumulate many iterations over time + * and — with read-heavy reasoning — chatty tool results; the compactor + * folds pre-tail turns at 92% so we never hit a 400 from the provider. */ +const COMPACT_MAX_CTX = 1_000_000; + /** Hard timeout for one mission run. 180 s is comfortable for a cloud * model doing up to 5 reasoning rounds; anything longer means a wedged * backend and should fail the iteration rather than sit in `running`. */ @@ -273,6 +280,20 @@ async function runMissionInner( // pre-execute guardrail can reason about state built up by // prior steps in the same round. isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto', + // Fold older turns into a compact-summary at 92% of + // maxContextTokens. Same LlmClient + model as the + // planner; one extra LLM call, but only when usage + // actually approaches the ceiling. 
+ compactor: { + maxContextTokens: COMPACT_MAX_CTX, + compact: async (msgs) => { + const res = await compactHistory(msgs, { + llm: deps.llm, + model: deps.model ?? 'google/gemini-2.5-flash', + }); + return { messages: res.messages, compactedTurns: res.compactedTurns }; + }, + }, }, onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => { await checkCancel(); diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts index a1581270c..acdcff3dd 100644 --- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts +++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts @@ -15,6 +15,7 @@ import { runPlannerLoop, AI_TOOL_CATALOG, AI_TOOL_CATALOG_BY_NAME, + compactHistory, type ChatMessage, type ToolCallRequest, type ToolResult, @@ -29,6 +30,17 @@ import type { LocalMessage } from './types'; const MAX_TOOL_ROUNDS = 3; +/** + * Context-window ceiling for the compactor. gemini-2.5-flash supports + * 1M tokens; the Companion chat rarely gets anywhere near that because + * we cap rounds at 3, but long chat histories plus chatty tool results + * (list_tasks on a power user) can still push us toward it. Kept as a + * module constant rather than env-wired — the webapp's Vite build would + * need a PUBLIC_ prefix and local-first apps shouldn't ship that kind + * of flag to the browser when the default already works. + */ +const COMPACT_MAX_CTX = 1_000_000; + const llm = createManaLlmClient(); interface EngineResult { @@ -110,6 +122,16 @@ export async function runCompanionChat( // Writes (propose policy) stay sequential to preserve // user-visible intent order in the proposal inbox. isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto', + // Fold the middle of messages into a compact-summary at + // 92% of the model's context window. Mirrors the mana-ai + // wiring; one call to the same LLM client, same model. 
+ compactor: { + maxContextTokens: COMPACT_MAX_CTX, + compact: async (msgs) => { + const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' }); + return { messages: res.messages, compactedTurns: res.compactedTurns }; + }, + }, }, onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => { const startedAt = Date.now();