feat(webapp): wire context-window compactor into Companion + Mission runner (M2.4)

Symmetrical to 83a4606a9, which wired the compactor into mana-ai. Both
webapp consumers of runPlannerLoop (Companion chat engine, Mission
runner) now pass a compactor that folds the middle of messages into
a <compact-summary> when cumulative token usage hits 92% of
maxContextTokens.
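
For reference, the contract both call sites hand to runPlannerLoop is the
two-field compactor object shown in the wiring below. Roughly (a sketch
only; names like `usedTokens` are illustrative stand-ins, not the
shared-ai signatures):

    // Sketch of the contract, not the shared-ai source: the loop hands
    // the compactor the running history once cumulative usage crosses
    // 92% of the configured ceiling.
    interface Compactor {
      maxContextTokens: number;
      compact(msgs: ChatMessage[]): Promise<{
        messages: ChatMessage[];
        compactedTurns: number;
      }>;
    }

    // Illustrative threshold check (usedTokens stands in for the loop's
    // own token accounting):
    if (usedTokens >= compactor.maxContextTokens * 0.92) {
      history = (await compactor.compact(history)).messages;
    }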

COMPACT_MAX_CTX is a module constant — gemini-2.5-flash's 1M-token
ceiling — not env-wired. Vite builds for the browser and PUBLIC_*
flags are the wrong tool for a value that only matters to the loop
runtime; changing the model means changing the constant alongside the
model reference anyway.

Uses the same LlmClient + model as the planner's own calls. A cheaper
compactor-tier model (Haiku) is the optional M2.5 follow-up and does
not require changing this wiring — only the compactHistory `opts.model`
gets swapped.
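
If that follow-up lands, the swap would look roughly like this (the
model id is a placeholder, not a decision made here):

    // Hypothetical M2.5 change: same wiring, cheaper model for the
    // summarisation call only.
    const res = await compactHistory(msgs, {
      llm: deps.llm,
      model: 'anthropic/claude-haiku', // placeholder id, not confirmed
    });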

Type-check clean (svelte-check: 0 errors, 0 warnings across 7389 files).
All 31 companion + mission tests green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS 2026-04-23 15:33:18 +02:00
parent 3eca5ac201
commit 703ef69ca9
2 changed files with 43 additions and 0 deletions


@@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types';
import {
AI_TOOL_CATALOG_BY_NAME,
buildSystemPrompt,
compactHistory,
runPlannerLoop,
runPrePlanGuardrails,
runPreExecuteGuardrails,
@@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue
* the shared-ai default; re-declared here for clarity. */
const MAX_PLANNER_ROUNDS = 5;
/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's
* 1M-token budget. Missions can accumulate many iterations over time,
* and read-heavy reasoning produces chatty tool results; the compactor
* folds pre-tail turns at 92% so we never hit a 400 from the provider. */
const COMPACT_MAX_CTX = 1_000_000;
/** Hard timeout for one mission run. 180 s is comfortable for a cloud
* model doing up to 5 reasoning rounds; anything longer means a wedged
* backend and should fail the iteration rather than sit in `running`. */
@@ -273,6 +280,20 @@ async function runMissionInner(
// pre-execute guardrail can reason about state built up by
// prior steps in the same round.
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
// Fold older turns into a compact-summary at 92% of
// maxContextTokens. Same LlmClient + model as the
// planner; one extra LLM call, but only when usage
// actually approaches the ceiling.
compactor: {
maxContextTokens: COMPACT_MAX_CTX,
compact: async (msgs) => {
const res = await compactHistory(msgs, {
llm: deps.llm,
model: deps.model ?? 'google/gemini-2.5-flash',
});
return { messages: res.messages, compactedTurns: res.compactedTurns };
},
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
await checkCancel();


@@ -15,6 +15,7 @@ import {
runPlannerLoop,
AI_TOOL_CATALOG,
AI_TOOL_CATALOG_BY_NAME,
compactHistory,
type ChatMessage,
type ToolCallRequest,
type ToolResult,
@@ -29,6 +30,17 @@ import type { LocalMessage } from './types';
const MAX_TOOL_ROUNDS = 3;
/**
* Context-window ceiling for the compactor. gemini-2.5-flash supports
* 1M tokens; the Companion chat rarely gets anywhere near that because
* we cap rounds at 3, but long chat histories plus chatty tool results
* (list_tasks on a power user) can still push us toward it. Kept as a
* module constant rather than env-wired: the webapp's Vite build would
* need a PUBLIC_ prefix, and local-first apps shouldn't ship that kind
* of flag to the browser when the default already works.
*/
const COMPACT_MAX_CTX = 1_000_000;
const llm = createManaLlmClient();
interface EngineResult {
@@ -110,6 +122,16 @@ export async function runCompanionChat(
// Writes (propose policy) stay sequential to preserve
// user-visible intent order in the proposal inbox.
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
// Fold the middle of messages into a compact-summary at
// 92% of the model's context window. Mirrors the mana-ai
// wiring; one call to the same LLM client, same model.
compactor: {
maxContextTokens: COMPACT_MAX_CTX,
compact: async (msgs) => {
const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' });
return { messages: res.messages, compactedTurns: res.compactedTurns };
},
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
const startedAt = Date.now();