From 703ef69ca9a41d2ff0b89a04b75069a20a2abe13 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:33:18 +0200 Subject: [PATCH] feat(webapp): wire context-window compactor into Companion + Mission runner (M2.4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Symmetrical to 83a4606a9 which wired the compactor into mana-ai. Both webapp consumers of runPlannerLoop (Companion chat engine, Mission runner) now pass a compactor that folds the middle of messages into a compact-summary when cumulative token usage hits 92% of maxContextTokens. COMPACT_MAX_CTX is a module constant — gemini-2.5-flash's 1M-token ceiling — not env-wired. Vite builds for the browser and PUBLIC_* flags are the wrong tool for a value that only matters to the loop runtime; changing the model means changing the constant alongside the model reference anyway. Uses the same LlmClient + model as the planner's own calls. A cheaper compactor-tier model (Haiku) is the optional M2.5 follow-up and does not require changing this wiring — only the compactHistory `opts.model` gets swapped. Type-check clean (svelte-check 0 errors 0 warnings across 7389 files). All 31 companion + mission tests green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../web/src/lib/data/ai/missions/runner.ts | 21 ++++++++++++++++++ .../web/src/lib/modules/companion/engine.ts | 22 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts index 75fc5fc1a..4bfee83b3 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts @@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types'; import { AI_TOOL_CATALOG_BY_NAME, buildSystemPrompt, + compactHistory, runPlannerLoop, runPrePlanGuardrails, runPreExecuteGuardrails, @@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue * the shared-ai default; re-declared here for clarity. */ const MAX_PLANNER_ROUNDS = 5; +/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's + * 1M-token budget. Missions can accumulate many iterations over time + * and — with read-heavy reasoning — chatty tool results; the compactor + * folds pre-tail turns at 92% so we never hit a 400 from the provider. */ +const COMPACT_MAX_CTX = 1_000_000; + /** Hard timeout for one mission run. 180 s is comfortable for a cloud * model doing up to 5 reasoning rounds; anything longer means a wedged * backend and should fail the iteration rather than sit in `running`. */ @@ -273,6 +280,20 @@ async function runMissionInner( // pre-execute guardrail can reason about state built up by // prior steps in the same round. isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto', + // Fold older turns into a compact-summary at 92% of + // maxContextTokens. Same LlmClient + model as the + // planner; one extra LLM call, but only when usage + // actually approaches the ceiling. 
+ compactor: { + maxContextTokens: COMPACT_MAX_CTX, + compact: async (msgs) => { + const res = await compactHistory(msgs, { + llm: deps.llm, + model: deps.model ?? 'google/gemini-2.5-flash', + }); + return { messages: res.messages, compactedTurns: res.compactedTurns }; + }, + }, }, onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => { await checkCancel(); diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts index a1581270c..acdcff3dd 100644 --- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts +++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts @@ -15,6 +15,7 @@ import { runPlannerLoop, AI_TOOL_CATALOG, AI_TOOL_CATALOG_BY_NAME, + compactHistory, type ChatMessage, type ToolCallRequest, type ToolResult, @@ -29,6 +30,17 @@ import type { LocalMessage } from './types'; const MAX_TOOL_ROUNDS = 3; +/** + * Context-window ceiling for the compactor. gemini-2.5-flash supports + * 1M tokens; the Companion chat rarely gets anywhere near that because + * we cap rounds at 3, but long chat histories plus chatty tool results + * (list_tasks on a power user) can still push us toward it. Kept as a + * module constant rather than env-wired — the webapp's Vite build would + * need a PUBLIC_ prefix and local-first apps shouldn't ship that kind + * of flag to the browser when the default already works. + */ +const COMPACT_MAX_CTX = 1_000_000; + const llm = createManaLlmClient(); interface EngineResult { @@ -110,6 +122,16 @@ export async function runCompanionChat( // Writes (propose policy) stay sequential to preserve // user-visible intent order in the proposal inbox. isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto', + // Fold the middle of messages into a compact-summary at + // 92% of the model's context window. Mirrors the mana-ai + // wiring; one call to the same LLM client, same model. 
+ compactor: { + maxContextTokens: COMPACT_MAX_CTX, + compact: async (msgs) => { + const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' }); + return { messages: res.messages, compactedTurns: res.compactedTurns }; + }, + }, }, onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => { const startedAt = Date.now();