feat(webapp): wire context-window compactor into Companion + Mission runner (M2.4)

Symmetrical to 83a4606a9, which wired the compactor into mana-ai. Both
webapp consumers of runPlannerLoop (Companion chat engine, Mission
runner) now pass a compactor that folds the middle of messages into
a <compact-summary> when cumulative token usage hits 92% of
maxContextTokens.
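
For reference, the contract both call sites hand to runPlannerLoop is the
two-field compactor object shown in the wiring below. Roughly (a sketch
only; names like `usedTokens` are illustrative stand-ins, not the
shared-ai signatures):

    // Sketch of the contract, not the shared-ai source: the loop hands
    // the compactor the running history once cumulative usage crosses
    // 92% of the configured ceiling.
    interface Compactor {
      maxContextTokens: number;
      compact(msgs: ChatMessage[]): Promise<{
        messages: ChatMessage[];
        compactedTurns: number;
      }>;
    }

    // Illustrative threshold check (usedTokens stands in for the loop's
    // own token accounting):
    if (usedTokens >= compactor.maxContextTokens * 0.92) {
      history = (await compactor.compact(history)).messages;
    }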

COMPACT_MAX_CTX is a module constant — gemini-2.5-flash's 1M-token
ceiling — not env-wired. Vite builds for the browser and PUBLIC_*
flags are the wrong tool for a value that only matters to the loop
runtime; changing the model means changing the constant alongside the
model reference anyway.

Uses the same LlmClient + model as the planner's own calls. A cheaper
compactor-tier model (Haiku) is the optional M2.5 follow-up and does
not require changing this wiring — only the compactHistory `opts.model`
gets swapped.
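
If that follow-up lands, the swap would look roughly like this (the
model id is a placeholder, not a decision made here):

    // Hypothetical M2.5 change: same wiring, cheaper model for the
    // summarisation call only.
    const res = await compactHistory(msgs, {
      llm: deps.llm,
      model: 'anthropic/claude-haiku', // placeholder id, not confirmed
    });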

Type-check clean (svelte-check: 0 errors, 0 warnings across 7389 files).
All 31 companion + mission tests green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS 2026-04-23 15:33:18 +02:00
parent 3eca5ac201
commit 703ef69ca9
2 changed files with 43 additions and 0 deletions


@@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types';
import {
AI_TOOL_CATALOG_BY_NAME,
buildSystemPrompt,
compactHistory,
runPlannerLoop,
runPrePlanGuardrails,
runPreExecuteGuardrails,
@@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue
* the shared-ai default; re-declared here for clarity. */
const MAX_PLANNER_ROUNDS = 5;
/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's
* 1M-token budget. Missions can accumulate many iterations over time,
* and read-heavy reasoning produces chatty tool results; the compactor
* folds pre-tail turns at 92% so we never hit a 400 from the provider. */
const COMPACT_MAX_CTX = 1_000_000;
/** Hard timeout for one mission run. 180 s is comfortable for a cloud
* model doing up to 5 reasoning rounds; anything longer means a wedged
* backend and should fail the iteration rather than sit in `running`. */
@@ -273,6 +280,20 @@ async function runMissionInner(
// pre-execute guardrail can reason about state built up by
// prior steps in the same round.
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
// Fold older turns into a compact-summary at 92% of
// maxContextTokens. Same LlmClient + model as the
// planner; one extra LLM call, but only when usage
// actually approaches the ceiling.
compactor: {
maxContextTokens: COMPACT_MAX_CTX,
compact: async (msgs) => {
const res = await compactHistory(msgs, {
llm: deps.llm,
model: deps.model ?? 'google/gemini-2.5-flash',
});
return { messages: res.messages, compactedTurns: res.compactedTurns };
},
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
await checkCancel();


@@ -15,6 +15,7 @@ import {
runPlannerLoop,
AI_TOOL_CATALOG,
AI_TOOL_CATALOG_BY_NAME,
compactHistory,
type ChatMessage,
type ToolCallRequest,
type ToolResult,
@@ -29,6 +30,17 @@ import type { LocalMessage } from './types';
const MAX_TOOL_ROUNDS = 3;
/**
* Context-window ceiling for the compactor. gemini-2.5-flash supports
* 1M tokens; the Companion chat rarely gets anywhere near that because
* we cap rounds at 3, but long chat histories plus chatty tool results
* (list_tasks on a power user) can still push us toward it. Kept as a
* module constant rather than env-wired: the webapp's Vite build would
* need a PUBLIC_ prefix, and local-first apps shouldn't ship that kind
* of flag to the browser when the default already works.
*/
const COMPACT_MAX_CTX = 1_000_000;
const llm = createManaLlmClient();
interface EngineResult {
@@ -110,6 +122,16 @@ export async function runCompanionChat(
// Writes (propose policy) stay sequential to preserve
// user-visible intent order in the proposal inbox.
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
// Fold the middle of messages into a compact-summary at
// 92% of the model's context window. Mirrors the mana-ai
// wiring; one call to the same LLM client, same model.
compactor: {
maxContextTokens: COMPACT_MAX_CTX,
compact: async (msgs) => {
const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' });
return { messages: res.messages, compactedTurns: res.compactedTurns };
},
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
const startedAt = Date.now();