diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
index 75fc5fc1a..4bfee83b3 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
@@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types';
 import {
   AI_TOOL_CATALOG_BY_NAME,
   buildSystemPrompt,
+  compactHistory,
   runPlannerLoop,
   runPrePlanGuardrails,
   runPreExecuteGuardrails,
@@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue
  * the shared-ai default; re-declared here for clarity. */
 const MAX_PLANNER_ROUNDS = 5;
 
+/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's
+ * 1M-token budget. Missions can accumulate many iterations over time
+ * and — with read-heavy reasoning — chatty tool results; the compactor
+ * folds pre-tail turns at 92% so we never hit a 400 from the provider. */
+const COMPACT_MAX_CTX = 1_000_000;
+
 /** Hard timeout for one mission run. 180 s is comfortable for a cloud
  * model doing up to 5 reasoning rounds; anything longer means a wedged
  * backend and should fail the iteration rather than sit in `running`. */
@@ -273,6 +280,20 @@ async function runMissionInner(
           // pre-execute guardrail can reason about state built up by
           // prior steps in the same round.
           isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
+          // Fold older turns into a compact-summary at 92% of
+          // maxContextTokens. Same LlmClient + model as the
+          // planner; one extra LLM call, but only when usage
+          // actually approaches the ceiling.
+          compactor: {
+            maxContextTokens: COMPACT_MAX_CTX,
+            compact: async (msgs) => {
+              const res = await compactHistory(msgs, {
+                llm: deps.llm,
+                model: deps.model ?? 'google/gemini-2.5-flash',
+              });
+              return { messages: res.messages, compactedTurns: res.compactedTurns };
+            },
+          },
         },
         onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
           await checkCancel();
diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
index a1581270c..acdcff3dd 100644
--- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts
+++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
@@ -15,6 +15,7 @@ import {
   runPlannerLoop,
   AI_TOOL_CATALOG,
   AI_TOOL_CATALOG_BY_NAME,
+  compactHistory,
   type ChatMessage,
   type ToolCallRequest,
   type ToolResult,
@@ -29,6 +30,17 @@ import type { LocalMessage } from './types';
 
 const MAX_TOOL_ROUNDS = 3;
 
+/**
+ * Context-window ceiling for the compactor. gemini-2.5-flash supports
+ * 1M tokens; the Companion chat rarely gets anywhere near that because
+ * we cap rounds at 3, but long chat histories plus chatty tool results
+ * (list_tasks on a power user) can still push us toward it. Kept as a
+ * module constant rather than env-wired — the webapp's Vite build would
+ * need a PUBLIC_ prefix and local-first apps shouldn't ship that kind
+ * of flag to the browser when the default already works.
+ */
+const COMPACT_MAX_CTX = 1_000_000;
+
 const llm = createManaLlmClient();
 
 interface EngineResult {
@@ -110,6 +122,16 @@ export async function runCompanionChat(
       // Writes (propose policy) stay sequential to preserve
       // user-visible intent order in the proposal inbox.
       isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
+      // Fold the middle of messages into a compact-summary at
+      // 92% of the model's context window. Mirrors the mana-ai
+      // wiring; one call to the same LLM client, same model.
+      compactor: {
+        maxContextTokens: COMPACT_MAX_CTX,
+        compact: async (msgs) => {
+          const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' });
+          return { messages: res.messages, compactedTurns: res.compactedTurns };
+        },
+      },
     },
     onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
      const startedAt = Date.now();
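
Reviewer note: a minimal sketch of the contract both call sites assume. The only parts
taken from the diff itself are the hook shape ({ maxContextTokens, compact }), the
observable result of compactHistory ({ messages, compactedTurns }), and the 92%
threshold mentioned in the comments. Everything else — the Compactor/CompactResult
names, maybeCompact, estimateTokens, and the reduced ChatMessage shape — is a
hypothetical stand-in for whatever the shared ai package actually defines.

    // Sketch only: how a planner loop might consume the compactor hook.
    // All names below are illustrative, not the shared-ai API.
    type ChatMessage = {
      role: 'system' | 'user' | 'assistant' | 'tool';
      content: string;
    };

    interface CompactResult {
      messages: ChatMessage[]; // summary turn(s) plus the preserved tail
      compactedTurns: number;  // how many pre-tail turns were folded away
    }

    interface Compactor {
      maxContextTokens: number;
      compact: (msgs: ChatMessage[]) => Promise<CompactResult>;
    }

    // "folds pre-tail turns at 92%", per the runner.ts comment above.
    const COMPACT_THRESHOLD = 0.92;

    // Crude stand-in estimator (~4 chars per token); the real loop
    // presumably uses provider usage data or a proper tokenizer.
    function estimateTokens(msgs: ChatMessage[]): number {
      return Math.ceil(msgs.reduce((n, m) => n + m.content.length, 0) / 4);
    }

    // Called between rounds: compaction costs one extra LLM call, so it
    // runs only when estimated usage crosses the threshold.
    async function maybeCompact(
      msgs: ChatMessage[],
      compactor: Compactor,
    ): Promise<ChatMessage[]> {
      if (estimateTokens(msgs) < compactor.maxContextTokens * COMPACT_THRESHOLD) {
        return msgs;
      }
      const { messages, compactedTurns } = await compactor.compact(msgs);
      console.debug(`history compactor folded ${compactedTurns} turn(s)`);
      return messages;
    }

If the threshold check does live inside the loop like this (the callers only supply the
ceiling and the fold function), that explains why both wirings above stay so small: each
call site contributes one constant and one compactHistory closure, nothing more.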