mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:41:09 +02:00
feat(webapp): wire context-window compactor into Companion + Mission runner (M2.4)
Symmetrical to 83a4606a9, which wired the compactor into mana-ai. Both
webapp consumers of runPlannerLoop (Companion chat engine, Mission
runner) now pass a compactor that folds the middle of messages into
a <compact-summary> when cumulative token usage hits 92% of
maxContextTokens.
COMPACT_MAX_CTX is a module constant — gemini-2.5-flash's 1M-token
ceiling — not env-wired. Vite builds for the browser and PUBLIC_*
flags are the wrong tool for a value that only matters to the loop
runtime; changing the model means changing the constant alongside the
model reference anyway.
Uses the same LlmClient + model as the planner's own calls. A cheaper
compactor-tier model (Haiku) is the optional M2.5 follow-up and does
not require changing this wiring — only the compactHistory `opts.model`
gets swapped.
Type-check clean (svelte-check 0 errors 0 warnings across 7389 files).
All 31 companion + mission tests green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3eca5ac201
commit
703ef69ca9
2 changed files with 43 additions and 0 deletions
|
|
@ -42,6 +42,7 @@ import type { Mission, MissionIteration, PlanStep } from './types';
|
|||
import {
|
||||
AI_TOOL_CATALOG_BY_NAME,
|
||||
buildSystemPrompt,
|
||||
compactHistory,
|
||||
runPlannerLoop,
|
||||
runPrePlanGuardrails,
|
||||
runPreExecuteGuardrails,
|
||||
|
|
@ -62,6 +63,12 @@ const RESEARCH_TRIGGER = /\b(recherchier|research|news|finde|suche|aktuelle|neue
|
|||
* the shared-ai default; re-declared here for clarity. */
|
||||
const MAX_PLANNER_ROUNDS = 5;
|
||||
|
||||
/** Context-window ceiling for the compactor. Matches gemini-2.5-flash's
|
||||
* 1M-token budget. Missions can accumulate many iterations over time
|
||||
* and — with read-heavy reasoning — chatty tool results; the compactor
|
||||
* folds pre-tail turns at 92% so we never hit a 400 from the provider. */
|
||||
const COMPACT_MAX_CTX = 1_000_000;
|
||||
|
||||
/** Hard timeout for one mission run. 180 s is comfortable for a cloud
|
||||
* model doing up to 5 reasoning rounds; anything longer means a wedged
|
||||
* backend and should fail the iteration rather than sit in `running`. */
|
||||
|
|
@ -273,6 +280,20 @@ async function runMissionInner(
|
|||
// pre-execute guardrail can reason about state built up by
|
||||
// prior steps in the same round.
|
||||
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
|
||||
// Fold older turns into a compact-summary at 92% of
|
||||
// maxContextTokens. Same LlmClient + model as the
|
||||
// planner; one extra LLM call, but only when usage
|
||||
// actually approaches the ceiling.
|
||||
compactor: {
|
||||
maxContextTokens: COMPACT_MAX_CTX,
|
||||
compact: async (msgs) => {
|
||||
const res = await compactHistory(msgs, {
|
||||
llm: deps.llm,
|
||||
model: deps.model ?? 'google/gemini-2.5-flash',
|
||||
});
|
||||
return { messages: res.messages, compactedTurns: res.compactedTurns };
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
|
||||
await checkCancel();
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import {
|
|||
runPlannerLoop,
|
||||
AI_TOOL_CATALOG,
|
||||
AI_TOOL_CATALOG_BY_NAME,
|
||||
compactHistory,
|
||||
type ChatMessage,
|
||||
type ToolCallRequest,
|
||||
type ToolResult,
|
||||
|
|
@ -29,6 +30,17 @@ import type { LocalMessage } from './types';
|
|||
|
||||
const MAX_TOOL_ROUNDS = 3;
|
||||
|
||||
/**
|
||||
* Context-window ceiling for the compactor. gemini-2.5-flash supports
|
||||
* 1M tokens; the Companion chat rarely gets anywhere near that because
|
||||
* we cap rounds at 3, but long chat histories plus chatty tool results
|
||||
* (list_tasks on a power user) can still push us toward it. Kept as a
|
||||
* module constant rather than env-wired — the webapp's Vite build would
|
||||
* need a PUBLIC_ prefix and local-first apps shouldn't ship that kind
|
||||
* of flag to the browser when the default already works.
|
||||
*/
|
||||
const COMPACT_MAX_CTX = 1_000_000;
|
||||
|
||||
const llm = createManaLlmClient();
|
||||
|
||||
interface EngineResult {
|
||||
|
|
@ -110,6 +122,16 @@ export async function runCompanionChat(
|
|||
// Writes (propose policy) stay sequential to preserve
|
||||
// user-visible intent order in the proposal inbox.
|
||||
isParallelSafe: (name) => AI_TOOL_CATALOG_BY_NAME.get(name)?.defaultPolicy === 'auto',
|
||||
// Fold the middle of messages into a compact-summary at
|
||||
// 92% of the model's context window. Mirrors the mana-ai
|
||||
// wiring; one call to the same LLM client, same model.
|
||||
compactor: {
|
||||
maxContextTokens: COMPACT_MAX_CTX,
|
||||
compact: async (msgs) => {
|
||||
const res = await compactHistory(msgs, { llm, model: 'google/gemini-2.5-flash' });
|
||||
return { messages: res.messages, compactedTurns: res.compactedTurns };
|
||||
},
|
||||
},
|
||||
},
|
||||
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
|
||||
const startedAt = Date.now();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue