Wire the context-window compactor into mana-ai: new config knob
(MANA_AI_COMPACT_MAX_CTX), compactor hookup in the cron tick, two Prometheus
metrics, and the matching CLAUDE.md checklist entry.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation (tabs assumed) and the position of blank context lines were
inferred from the hunk line counts -- confirm against the original commit
before applying. Repairs relative to the collapsed copy:
  * tick.ts: restored the stripped type argument on `Parameters<...>[0]`.
  * CLAUDE.md: restored the stripped `<compact-summary>` tag name
    (presumed from the metrics help text -- verify).
  * metrics.ts: counter doc/help now describe what the code counts
    (compaction events, not mission runs).
  * config.ts: an unparsable MANA_AI_COMPACT_MAX_CTX is normalised to 0
    (the documented "disabled" value) instead of leaking NaN into Config.

diff --git a/services/mana-ai/CLAUDE.md b/services/mana-ai/CLAUDE.md
index 7d083565b..afae2c199 100644
--- a/services/mana-ai/CLAUDE.md
+++ b/services/mana-ai/CLAUDE.md
@@ -103,6 +103,7 @@ Claude-Code-inspirierte Primitive in `runPlannerLoop` (live in `@mana/shared-ai`
 
 - [x] `reminderChannel` wired via `buildReminderChannel()` in `src/planner/reminders.ts`. Live-Producer 1: `tokenBudgetReminder` — warnt ab 75% Tagesbudget, eskaliert ab 100% mit "JETZT abschliessen"-Prompt. Round-usage wird on-the-fly drauf addiert, so dass der Warn-Level mitwandert.
 - [x] `retryLoopReminder` live — feuert ab Round 3 wenn die letzten 2 Tool-Calls beide fehlschlugen. Liest das `recentCalls`-Sliding-Window (5 Einträge, oldest-first) aus `LoopState`.
+- [x] **Context-window compactor** (wU2 pattern) via `compactHistory()` aus `@mana/shared-ai`. Trigger bei 92% von `MANA_AI_COMPACT_MAX_CTX` (default 1M für gemini-2.5-flash); faltet pre-tail-Turns in ein `<compact-summary>` nach fixem Schema (Goal/Decisions/Tools Called/Current Progress). Metriken: `mana_ai_compactions_triggered_total`, `mana_ai_compacted_turns`.
 - [x] `POLICY_MODE` env (off/log-only/enforce, default log-only) für die mana-ai-seitige Freitext-Inspection (`detectInjectionMarker`). Rate-Limit und destructive-opt-in sind hier NICHT aktiv — tools werden nur als PlanSteps aufgezeichnet, die echte Enforcement passiert im Webapp-Client.
 - [ ] Parallel-Reads im Server-Tick haben keinen Effekt, weil `SERVER_TOOLS` per Konstruktion propose-only ist. Könnte relevant werden sobald mana-ai die vollständige tool-registry absorbiert (M4 des Personas-Plans).
 
diff --git a/services/mana-ai/src/config.ts b/services/mana-ai/src/config.ts
index 35379e40d..87a85e76c 100644
--- a/services/mana-ai/src/config.ts
+++ b/services/mana-ai/src/config.ts
@@ -55,6 +55,16 @@ export interface Config {
 	 * Defaults to 'log-only' to match the M1 rollout plan.
 	 */
 	policyMode: 'off' | 'log-only' | 'enforce';
+	/**
+	 * Context-window ceiling used by the compactor (Claude-Code `wU2`
+	 * pattern). When cumulative prompt+completion tokens cross 92% of
+	 * this, the loop folds the middle of messages into a compact
+	 * summary before the next LLM call. Default matches
+	 * gemini-2.5-flash's 1M-token context window; override via
+	 * MANA_AI_COMPACT_MAX_CTX for deployments on smaller models. Set
+	 * to 0 to disable compaction entirely.
+	 */
+	compactMaxContextTokens: number;
 }
 
 function requireEnv(key: string, fallback?: string): string {
@@ -85,5 +95,6 @@ export function loadConfig(): Config {
 		tickEnabled: process.env.TICK_ENABLED !== 'false',
 		missionGrantPrivateKeyPem: process.env.MANA_AI_PRIVATE_KEY_PEM || undefined,
 		policyMode: parsePolicyMode(process.env.POLICY_MODE),
+		compactMaxContextTokens: parseInt(process.env.MANA_AI_COMPACT_MAX_CTX ?? '1000000', 10) || 0,
 	};
 }
diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts
index b304ffbbc..823c55533 100644
--- a/services/mana-ai/src/cron/tick.ts
+++ b/services/mana-ai/src/cron/tick.ts
@@ -15,6 +15,7 @@
 
 import {
 	buildSystemPrompt,
+	compactHistory,
 	runPlannerLoop,
 	type Mission,
 	type PlannedStep,
@@ -46,6 +47,8 @@ import {
 	toolCallsTotal,
 	plannerRoundsHistogram,
 	providerErrorsTotal,
+	compactionsTriggeredTotal,
+	compactedTurnsHistogram,
 } from '../metrics';
 import { unwrapMissionGrant } from '../crypto/unwrap-grant';
 import { detectInjectionMarker } from '@mana/tool-registry';
@@ -390,6 +393,31 @@ async function planOneMission(
 		pretickUsage24h,
 	});
 
+	const plannerModel = 'google/gemini-2.5-flash';
+
+	// Claude-Code wU2 pattern: fold the middle of messages into a structured
+	// summary once cumulative tokens cross 92% of maxContextTokens. Uses
+	// the same LLM + model as the planner itself; later we can route this
+	// to a cheaper model (Haiku tier) when mana-llm supports it.
+	const compactor =
+		config.compactMaxContextTokens > 0
+			? {
+					maxContextTokens: config.compactMaxContextTokens,
+					compact: async (msgs: Parameters<typeof compactHistory>[0]) => {
+						const result = await compactHistory(msgs, { llm, model: plannerModel });
+						if (result.compactedTurns > 0) {
+							compactionsTriggeredTotal.inc();
+							compactedTurnsHistogram.observe(result.compactedTurns);
+							console.log(
+								`[mana-ai tick] mission=${m.id} compacted ${result.compactedTurns} turns ` +
+									`(goal=${result.summary.goal.slice(0, 60)}...)`
+							);
+						}
+						return { messages: result.messages, compactedTurns: result.compactedTurns };
+					},
+				}
+			: undefined;
+
 	try {
 		const loopResult = await runPlannerLoop({
 			llm,
@@ -397,8 +425,9 @@ async function planOneMission(
 				systemPrompt,
 				userPrompt,
 				tools,
-				model: 'google/gemini-2.5-flash',
+				model: plannerModel,
 				reminderChannel,
+				compactor,
 			},
 			// Server-side onToolCall: no execution, just acknowledge.
 			// The captured call lands in loopResult.executedCalls and
diff --git a/services/mana-ai/src/metrics.ts b/services/mana-ai/src/metrics.ts
index 747c88820..e6e711f66 100644
--- a/services/mana-ai/src/metrics.ts
+++ b/services/mana-ai/src/metrics.ts
@@ -257,3 +257,29 @@ export const remindersEmittedTotal = new Counter({
 	labelNames: ['producer', 'severity'] as const,
 	registers: [register],
 });
+
+// ── Context-window compactor (Claude-Code wU2 pattern) ──────────────
+
+/**
+ * Bumped on every compaction call that folded at least one turn (not
+ * deduplicated per mission run). Pair with `mana_ai_mission_errors_total`
+ * to detect "compactor fires often but missions still fail" regressions.
+ */
+export const compactionsTriggeredTotal = new Counter({
+	name: 'mana_ai_compactions_triggered_total',
+	help: 'Compaction events in which the context-window compactor folded at least one turn.',
+	registers: [register],
+});
+
+/**
+ * Histogram of how many middle turns got folded into each compact-
+ * summary. Low values (< 3) mean the trigger fired on a history that
+ * was already short — usually a signal that maxContextTokens is
+ * misconfigured.
+ */
+export const compactedTurnsHistogram = new Histogram({
+	name: 'mana_ai_compacted_turns',
+	help: 'Number of messages folded into a compact-summary per compaction event.',
+	buckets: [1, 2, 4, 8, 16, 32, 64, 128],
+	registers: [register],
+});