mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-23 00:46:43 +02:00
The Claude-Code wU2 pattern goes live. Every mission run now passes a
compactor into runPlannerLoop that will fire once if cumulative token
usage crosses 92% of MANA_AI_COMPACT_MAX_CTX (default 1_000_000, the
gemini-2.5-flash ceiling). Override via env for deployments on smaller
models; set to 0 to disable entirely.
The compactor reuses the planner's own LlmClient + gemini-2.5-flash
model for now. When mana-llm grows a Haiku tier we'll route the
compactor there — it's pure summarisation and a cheaper model saves
tokens exactly where they matter.
New metrics:
- mana_ai_compactions_triggered_total — counter, one per firing
- mana_ai_compacted_turns — histogram, how many middle turns got
folded each time (< 3 ⇒ maxCtx is probably misconfigured)
Logs print a 60-char tail of the summary.goal so the "what was this
mission doing again" question survives a compaction.
No new tests here — compactHistory and the loop wiring are already
covered by the 22 tests in shared-ai (M2.1 + M2.2). The 57 existing
mana-ai bun tests stay green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
100 lines
4.3 KiB
TypeScript
100 lines
4.3 KiB
TypeScript
/**
 * Env-driven config for the mana-ai service.
 *
 * Only references the secrets/URLs the tick loop needs. Auth is
 * service-to-service via MANA_SERVICE_KEY (same pattern as mana-credits,
 * mana-user); no end-user JWTs reach this service.
 */

/**
 * Runtime configuration of the mana-ai service, as produced by `loadConfig()`.
 *
 * Every member except `missionGrantPrivateKeyPem` is always populated.
 */
export interface Config {
	/** TCP port of the HTTP surface. */
	port: number;

	/** Connection URL of the mana_sync DB — source of Mission rows (via sync_changes replay). */
	syncDatabaseUrl: string;

	/** Base URL of the mana-llm HTTP endpoint (OpenAI-compatible). */
	manaLlmUrl: string;

	/**
	 * Base URL of the unified mana-api (Hono/Bun, port 3060). It hosts the
	 * module-specific compute endpoints, including news-research. The
	 * pre-planning research step uses it to feed web-research context into
	 * the planner prompt before the planner produces plan steps.
	 */
	manaApiUrl: string;

	/**
	 * Base URL of the mana-research HTTP endpoint (Hono/Bun, port 3068). It
	 * hosts the async-research submit/poll endpoints that the deep-research
	 * pre-planning path delegates to for multi-minute Gemini tasks.
	 */
	manaResearchUrl: string;

	/**
	 * Opt-in gate for the deep-research pre-planning path. Off by default:
	 * deep runs cost $1–7 per mission, so they should only be triggered when
	 * explicitly enabled on the server.
	 */
	deepResearchEnabled: boolean;

	/** Shared key for service-to-service calls. */
	serviceKey: string;

	/** How often the background tick scans for due Missions, in ms. */
	tickIntervalMs: number;

	/**
	 * Set to false to boot the HTTP surface without the background tick —
	 * useful for local smoke-tests and Docker image build verification.
	 */
	tickEnabled: boolean;

	/**
	 * PEM-encoded RSA-OAEP-2048 private key for unwrapping Mission Grants.
	 * Paired with the public key pinned in mana-auth's config. Provision it
	 * via Docker secret / out-of-band env; never commit it.
	 *
	 * Optional at boot so the service can start without grant support
	 * (development, legacy deployments). When absent, Missions that carry a
	 * Grant are skipped with state='grant-missing'.
	 *
	 * Generate with:
	 *   openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out priv.pem
	 *   openssl pkey -in priv.pem -pubout -out pub.pem
	 */
	missionGrantPrivateKeyPem?: string;

	/**
	 * Policy gate mode for server-side tool dispatch:
	 *   'off'      — legacy, no policy evaluation.
	 *   'log-only' — evaluate and log decisions, never block.
	 *   'enforce'  — convert deny decisions into failed ToolResults so the
	 *                LLM sees the rejection and can course-correct.
	 * Defaults to 'log-only' to match the M1 rollout plan.
	 */
	policyMode: 'off' | 'log-only' | 'enforce';

	/**
	 * Context-window ceiling used by the compactor (Claude-Code `wU2`
	 * pattern). When cumulative prompt+completion tokens cross 92% of this
	 * value, the loop folds the middle of the message history into a compact
	 * summary before the next LLM call. The default matches
	 * gemini-2.5-flash's 1M-token context window; override it via
	 * MANA_AI_COMPACT_MAX_CTX for deployments on smaller models. Set to 0 to
	 * disable compaction entirely.
	 */
	compactMaxContextTokens: number;
}

/**
 * Read a mandatory environment variable.
 *
 * @param key - Name of the variable to look up in `process.env`.
 * @param fallback - Value used only when the variable is not set at all.
 * @returns The variable's value, or `fallback` when the variable is unset.
 * @throws Error when the resolved value is empty. Note that a variable that
 *   is set to the empty string does NOT fall through to `fallback` (`??`
 *   only replaces null/undefined), so it is reported as missing.
 */
function requireEnv(key: string, fallback?: string): string {
	const resolved = process.env[key] ?? fallback;
	if (resolved) return resolved;
	throw new Error(`Missing required env var: ${key}`);
}

/**
 * Validate the raw POLICY_MODE setting.
 *
 * Matching is case-insensitive; an undefined input selects 'log-only'.
 *
 * @param raw - The unprocessed setting, or undefined when it is not set.
 * @returns The lower-cased mode, narrowed to the `Config['policyMode']` union.
 * @throws Error when the value is not one of off | log-only | enforce. The
 *   message echoes the original, un-normalised input.
 */
function parsePolicyMode(raw: string | undefined): Config['policyMode'] {
	const normalized = (raw ?? 'log-only').toLowerCase();
	switch (normalized) {
		case 'off':
		case 'log-only':
		case 'enforce':
			return normalized;
		default:
			throw new Error(`POLICY_MODE must be off|log-only|enforce, got "${raw}"`);
	}
}

/**
 * Read a non-negative integer from the environment.
 *
 * A bare `parseInt` is too lenient for configuration: it returns NaN for ""
 * or "abc", and silently stops at the first non-digit, so "8080abc" becomes
 * 8080 and "1_000_000" or "1e6" become 1. This helper accepts only a plain
 * decimal integer (surrounding whitespace and a leading "+" are tolerated)
 * and fails fast otherwise.
 *
 * @param key - Name of the variable to look up in `process.env`.
 * @param fallback - Value returned when the variable is not set at all.
 * @param min - Smallest accepted value (inclusive).
 * @param max - Largest accepted value (inclusive).
 * @returns The parsed integer, or `fallback` when the variable is unset.
 * @throws Error when the variable is set but is not an integer in [min, max].
 */
function parseIntEnv(
	key: string,
	fallback: number,
	min = 0,
	max = Number.MAX_SAFE_INTEGER
): number {
	const raw = process.env[key];
	if (raw === undefined) return fallback;

	const trimmed = raw.trim();
	const value = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
	if (!Number.isSafeInteger(value) || value < min || value > max) {
		throw new Error(`${key} must be an integer between ${min} and ${max}, got "${raw}"`);
	}
	return value;
}

/**
 * Build the service configuration from `process.env`.
 *
 * Every setting has a development default, so the function succeeds with an
 * empty environment. Only MANA_AI_PRIVATE_KEY_PEM has no default; it stays
 * undefined when unset or empty.
 *
 * @returns The fully resolved configuration.
 * @throws Error when a URL/key variable is set to the empty string, when
 *   POLICY_MODE is not a known mode, or when a numeric variable is not a
 *   valid integer in its allowed range.
 */
export function loadConfig(): Config {
	return {
		// 0–65535 is the valid TCP port range.
		port: parseIntEnv('PORT', 3067, 0, 65535),
		syncDatabaseUrl: requireEnv(
			'SYNC_DATABASE_URL',
			'postgresql://mana:devpassword@localhost:5432/mana_sync'
		),
		manaLlmUrl: requireEnv('MANA_LLM_URL', 'http://localhost:3020'),
		manaApiUrl: requireEnv('MANA_API_URL', 'http://localhost:3060'),
		manaResearchUrl: requireEnv('MANA_RESEARCH_URL', 'http://localhost:3068'),
		// Opt-in: only the exact string 'true' enables deep research.
		deepResearchEnabled: process.env.MANA_AI_DEEP_RESEARCH_ENABLED === 'true',
		serviceKey: requireEnv('MANA_SERVICE_KEY', 'dev-service-key'),
		// At least 1 ms: a zero interval would presumably make the tick timer
		// spin (assumption about the consumer — it is not visible here).
		tickIntervalMs: parseIntEnv('TICK_INTERVAL_MS', 60000, 1),
		// Opt-out: the tick runs unless the value is exactly 'false'.
		tickEnabled: process.env.TICK_ENABLED !== 'false',
		// `||` on purpose: an empty string counts as "no key provisioned".
		missionGrantPrivateKeyPem: process.env.MANA_AI_PRIVATE_KEY_PEM || undefined,
		policyMode: parsePolicyMode(process.env.POLICY_MODE),
		// 0 is a legal value here: it disables compaction.
		compactMaxContextTokens: parseIntEnv('MANA_AI_COMPACT_MAX_CTX', 1000000, 0),
	};
}