managarten/services/mana-ai/src/config.ts
Till JS 83a4606a9a feat(mana-ai): wire context-window compactor into mission runner (M2.3)
The Claude-Code wU2 pattern goes live. Every mission run now passes a
compactor into runPlannerLoop that will fire once if cumulative token
usage crosses 92% of MANA_AI_COMPACT_MAX_CTX (default 1_000_000, the
gemini-2.5-flash ceiling). Override via env for deployments on smaller
models; set to 0 to disable entirely.

The compactor reuses the planner's own LlmClient + gemini-2.5-flash
model for now. When mana-llm grows a Haiku tier we'll route the
compactor there — it's pure summarisation and a cheaper model saves
tokens exactly where they matter.

New metrics:
  - mana_ai_compactions_triggered_total — counter, one per firing
  - mana_ai_compacted_turns — histogram, how many middle turns got
    folded each time (< 3 ⇒ maxCtx is probably misconfigured)

Logs print a 60-char tail of the summary.goal so the "what was this
mission doing again" question survives a compaction.

No new tests here — compactHistory and the loop wiring are already
covered by the 22 tests in shared-ai (M2.1 + M2.2). The 57 existing
mana-ai bun tests stay green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:28:20 +02:00

100 lines
4.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Env-driven config for the mana-ai service.
*
* Only references the secrets/URLs the tick loop needs. Auth is
* service-to-service via MANA_SERVICE_KEY (same pattern as mana-credits,
* mana-user); no end-user JWTs reach this service.
*/
export interface Config {
  /** Port number for the service; read from `PORT` (default 3067). */
  port: number;

  /** mana_sync database URL. Mission rows are sourced from it by replaying sync_changes. */
  syncDatabaseUrl: string;

  /** HTTP endpoint of mana-llm (OpenAI-compatible). */
  manaLlmUrl: string;

  /**
   * HTTP endpoint of the unified mana-api (Hono/Bun, port 3060), which
   * hosts module-specific compute endpoints such as news-research. The
   * pre-planning research step uses it to put web-research context into
   * the planner prompt before any plan steps are produced.
   */
  manaApiUrl: string;

  /**
   * HTTP endpoint of mana-research (Hono/Bun, port 3068). It hosts the
   * async-research submit/poll endpoints that the deep-research
   * pre-planning path delegates multi-minute Gemini tasks to.
   */
  manaResearchUrl: string;

  /**
   * Opt-in switch for the deep-research pre-planning path. Off by
   * default: deep runs cost $17 per mission, so they should only fire
   * when the server has explicitly enabled them.
   */
  deepResearchEnabled: boolean;

  /** Shared key used for service-to-service calls. */
  serviceKey: string;

  /** Period, in milliseconds, between background-tick scans for due Missions. */
  tickIntervalMs: number;

  /**
   * When false, the HTTP surface boots without the background tick —
   * handy for local smoke-tests and Docker image build verification.
   */
  tickEnabled: boolean;

  /**
   * RSA-OAEP-2048 private key (PEM-encoded) used to unwrap Mission
   * Grants; its public half is pinned in mana-auth's config. Supply it
   * through a Docker secret or out-of-band env and never commit it.
   *
   * Left optional so the service can boot without grant support
   * (development, legacy deployments). Without it, Missions carrying a
   * Grant are skipped with state='grant-missing'.
   *
   * Key generation:
   *   openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:2048 -out priv.pem
   *   openssl pkey -in priv.pem -pubout -out pub.pem
   */
  missionGrantPrivateKeyPem?: string;

  /**
   * How the policy gate treats server-side tool dispatch:
   *   'off'      — legacy behaviour, no policy evaluation.
   *   'log-only' — decisions are evaluated and logged but never block.
   *   'enforce'  — deny decisions become failed ToolResults, letting the
   *                LLM see the rejection and course-correct.
   * The default is 'log-only', in line with the M1 rollout plan.
   */
  policyMode: 'off' | 'log-only' | 'enforce';

  /**
   * Context-window ceiling for the compactor (Claude-Code `wU2`
   * pattern). Once cumulative prompt+completion tokens pass 92% of this
   * value, the loop folds the middle of the message history into a
   * compact summary ahead of the next LLM call. The default mirrors
   * gemini-2.5-flash's 1M-token context window; deployments on smaller
   * models override it with MANA_AI_COMPACT_MAX_CTX. A value of 0
   * switches compaction off entirely.
   */
  compactMaxContextTokens: number;
}
/**
 * Looks up an environment variable that the service cannot run without.
 *
 * The fallback is consulted only when the variable is entirely unset; a
 * variable that is set to the empty string is treated as missing.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value to use when the variable is not set.
 * @returns The non-empty resolved value.
 * @throws Error when neither the environment nor the fallback yields a
 *   non-empty string.
 */
function requireEnv(key: string, fallback?: string): string {
  const candidate = process.env[key] ?? fallback;
  if (candidate) {
    return candidate;
  }
  throw new Error(`Missing required env var: ${key}`);
}
/**
 * Validates the raw POLICY_MODE setting.
 *
 * Matching is case-insensitive and an unset value resolves to
 * 'log-only'. No trimming is applied, so surrounding whitespace is
 * rejected like any other unknown value.
 *
 * @param raw - The unparsed setting, or undefined when it is not set.
 * @returns The lower-cased policy mode.
 * @throws Error when the value is not one of off, log-only or enforce.
 */
function parsePolicyMode(raw: string | undefined): Config['policyMode'] {
  const normalized = (raw ?? 'log-only').toLowerCase();
  switch (normalized) {
    case 'off':
    case 'log-only':
    case 'enforce':
      return normalized;
    default:
      // Echo the caller's original spelling, not the lower-cased form.
      throw new Error(`POLICY_MODE must be off|log-only|enforce, got "${raw}"`);
  }
}
/**
 * Strictly reads a non-negative integer setting from the environment.
 *
 * A bare `parseInt` accepts any numeric prefix and returns NaN for
 * everything else, so "60s" becomes 60, "1_000_000" becomes 1 and "abc"
 * becomes NaN — all without an error. This helper fails fast instead.
 *
 * @param key - Name of the environment variable.
 * @param fallback - Value used when the variable is unset or blank.
 * @param max - Largest accepted value (inclusive).
 * @returns The parsed integer, or `fallback`.
 * @throws Error when the value is not plain decimal digits or exceeds `max`.
 */
function parseIntEnv(
  key: string,
  fallback: number,
  max: number = Number.MAX_SAFE_INTEGER
): number {
  const raw = process.env[key];
  const text = raw?.trim() ?? '';
  // Unset or blank (e.g. `PORT=` in an env file) means "use the default".
  if (text === '') return fallback;

  // Digits only: no sign, separators, exponent, fraction or unit suffix.
  const value = /^\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isSafeInteger(value) || value > max) {
    throw new Error(`${key} must be a non-negative integer no greater than ${max}, got "${raw}"`);
  }
  return value;
}

/**
 * Builds the service configuration from `process.env`.
 *
 * URL and key settings fall back to local-development defaults when
 * unset. Numeric settings (PORT, TICK_INTERVAL_MS,
 * MANA_AI_COMPACT_MAX_CTX) are validated strictly, so a malformed value
 * aborts here instead of surfacing later as NaN or a truncated number.
 *
 * @returns The resolved configuration.
 * @throws Error when a numeric setting is malformed or out of range, when
 *   a required setting resolves to an empty value, or when POLICY_MODE is
 *   not a recognised mode.
 */
export function loadConfig(): Config {
  return {
    port: parseIntEnv('PORT', 3067, 65535),
    // NOTE(review): the fallbacks below are development credentials/URLs;
    // confirm that production deployments always set these explicitly.
    syncDatabaseUrl: requireEnv(
      'SYNC_DATABASE_URL',
      'postgresql://mana:devpassword@localhost:5432/mana_sync'
    ),
    manaLlmUrl: requireEnv('MANA_LLM_URL', 'http://localhost:3020'),
    manaApiUrl: requireEnv('MANA_API_URL', 'http://localhost:3060'),
    manaResearchUrl: requireEnv('MANA_RESEARCH_URL', 'http://localhost:3068'),
    // Opt-in: only the exact string 'true' enables deep research.
    deepResearchEnabled: process.env.MANA_AI_DEEP_RESEARCH_ENABLED === 'true',
    serviceKey: requireEnv('MANA_SERVICE_KEY', 'dev-service-key'),
    tickIntervalMs: parseIntEnv('TICK_INTERVAL_MS', 60000),
    // Opt-out: anything other than the exact string 'false' keeps the tick on.
    tickEnabled: process.env.TICK_ENABLED !== 'false',
    // `||` on purpose: an empty string counts as "no key provisioned".
    missionGrantPrivateKeyPem: process.env.MANA_AI_PRIVATE_KEY_PEM || undefined,
    policyMode: parsePolicyMode(process.env.POLICY_MODE),
    // 0 is a valid value here and disables compaction.
    compactMaxContextTokens: parseIntEnv('MANA_AI_COMPACT_MAX_CTX', 1000000),
  };
}