feat(mana-ai): server-side token budget enforcement per agent

Implement rolling 24h token budget enforcement in the mana-ai tick loop.
Agents with maxTokensPerDay set are now rate-limited server-side.

Changes:
- PlannerClient: extract usage.total_tokens from mana-llm response
- planOneMission: return a {plan, tokensUsed} object (still null on parse failure)
- tick loop: check getAgentTokenUsage24h() before planning; skip with
  'skipped-budget' decision if over limit
- tick loop: record token usage after successful plan via
  recordTokenUsage() INSERT into mana_ai.token_usage
- migrate.ts: new mana_ai.token_usage table with rolling window index
- metrics.ts: mana_ai_tokens_used_total counter (by agent_id)

Budget flow:
  Agent.maxTokensPerDay = 50000
  → tick checks: SELECT SUM(tokens_used) WHERE ts > now()-24h
  → if sum >= 50000: skip mission, emit skipped-budget metric
  → else: plan mission, INSERT token_usage row

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-16 14:41:31 +02:00
parent e2d540a958
commit ce57e11950
4 changed files with 89 additions and 5 deletions

View file

@ -41,6 +41,7 @@ import {
snapshotRowsAppliedTotal,
grantSkipsTotal,
agentDecisionsTotal,
tokensUsedTotal,
} from '../metrics';
import { unwrapMissionGrant } from '../crypto/unwrap-grant';
import { NewsResearchClient } from '../planner/news-research-client';
@ -166,16 +167,32 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
agentDecisionsTotal.inc({ decision: 'skipped-concurrency' });
continue;
}
// Budget enforcement: check rolling 24h token usage.
if (agent.maxTokensPerDay != null && agent.maxTokensPerDay >= 0) {
const windowUsage = await getAgentTokenUsage24h(sql, m.userId, agent.id);
if (windowUsage >= agent.maxTokensPerDay) {
agentDecisionsTotal.inc({ decision: 'skipped-budget' });
continue;
}
}
activeRuns.set(agent.id, used + 1);
}
try {
const plan = await planOneMission(m, planner, sql, agent, config);
if (plan === null) {
const planResult = await planOneMission(m, planner, sql, agent, config);
if (planResult === null) {
parseFailures++;
parseFailuresTotal.inc();
continue;
}
const { plan, tokensUsed } = planResult;
// Record token usage for budget tracking
if (tokensUsed > 0 && agent) {
await recordTokenUsage(sql, m.userId, agent.id, m.id, tokensUsed);
tokensUsedTotal.inc({ agent_id: agent.id }, tokensUsed);
}
plansProduced++;
plansProducedTotal.inc();
@ -234,7 +251,7 @@ async function planOneMission(
sql: Sql,
agent: ServerAgent | null,
config: Config
): Promise<AiPlanOutput | null> {
): Promise<{ plan: AiPlanOutput; tokensUsed: number } | null> {
const mission = serverMissionToSharedMission(m);
// Resolve the mission's Key-Grant (if any) once per tick. An absent
// grant is NOT an error — plaintext missions (goals-only) run fine
@ -283,7 +300,7 @@ async function planOneMission(
);
return null;
}
return parsed.value;
return { plan: parsed.value, tokensUsed: result.usage?.totalTokens ?? 0 };
}
/**
@ -395,6 +412,34 @@ function serverMissionToSharedMission(m: ServerMission): Mission {
};
}
// ── Token Budget Helpers ──────────────────────────────────────
/**
 * Sum the tokens one agent has consumed over the trailing 24 hours.
 *
 * Aggregates `tokens_used` from `mana_ai.token_usage` for the given
 * user/agent pair, restricted to rows whose `ts` is newer than
 * `now() - interval '24 hours'`.
 *
 * @param sql - Tagged-template SQL client used to run the query.
 * @param userId - Owner of the agent; matched against `user_id`.
 * @param agentId - Agent whose usage is summed; matched against `agent_id`.
 * @returns The summed token count as an integer, or `0` when nothing matches.
 */
async function getAgentTokenUsage24h(sql: Sql, userId: string, agentId: string): Promise<number> {
	// The aggregate always yields a single row; COALESCE turns "no rows" into 0.
	const [aggregate] = await sql<{ total: string }[]>`
SELECT COALESCE(SUM(tokens_used), 0) AS total
FROM mana_ai.token_usage
WHERE user_id = ${userId}
AND agent_id = ${agentId}
AND ts > now() - interval '24 hours'
`;
	// The total is typed as a string here, so it is parsed as a base-10 integer.
	const rawTotal = aggregate?.total ?? '0';
	return parseInt(rawTotal, 10);
}
/**
 * Append one token-consumption row to `mana_ai.token_usage`.
 *
 * Only `user_id`, `agent_id`, `mission_id` and `tokens_used` are supplied by
 * the statement; all values are bound as query parameters.
 *
 * @param sql - Tagged-template SQL client used to run the insert.
 * @param userId - Owner of the agent that ran the planner call.
 * @param agentId - Agent the usage is attributed to.
 * @param missionId - Mission that triggered the planner call.
 * @param tokensUsed - Number of tokens to record for this call.
 */
async function recordTokenUsage(
	sql: Sql,
	userId: string,
	agentId: string,
	missionId: string,
	tokensUsed: number
): Promise<void> {
	const insertUsageRow = sql`
INSERT INTO mana_ai.token_usage (user_id, agent_id, mission_id, tokens_used)
VALUES (${userId}, ${agentId}, ${missionId}, ${tokensUsed})
`;
	// Wait for the write to finish so a failed insert rejects into the caller.
	await insertUsageRow;
}
let handle: ReturnType<typeof setInterval> | null = null;
export function startTick(config: Config): () => void {

View file

@ -118,4 +118,24 @@ export async function migrate(sql: Sql): Promise<void> {
ON mana_ai.agent_snapshots ((record->>'state'))
WHERE record->>'state' = 'active'
`;
// ─── Token usage tracking (Budget Enforcement) ──────────────
// Append-only log of token consumption per planner call. The tick
// loop queries the rolling 24h window to enforce Agent.maxTokensPerDay.
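// Old rows (>48h) are meant to be pruned periodically by the tick.
// NOTE(review): no pruning query is visible alongside this table — confirm
// it exists, otherwise this append-only log grows without bound.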
await sql`
CREATE TABLE IF NOT EXISTS mana_ai.token_usage (
id BIGSERIAL PRIMARY KEY,
user_id TEXT NOT NULL,
agent_id TEXT NOT NULL,
mission_id TEXT NOT NULL,
tokens_used INT NOT NULL,
ts TIMESTAMPTZ NOT NULL DEFAULT now()
)
`;
await sql`
CREATE INDEX IF NOT EXISTS idx_token_usage_agent_window
ON mana_ai.token_usage (user_id, agent_id, ts DESC)
`;
}

View file

@ -145,3 +145,12 @@ export const agentDecisionsTotal = new Counter({
labelNames: ['decision'] as const,
registers: [register],
});
// ── Token Budget Enforcement ─────────────────────────────
/**
 * Counter exported as `mana_ai_tokens_used_total`, labelled by `agent_id`.
 *
 * The tick loop increments it by the token count of each planner call whose
 * usage it records.
 *
 * NOTE(review): a separate series is kept per distinct `agent_id` value —
 * confirm the number of agents stays small enough for this label.
 */
export const tokensUsedTotal = new Counter({
name: 'mana_ai_tokens_used_total',
help: 'Total tokens consumed across all planner calls.',
labelNames: ['agent_id'] as const,
registers: [register],
});

View file

@ -17,6 +17,8 @@ export interface PlannerMessages {
/**
 * Outcome of one planner LLM call: the raw response text plus, when the
 * response carried a usage object, its token counts.
 */
export interface PlannerResult {
/** Raw text the LLM returned. Parser lives alongside the caller. */
content: string;
/**
 * Token usage from the LLM response (if the provider includes it).
 * Undefined when the response has no usage object at all; when the object
 * is present but a count is missing, that count is reported as 0.
 */
usage?: { promptTokens: number; completionTokens: number; totalTokens: number };
}
export class PlannerClient {
@ -63,8 +65,16 @@ export class PlannerClient {
const body = (await res.json()) as {
choices?: { message?: { content?: string } }[];
usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
};
const content = body.choices?.[0]?.message?.content ?? '';
return { content };
const usage = body.usage
? {
promptTokens: body.usage.prompt_tokens ?? 0,
completionTokens: body.usage.completion_tokens ?? 0,
totalTokens: body.usage.total_tokens ?? 0,
}
: undefined;
return { content, usage };
}
}