mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 21:59:40 +02:00
feat(mana-ai): server-side token budget enforcement per agent
Implement rolling 24h token budget enforcement in the mana-ai tick loop.
Agents with maxTokensPerDay set are now rate-limited server-side.
Changes:
- PlannerClient: extract usage.total_tokens from mana-llm response
- planOneMission: return a { plan, tokensUsed } object (or null on parse failure)
- tick loop: check getAgentTokenUsage24h() before planning; skip with
'skipped-budget' decision if usage is at or over the limit
- tick loop: record token usage after successful plan via
recordTokenUsage() INSERT into mana_ai.token_usage
- migrate.ts: new mana_ai.token_usage table with rolling window index
- metrics.ts: mana_ai_tokens_used_total counter (by agent_id)
Budget flow:
Agent.maxTokensPerDay = 50000
→ tick checks: SELECT SUM(tokens_used) WHERE ts > now()-24h
→ if sum >= 50000: skip mission, emit skipped-budget metric
→ else: plan mission, INSERT token_usage row
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e2d540a958
commit
ce57e11950
4 changed files with 89 additions and 5 deletions
|
|
@ -41,6 +41,7 @@ import {
|
|||
snapshotRowsAppliedTotal,
|
||||
grantSkipsTotal,
|
||||
agentDecisionsTotal,
|
||||
tokensUsedTotal,
|
||||
} from '../metrics';
|
||||
import { unwrapMissionGrant } from '../crypto/unwrap-grant';
|
||||
import { NewsResearchClient } from '../planner/news-research-client';
|
||||
|
|
@ -166,16 +167,32 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
|
|||
agentDecisionsTotal.inc({ decision: 'skipped-concurrency' });
|
||||
continue;
|
||||
}
|
||||
// Budget enforcement: check rolling 24h token usage.
|
||||
if (agent.maxTokensPerDay != null && agent.maxTokensPerDay >= 0) {
|
||||
const windowUsage = await getAgentTokenUsage24h(sql, m.userId, agent.id);
|
||||
if (windowUsage >= agent.maxTokensPerDay) {
|
||||
agentDecisionsTotal.inc({ decision: 'skipped-budget' });
|
||||
continue;
|
||||
}
|
||||
}
|
||||
activeRuns.set(agent.id, used + 1);
|
||||
}
|
||||
|
||||
try {
|
||||
const plan = await planOneMission(m, planner, sql, agent, config);
|
||||
if (plan === null) {
|
||||
const planResult = await planOneMission(m, planner, sql, agent, config);
|
||||
if (planResult === null) {
|
||||
parseFailures++;
|
||||
parseFailuresTotal.inc();
|
||||
continue;
|
||||
}
|
||||
const { plan, tokensUsed } = planResult;
|
||||
|
||||
// Record token usage for budget tracking
|
||||
if (tokensUsed > 0 && agent) {
|
||||
await recordTokenUsage(sql, m.userId, agent.id, m.id, tokensUsed);
|
||||
tokensUsedTotal.inc({ agent_id: agent.id }, tokensUsed);
|
||||
}
|
||||
|
||||
plansProduced++;
|
||||
plansProducedTotal.inc();
|
||||
|
||||
|
|
@ -234,7 +251,7 @@ async function planOneMission(
|
|||
sql: Sql,
|
||||
agent: ServerAgent | null,
|
||||
config: Config
|
||||
): Promise<AiPlanOutput | null> {
|
||||
): Promise<{ plan: AiPlanOutput; tokensUsed: number } | null> {
|
||||
const mission = serverMissionToSharedMission(m);
|
||||
// Resolve the mission's Key-Grant (if any) once per tick. An absent
|
||||
// grant is NOT an error — plaintext missions (goals-only) run fine
|
||||
|
|
@ -283,7 +300,7 @@ async function planOneMission(
|
|||
);
|
||||
return null;
|
||||
}
|
||||
return parsed.value;
|
||||
return { plan: parsed.value, tokensUsed: result.usage?.totalTokens ?? 0 };
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -395,6 +412,34 @@ function serverMissionToSharedMission(m: ServerMission): Mission {
|
|||
};
|
||||
}
|
||||
|
||||
// ── Token Budget Helpers ──────────────────────────────────────
|
||||
|
||||
/**
 * Sum the tokens an agent has consumed over the rolling last 24 hours.
 *
 * Reads `mana_ai.token_usage`, keeping rows for the given user and agent
 * whose `ts` is newer than `now() - interval '24 hours'`. `COALESCE` makes
 * the query yield 0 when no rows match.
 *
 * @param sql - SQL client, invoked as a tagged template.
 * @param userId - Value matched against the `user_id` column.
 * @param agentId - Value matched against the `agent_id` column.
 * @returns The summed `tokens_used`, or 0 when the result set is empty.
 */
async function getAgentTokenUsage24h(sql: Sql, userId: string, agentId: string): Promise<number> {
	const rows = await sql<{ total: string }[]>`
		SELECT COALESCE(SUM(tokens_used), 0) AS total
		FROM mana_ai.token_usage
		WHERE user_id = ${userId}
			AND agent_id = ${agentId}
			AND ts > now() - interval '24 hours'
	`;
	// The row type declares `total` as a string, so it is parsed explicitly;
	// a missing first row falls back to '0'.
	return parseInt(rows[0]?.total ?? '0', 10);
}
|
||||
|
||||
/**
 * Append one token-consumption row to `mana_ai.token_usage`.
 *
 * Only `user_id`, `agent_id`, `mission_id` and `tokens_used` are supplied;
 * any other column is left to the table's defaults.
 *
 * @param sql - SQL client, invoked as a tagged template.
 * @param userId - Value stored in `user_id`.
 * @param agentId - Value stored in `agent_id`.
 * @param missionId - Value stored in `mission_id`.
 * @param tokensUsed - Value stored in `tokens_used`.
 * @returns Resolves once the INSERT has completed; rejects if the query does.
 */
async function recordTokenUsage(
	sql: Sql,
	userId: string,
	agentId: string,
	missionId: string,
	tokensUsed: number
): Promise<void> {
	await sql`
		INSERT INTO mana_ai.token_usage (user_id, agent_id, mission_id, tokens_used)
		VALUES (${userId}, ${agentId}, ${missionId}, ${tokensUsed})
	`;
}
|
||||
|
||||
let handle: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
export function startTick(config: Config): () => void {
|
||||
|
|
|
|||
|
|
@ -118,4 +118,24 @@ export async function migrate(sql: Sql): Promise<void> {
|
|||
ON mana_ai.agent_snapshots ((record->>'state'))
|
||||
WHERE record->>'state' = 'active'
|
||||
`;
|
||||
|
||||
// ─── Token usage tracking (Budget Enforcement) ──────────────
|
||||
// Append-only log of token consumption per planner call. The tick
|
||||
// loop queries the rolling 24h window to enforce Agent.maxTokensPerDay.
|
||||
// Old rows (>48h) are periodically pruned by the tick.
|
||||
await sql`
|
||||
CREATE TABLE IF NOT EXISTS mana_ai.token_usage (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
user_id TEXT NOT NULL,
|
||||
agent_id TEXT NOT NULL,
|
||||
mission_id TEXT NOT NULL,
|
||||
tokens_used INT NOT NULL,
|
||||
ts TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
)
|
||||
`;
|
||||
|
||||
await sql`
|
||||
CREATE INDEX IF NOT EXISTS idx_token_usage_agent_window
|
||||
ON mana_ai.token_usage (user_id, agent_id, ts DESC)
|
||||
`;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,3 +145,12 @@ export const agentDecisionsTotal = new Counter({
|
|||
labelNames: ['decision'] as const,
|
||||
registers: [register],
|
||||
});
|
||||
|
||||
// ── Token Budget Enforcement ─────────────────────────────
|
||||
|
||||
/**
 * Counter named `mana_ai_tokens_used_total`, attached to `register`.
 *
 * Carries a single `agent_id` label, so increments are tracked per agent.
 * NOTE(review): one series per agent id — confirm the expected number of
 * agents keeps label cardinality acceptable.
 */
export const tokensUsedTotal = new Counter({
	name: 'mana_ai_tokens_used_total',
	help: 'Total tokens consumed across all planner calls.',
	labelNames: ['agent_id'] as const,
	registers: [register],
});
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ export interface PlannerMessages {
|
|||
/** Result of a single planner call. */
export interface PlannerResult {
	/** Raw text the LLM returned. Parser lives alongside the caller. */
	content: string;
	/** Token usage from the LLM response (if the provider includes it). */
	usage?: { promptTokens: number; completionTokens: number; totalTokens: number };
}
|
||||
|
||||
export class PlannerClient {
|
||||
|
|
@ -63,8 +65,16 @@ export class PlannerClient {
|
|||
|
||||
const body = (await res.json()) as {
|
||||
choices?: { message?: { content?: string } }[];
|
||||
usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
|
||||
};
|
||||
const content = body.choices?.[0]?.message?.content ?? '';
|
||||
return { content };
|
||||
const usage = body.usage
|
||||
? {
|
||||
promptTokens: body.usage.prompt_tokens ?? 0,
|
||||
completionTokens: body.usage.completion_tokens ?? 0,
|
||||
totalTokens: body.usage.total_tokens ?? 0,
|
||||
}
|
||||
: undefined;
|
||||
return { content, usage };
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue