feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse
→ PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.
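
A rough consumer-side sketch (the runPlannerLoop option names shown here
are assumptions; only usage and its fields come from this change):

    const result = await runPlannerLoop({ llm, input, tools, maxRounds: 6 });
    // Always present: zero counts when the provider reported no usage.
    const { promptTokens, completionTokens, totalTokens } = result.usage;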

Lets mana-ai's tick re-activate per-agent daily-token budget tracking
— tokensUsed was stubbed to 0 in the migration commit (6) because the
loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h
get real numbers again, and the mana_ai_tokens_used_total Prometheus
counter is accurate.
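
Sketch of the tick-side wiring this re-enables (the signatures, budget
constant, counter handle, and guard helper are assumptions, not taken
from this diff):

    await recordTokenUsage(agent.id, result.tokensUsed);      // persist per-agent usage
    const usedToday = await agentTokenUsage24h(agent.id);     // rolling 24h sum
    if (usedToday >= DAILY_TOKEN_BUDGET) return skip(agent);  // hypothetical budget guard
    tokensUsedCounter.inc(result.tokensUsed);                 // mana_ai_tokens_used_total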

Additive only: consumers that don't need usage can ignore the new field.
LlmCompletionResponse.usage is optional, but PlannerLoopResult.usage is
always present; providers that report nothing just yield zero counts, so
downstream budget branches stay trivial.
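
Concretely, the mana-ai tick can read the total directly (as in the diff
below) with no ?? 0 fallback:

    tokensUsed: loopResult.usage.totalTokens,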

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS · 2026-04-20 18:21:34 +02:00
commit 0d613e1846 (parent b878ecfe1c)
6 changed files with 59 additions and 5 deletions


@@ -105,8 +105,17 @@ export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient
      const content = choice.message?.content ?? null;
      const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
      const finishReason = normaliseFinishReason(choice.finish_reason);
      const usage = data.usage
        ? {
            promptTokens: data.usage.prompt_tokens ?? 0,
            completionTokens: data.usage.completion_tokens ?? 0,
            totalTokens:
              data.usage.total_tokens ??
              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
          }
        : undefined;
      return { content, toolCalls, finishReason };
      return { content, toolCalls, finishReason, usage };
    },
  };
}
@@ -153,6 +162,11 @@ interface ChatCompletionResponseShape {
    };
    finish_reason?: string | null;
  }>;
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
  };
}
function fromWireToolCall(raw: {


@@ -76,6 +76,7 @@ export type {
  ResolvedInput,
  SystemPromptInput,
  SystemPromptOutput,
  TokenUsage,
  ToolCallRequest,
  ToolResult,
} from './planner';


@@ -23,6 +23,7 @@ export type {
  LoopStopReason,
  PlannerLoopInput,
  PlannerLoopResult,
  TokenUsage,
  ToolCallRequest,
  ToolResult,
} from './loop';


@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';

export interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}

export interface LlmCompletionResponse {
  readonly content: string | null;
  readonly toolCalls: readonly ToolCallRequest[];
  readonly finishReason: LlmFinishReason;
  /** Token counts for this one call propagated from the provider
   * response when available. Summed across rounds in PlannerLoopResult. */
  readonly usage?: TokenUsage;
}
export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
   * every assistant/tool turn). Never synced: contains decrypted
   * user content. */
  readonly messages: readonly ChatMessage[];
  /** Accumulated token usage across every LLM round. Zero counts when
   * the provider didn't report usage. Consumers use this for budget
   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
  readonly usage: TokenUsage;
}
// ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
  let summary: string | null = null;
  let stopReason: LoopStopReason = 'max-rounds';
  let rounds = 0;
  let promptTokens = 0;
  let completionTokens = 0;

  while (rounds < maxRounds) {
    rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
      temperature: input.temperature,
    });

    if (response.usage) {
      promptTokens += response.usage.promptTokens;
      completionTokens += response.usage.completionTokens;
    }

    // Append the assistant turn to history before we execute any
    // tools — the LLM needs to see its own prior tool_calls alongside
    // the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: {
    summary,
    stopReason,
    messages,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens,
    },
  };
}


@@ -343,10 +343,7 @@ async function planOneMission(
          rationale: '',
        })),
      },
      // TODO: extract token usage from the loop's trailing LLM
      // message once the client exposes it (currently 0 — budget
      // enforcement on the server is effectively disabled).
      tokensUsed: 0,
      tokensUsed: loopResult.usage.totalTokens,
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);


@@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
      const choice = data.choices?.[0];
      if (!choice) throw new Error('mana-llm response had no choices');
      const usage = data.usage
        ? {
            promptTokens: data.usage.prompt_tokens ?? 0,
            completionTokens: data.usage.completion_tokens ?? 0,
            totalTokens:
              data.usage.total_tokens ??
              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
          }
        : undefined;
      return {
        content: choice.message?.content ?? null,
        toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall),
        finishReason: normaliseFinishReason(choice.finish_reason),
        usage,
      };
    },
  };
@@ -121,6 +132,11 @@ interface ChatCompletionResponseShape {
    };
    finish_reason?: string | null;
  }>;
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
  };
}
function fromWireToolCall(raw: {