feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse
→ PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.
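
A minimal sketch of the provider-side mapping, assuming the raw mana-llm
body carries the snake_case counters described above (the helper name and
raw type are illustrative, not the actual client code):

    import type { TokenUsage } from './planner';

    // Illustrative only: shape of the usage block in the raw provider body.
    interface RawUsage {
      prompt_tokens?: number;
      completion_tokens?: number;
    }

    // Map the raw counters onto the camelCase TokenUsage; an absent block
    // stays undefined so the loop decides how to treat it.
    function toTokenUsage(raw: RawUsage | undefined): TokenUsage | undefined {
      if (!raw) return undefined;
      const promptTokens = raw.prompt_tokens ?? 0;
      const completionTokens = raw.completion_tokens ?? 0;
      return { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
    }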

Lets mana-ai's tick re-activate per-agent daily-token budget tracking
— tokensUsed was stubbed to 0 in the migration commit (6) because the
loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h
get real numbers again, and the mana_ai_tokens_used_total Prometheus
counter is accurate.
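
A sketch of that consuming side (only recordTokenUsage, agentTokenUsage24h
and the mana_ai_tokens_used_total counter are existing names; the wiring
and signatures below are assumptions, not mana-ai's actual API):

    import type { PlannerLoopResult } from './loop';

    // Sketch only: feed the aggregated usage into the per-agent daily budget.
    async function trackPlannerUsage(
      agentId: string,
      result: PlannerLoopResult,
      deps: {
        recordTokenUsage: (agentId: string, tokens: number) => Promise<void>;
        agentTokenUsage24h: (agentId: string) => Promise<number>;
        tokensUsedCounter: { inc: (n: number) => void }; // mana_ai_tokens_used_total
      },
      dailyTokenBudget: number,
    ): Promise<boolean> {
      // usage is always present; a provider that reports nothing yields zeros.
      const used = result.usage.totalTokens;
      await deps.recordTokenUsage(agentId, used);
      deps.tokensUsedCounter.inc(used);
      // true → agent still has budget left for further planning today.
      return (await deps.agentTokenUsage24h(agentId)) <= dailyTokenBudget;
    }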

Additive only: consumers that don't need usage can ignore the new field,
and providers that don't report usage produce zeros (not undefined; the
loop still exposes the object, so downstream branches stay trivial).
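
Concretely (illustrative helper, not part of this change), downstream code
can destructure without optional chaining:

    import type { PlannerLoopResult } from './loop';

    // Illustrative only: usage is always an object, so no `result.usage?.` guards.
    function formatUsage(result: PlannerLoopResult): string {
      const { promptTokens, completionTokens, totalTokens } = result.usage;
      return totalTokens === 0
        ? 'provider reported no token usage'
        : `${promptTokens} prompt + ${completionTokens} completion tokens`;
    }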

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS 2026-04-20 18:21:34 +02:00
parent b878ecfe1c
commit 0d613e1846
6 changed files with 59 additions and 5 deletions

@@ -76,6 +76,7 @@ export type {
ResolvedInput,
SystemPromptInput,
SystemPromptOutput,
TokenUsage,
ToolCallRequest,
ToolResult,
} from './planner';

@@ -23,6 +23,7 @@ export type {
LoopStopReason,
PlannerLoopInput,
PlannerLoopResult,
TokenUsage,
ToolCallRequest,
ToolResult,
} from './loop';

@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
export interface TokenUsage {
readonly promptTokens: number;
readonly completionTokens: number;
readonly totalTokens: number;
}
export interface LlmCompletionResponse {
readonly content: string | null;
readonly toolCalls: readonly ToolCallRequest[];
readonly finishReason: LlmFinishReason;
/** Token counts for this one call, propagated from the provider
 * response when available. Summed across rounds in PlannerLoopResult. */
readonly usage?: TokenUsage;
}
export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
* every assistant/tool turn). Never synced; contains decrypted
* user content. */
readonly messages: readonly ChatMessage[];
/** Accumulated token usage across every LLM round. Zero counts when
* the provider didn't report usage. Consumers use this for budget
* tracking (mana-ai's per-agent daily limit) and cost telemetry. */
readonly usage: TokenUsage;
}
// ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
let summary: string | null = null;
let stopReason: LoopStopReason = 'max-rounds';
let rounds = 0;
let promptTokens = 0;
let completionTokens = 0;
while (rounds < maxRounds) {
rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
temperature: input.temperature,
});
if (response.usage) {
promptTokens += response.usage.promptTokens;
completionTokens += response.usage.completionTokens;
}
// Append the assistant turn to history before we execute any
// tools — the LLM needs to see its own prior tool_calls alongside
// the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: {
summary,
stopReason,
messages,
usage: {
promptTokens,
completionTokens,
totalTokens: promptTokens + completionTokens,
},
};
}