feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse
→ PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.

Lets mana-ai's tick re-activate per-agent daily-token budget tracking
— tokensUsed was stubbed to 0 in the migration commit (6) because the
loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h
get real numbers again, and the mana_ai_tokens_used_total Prometheus
counter is accurate.

Additive only: consumers that don't need usage can ignore the new field,
and providers that don't return usage yield zeros (the loop still exposes
the usage object rather than undefined, so downstream branches stay trivial).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-20 18:21:34 +02:00
parent b878ecfe1c
commit 0d613e1846
6 changed files with 59 additions and 5 deletions

View file

@ -343,10 +343,7 @@ async function planOneMission(
rationale: '',
})),
},
// TODO: extract token usage from the loop's trailing LLM
// message once the client exposes it (currently 0 — budget
// enforcement on the server is effectively disabled).
tokensUsed: 0,
tokensUsed: loopResult.usage.totalTokens,
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);

View file

@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
const choice = data.choices?.[0];
if (!choice) throw new Error('mana-llm response had no choices');
const usage = data.usage
? {
promptTokens: data.usage.prompt_tokens ?? 0,
completionTokens: data.usage.completion_tokens ?? 0,
totalTokens:
data.usage.total_tokens ??
(data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
}
: undefined;
return {
content: choice.message?.content ?? null,
toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall),
finishReason: normaliseFinishReason(choice.finish_reason),
usage,
};
},
};
@ -121,6 +132,11 @@ interface ChatCompletionResponseShape {
};
finish_reason?: string | null;
}>;
usage?: {
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
};
}
function fromWireToolCall(raw: {