feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse
→ PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.
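
A minimal sketch of the provider-side mapping, assuming the raw mana-llm
body carries the snake_case counters described above (the helper name and
raw type are illustrative, not the actual client code):

    import type { TokenUsage } from './planner';

    // Illustrative only: shape of the usage block in the raw provider body.
    interface RawUsage {
      prompt_tokens?: number;
      completion_tokens?: number;
    }

    // Map the raw counters onto the camelCase TokenUsage; an absent block
    // stays undefined so the loop decides how to treat it.
    function toTokenUsage(raw: RawUsage | undefined): TokenUsage | undefined {
      if (!raw) return undefined;
      const promptTokens = raw.prompt_tokens ?? 0;
      const completionTokens = raw.completion_tokens ?? 0;
      return { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
    }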

Lets mana-ai's tick re-activate per-agent daily-token budget tracking
— tokensUsed was stubbed to 0 in the migration commit (6) because the
loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h
get real numbers again, and the mana_ai_tokens_used_total Prometheus
counter is accurate.
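
A sketch of that consuming side (only recordTokenUsage, agentTokenUsage24h
and the mana_ai_tokens_used_total counter are existing names; the wiring
and signatures below are assumptions, not mana-ai's actual API):

    import type { PlannerLoopResult } from './loop';

    // Sketch only: feed the aggregated usage into the per-agent daily budget.
    async function trackPlannerUsage(
      agentId: string,
      result: PlannerLoopResult,
      deps: {
        recordTokenUsage: (agentId: string, tokens: number) => Promise<void>;
        agentTokenUsage24h: (agentId: string) => Promise<number>;
        tokensUsedCounter: { inc: (n: number) => void }; // mana_ai_tokens_used_total
      },
      dailyTokenBudget: number,
    ): Promise<boolean> {
      // usage is always present; a provider that reports nothing yields zeros.
      const used = result.usage.totalTokens;
      await deps.recordTokenUsage(agentId, used);
      deps.tokensUsedCounter.inc(used);
      // true → agent still has budget left for further planning today.
      return (await deps.agentTokenUsage24h(agentId)) <= dailyTokenBudget;
    }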

Additive only: consumers that don't need usage can ignore the new field,
and providers that don't report usage produce zeros (not undefined; the
loop still exposes the object, so downstream branches stay trivial).
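
Concretely (illustrative helper, not part of this change), downstream code
can destructure without optional chaining:

    import type { PlannerLoopResult } from './loop';

    // Illustrative only: usage is always an object, so no `result.usage?.` guards.
    function formatUsage(result: PlannerLoopResult): string {
      const { promptTokens, completionTokens, totalTokens } = result.usage;
      return totalTokens === 0
        ? 'provider reported no token usage'
        : `${promptTokens} prompt + ${completionTokens} completion tokens`;
    }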

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Till JS 2026-04-20 18:21:34 +02:00
parent b878ecfe1c
commit 0d613e1846
6 changed files with 59 additions and 5 deletions

@@ -76,6 +76,7 @@ export type {
ResolvedInput,
SystemPromptInput,
SystemPromptOutput,
TokenUsage,
ToolCallRequest,
ToolResult,
} from './planner';

@@ -23,6 +23,7 @@ export type {
LoopStopReason,
PlannerLoopInput,
PlannerLoopResult,
TokenUsage,
ToolCallRequest,
ToolResult,
} from './loop';

@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
export interface TokenUsage {
readonly promptTokens: number;
readonly completionTokens: number;
readonly totalTokens: number;
}
export interface LlmCompletionResponse {
readonly content: string | null;
readonly toolCalls: readonly ToolCallRequest[];
readonly finishReason: LlmFinishReason;
/** Token counts for this one call, propagated from the provider
 * response when available. Summed across rounds in PlannerLoopResult. */
readonly usage?: TokenUsage;
}
export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
* every assistant/tool turn). Never synced; contains decrypted
* user content. */
readonly messages: readonly ChatMessage[];
/** Accumulated token usage across every LLM round. Zero counts when
* the provider didn't report usage. Consumers use this for budget
* tracking (mana-ai's per-agent daily limit) and cost telemetry. */
readonly usage: TokenUsage;
}
// ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
let summary: string | null = null;
let stopReason: LoopStopReason = 'max-rounds';
let rounds = 0;
let promptTokens = 0;
let completionTokens = 0;
while (rounds < maxRounds) {
rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
temperature: input.temperature,
});
if (response.usage) {
promptTokens += response.usage.promptTokens;
completionTokens += response.usage.completionTokens;
}
// Append the assistant turn to history before we execute any
// tools — the LLM needs to see its own prior tool_calls alongside
// the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: {
summary,
stopReason,
messages,
usage: {
promptTokens,
completionTokens,
totalTokens: promptTokens + completionTokens,
},
};
}