feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse
→ PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.

Lets mana-ai's tick re-activate per-agent daily-token budget tracking
— tokensUsed was stubbed to 0 in the migration commit (6) because the
loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h
get real numbers again, and the mana_ai_tokens_used_total Prometheus
counter is accurate.

Additive only: consumers that don't need usage can ignore the new field,
and providers that don't return usage yield zeros (the loop still exposes
the usage object rather than undefined, so downstream branches stay trivial).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-20 18:21:34 +02:00
parent b878ecfe1c
commit 0d613e1846
6 changed files with 59 additions and 5 deletions

View file

@ -343,10 +343,7 @@ async function planOneMission(
rationale: '',
})),
},
// TODO: extract token usage from the loop's trailing LLM
// message once the client exposes it (currently 0 — budget
// enforcement on the server is effectively disabled).
tokensUsed: 0,
tokensUsed: loopResult.usage.totalTokens,
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);

View file

@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
const choice = data.choices?.[0];
if (!choice) throw new Error('mana-llm response had no choices');
const usage = data.usage
? {
promptTokens: data.usage.prompt_tokens ?? 0,
completionTokens: data.usage.completion_tokens ?? 0,
totalTokens:
data.usage.total_tokens ??
(data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
}
: undefined;
return {
content: choice.message?.content ?? null,
toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall),
finishReason: normaliseFinishReason(choice.finish_reason),
usage,
};
},
};
@ -121,6 +132,11 @@ interface ChatCompletionResponseShape {
};
finish_reason?: string | null;
}>;
usage?: {
prompt_tokens?: number;
completion_tokens?: number;
total_tokens?: number;
};
}
function fromWireToolCall(raw: {