From 0d613e18466f60d76dd8e8e3113f5392330ca099 Mon Sep 17 00:00:00 2001
From: Till JS
Date: Mon, 20 Apr 2026 18:21:34 +0200
Subject: [PATCH] =?UTF-8?q?feat(ai):=20thread=20TokenUsage=20through=20run?=
 =?UTF-8?q?PlannerLoop=20=E2=86=92=20mana-ai=20budget?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse →
PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.

Lets mana-ai's tick re-activate per-agent daily-token budget tracking —
tokensUsed was stubbed to 0 in the migration commit (6) because the loop
didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h get
real numbers again, and the mana_ai_tokens_used_total Prometheus counter
is accurate.

Additive only: consumers without usage needs ignore the new field, and
providers that don't return usage produce zeros (not undefined — the
loop still exposes the object so downstream branches stay trivial).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../src/lib/data/ai/missions/llm-client.ts  | 16 +++++++++++++++-
 packages/shared-ai/src/index.ts             |  1 +
 packages/shared-ai/src/planner/index.ts     |  1 +
 packages/shared-ai/src/planner/loop.ts      | 25 +++++++++++++++++++++++++
 services/mana-ai/src/cron/tick.ts           |  5 +----
 services/mana-ai/src/planner/llm-client.ts  | 16 ++++++++++++++++
 6 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
index 04f24afd4..b1adcbbf9 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
@@ -105,8 +105,17 @@ export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient
       const content = choice.message?.content ?? null;
       const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
       const finishReason = normaliseFinishReason(choice.finish_reason);
+      const usage = data.usage
+        ? {
+            promptTokens: data.usage.prompt_tokens ?? 0,
+            completionTokens: data.usage.completion_tokens ?? 0,
+            totalTokens:
+              data.usage.total_tokens ??
+              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
+          }
+        : undefined;
 
-      return { content, toolCalls, finishReason };
+      return { content, toolCalls, finishReason, usage };
     },
   };
 }
@@ -153,6 +162,11 @@ interface ChatCompletionResponseShape {
     };
     finish_reason?: string | null;
   }>;
+  usage?: {
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+  };
 }
 
 function fromWireToolCall(raw: {
diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts
index 3c236f157..e4ca0bc05 100644
--- a/packages/shared-ai/src/index.ts
+++ b/packages/shared-ai/src/index.ts
@@ -76,6 +76,7 @@ export type {
   ResolvedInput,
   SystemPromptInput,
   SystemPromptOutput,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './planner';
diff --git a/packages/shared-ai/src/planner/index.ts b/packages/shared-ai/src/planner/index.ts
index a7cfd8127..b432997b8 100644
--- a/packages/shared-ai/src/planner/index.ts
+++ b/packages/shared-ai/src/planner/index.ts
@@ -23,6 +23,7 @@ export type {
   LoopStopReason,
   PlannerLoopInput,
   PlannerLoopResult,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './loop';
diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts
index 9ac9057ef..c9e6b84e1 100644
--- a/packages/shared-ai/src/planner/loop.ts
+++ b/packages/shared-ai/src/planner/loop.ts
@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
 
 export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
 
+export interface TokenUsage {
+  readonly promptTokens: number;
+  readonly completionTokens: number;
+  readonly totalTokens: number;
+}
+
 export interface LlmCompletionResponse {
   readonly content: string | null;
   readonly toolCalls: readonly ToolCallRequest[];
   readonly finishReason: LlmFinishReason;
+  /** Token counts for this one call — propagated from the provider
+   * response when available. Summed across rounds in PlannerLoopResult. */
+  readonly usage?: TokenUsage;
 }
 
 export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
    * every assistant/tool turn). Never synced — contains decrypted
    * user content. */
   readonly messages: readonly ChatMessage[];
+  /** Accumulated token usage across every LLM round. Zero counts when
+   * the provider didn't report usage. Consumers use this for budget
+   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
+  readonly usage: TokenUsage;
 }
 
 // ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
   let summary: string | null = null;
   let stopReason: LoopStopReason = 'max-rounds';
   let rounds = 0;
+  let promptTokens = 0;
+  let completionTokens = 0;
 
   while (rounds < maxRounds) {
     rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
       temperature: input.temperature,
     });
 
+    if (response.usage) {
+      promptTokens += response.usage.promptTokens;
+      completionTokens += response.usage.completionTokens;
+    }
+
     // Append the assistant turn to history before we execute any
     // tools — the LLM needs to see its own prior tool_calls alongside
     // the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: { summary, stopReason, messages, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, }; } diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts index 0b2ba708b..fa4db79a1 100644 --- a/services/mana-ai/src/cron/tick.ts +++ b/services/mana-ai/src/cron/tick.ts @@ -343,10 +343,7 @@ async function planOneMission( rationale: '', })), }, - // TODO: extract token usage from the loop's trailing LLM - // message once the client exposes it (currently 0 — budget - // enforcement on the server is effectively disabled). - tokensUsed: 0, + tokensUsed: loopResult.usage.totalTokens, }; } catch (err) { const msg = err instanceof Error ? err.message : String(err); diff --git a/services/mana-ai/src/planner/llm-client.ts b/services/mana-ai/src/planner/llm-client.ts index d1e466bbb..6b37d0a26 100644 --- a/services/mana-ai/src/planner/llm-client.ts +++ b/services/mana-ai/src/planner/llm-client.ts @@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient { const choice = data.choices?.[0]; if (!choice) throw new Error('mana-llm response had no choices'); + const usage = data.usage + ? { + promptTokens: data.usage.prompt_tokens ?? 0, + completionTokens: data.usage.completion_tokens ?? 0, + totalTokens: + data.usage.total_tokens ?? + (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0), + } + : undefined; + return { content: choice.message?.content ?? null, toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall), finishReason: normaliseFinishReason(choice.finish_reason), + usage, }; }, }; @@ -121,6 +132,11 @@ interface ChatCompletionResponseShape { }; finish_reason?: string | null; }>; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + total_tokens?: number; + }; } function fromWireToolCall(raw: {
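
Reviewer note: the key contract this patch establishes is that
PlannerLoopResult.usage is always an object; rounds whose provider
omitted a usage block contribute zeros. A self-contained TypeScript
sketch of that fold, for reference; the sumRoundUsage helper and the
sample round data are illustrative only, not part of this patch (the
real accumulation is inlined in runPlannerLoop above):

// Mirrors the TokenUsage interface added to packages/shared-ai/src/planner/loop.ts.
interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}

// One entry per LLM round; undefined models a provider that returned no
// usage block. This helper is a reviewer-side illustration of the fold
// that runPlannerLoop performs inline.
function sumRoundUsage(rounds: ReadonlyArray<TokenUsage | undefined>): TokenUsage {
  let promptTokens = 0;
  let completionTokens = 0;
  for (const usage of rounds) {
    if (usage) {
      promptTokens += usage.promptTokens;
      completionTokens += usage.completionTokens;
    }
  }
  // Always an object, never undefined: zero counts when nothing was
  // reported, so downstream budget branches need no null checks.
  return { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
}

const aggregate = sumRoundUsage([
  { promptTokens: 1200, completionTokens: 300, totalTokens: 1500 },
  undefined, // round 2: provider omitted usage
  { promptTokens: 1800, completionTokens: 450, totalTokens: 2250 },
]);
console.assert(aggregate.totalTokens === 3750);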
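
On the consumer side, tick.ts now feeds loopResult.usage.totalTokens
into the budget path. A hedged sketch of what that path presumably looks
like: recordTokenUsage and agentTokenUsage24h are named in the commit
message, but their signatures, the in-memory store, and the
DAILY_TOKEN_BUDGET constant below are assumptions for illustration, not
mana-ai's actual code:

// Assumed signatures: in-memory stand-ins for mana-ai's real helpers.
const usage24h = new Map<string, number>();

async function agentTokenUsage24h(agentId: string): Promise<number> {
  return usage24h.get(agentId) ?? 0;
}

async function recordTokenUsage(agentId: string, tokens: number): Promise<void> {
  // Per the commit message, the real helper also feeds the
  // mana_ai_tokens_used_total Prometheus counter.
  usage24h.set(agentId, (usage24h.get(agentId) ?? 0) + tokens);
}

const DAILY_TOKEN_BUDGET = 500_000; // illustrative limit, not from this patch

// Charge one planner run against the agent's rolling 24h budget.
async function chargePlannerRun(
  agentId: string,
  usage: { readonly totalTokens: number }, // structural subset of TokenUsage
): Promise<boolean> {
  const spent = await agentTokenUsage24h(agentId);
  if (spent + usage.totalTokens > DAILY_TOKEN_BUDGET) {
    return false; // over budget: caller skips or defers further planning
  }
  await recordTokenUsage(agentId, usage.totalTokens);
  return true;
}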