From 0d613e18466f60d76dd8e8e3113f5392330ca099 Mon Sep 17 00:00:00 2001
From: Till JS
Date: Mon, 20 Apr 2026 18:21:34 +0200
Subject: [PATCH] =?UTF-8?q?feat(ai):=20thread=20TokenUsage=20through=20run?=
 =?UTF-8?q?PlannerLoop=20=E2=86=92=20mana-ai=20budget?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Carries per-round token counts from the mana-llm response body
(prompt_tokens + completion_tokens) back through LlmCompletionResponse →
PlannerLoopResult. The loop sums across rounds and exposes a single
aggregate on result.usage.

Lets mana-ai's tick re-activate per-agent daily-token budget tracking —
tokensUsed was stubbed to 0 in the migration commit (6) because the loop
didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h get
real numbers again, and the mana_ai_tokens_used_total Prometheus counter
is accurate.

Additive only: consumers without usage needs ignore the new field, and
providers that don't return usage produce zeros (not undefined — the
loop still exposes the object so downstream branches stay trivial).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../src/lib/data/ai/missions/llm-client.ts  | 16 +++++++++++++++-
 packages/shared-ai/src/index.ts             |  1 +
 packages/shared-ai/src/planner/index.ts     |  1 +
 packages/shared-ai/src/planner/loop.ts      | 25 +++++++++++++++++++++++++
 services/mana-ai/src/cron/tick.ts           |  5 +----
 services/mana-ai/src/planner/llm-client.ts  | 16 ++++++++++++++++
 6 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
index 04f24afd4..b1adcbbf9 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/llm-client.ts
@@ -105,8 +105,17 @@ export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient
       const content = choice.message?.content ?? null;
       const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
       const finishReason = normaliseFinishReason(choice.finish_reason);
+      const usage = data.usage
+        ? {
+            promptTokens: data.usage.prompt_tokens ?? 0,
+            completionTokens: data.usage.completion_tokens ?? 0,
+            totalTokens:
+              data.usage.total_tokens ??
+              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
+          }
+        : undefined;
 
-      return { content, toolCalls, finishReason };
+      return { content, toolCalls, finishReason, usage };
     },
   };
 }
@@ -153,6 +162,11 @@ interface ChatCompletionResponseShape {
     };
     finish_reason?: string | null;
   }>;
+  usage?: {
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+  };
 }
 
 function fromWireToolCall(raw: {
diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts
index 3c236f157..e4ca0bc05 100644
--- a/packages/shared-ai/src/index.ts
+++ b/packages/shared-ai/src/index.ts
@@ -76,6 +76,7 @@ export type {
   ResolvedInput,
   SystemPromptInput,
   SystemPromptOutput,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './planner';
diff --git a/packages/shared-ai/src/planner/index.ts b/packages/shared-ai/src/planner/index.ts
index a7cfd8127..b432997b8 100644
--- a/packages/shared-ai/src/planner/index.ts
+++ b/packages/shared-ai/src/planner/index.ts
@@ -23,6 +23,7 @@ export type {
   LoopStopReason,
   PlannerLoopInput,
   PlannerLoopResult,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './loop';
diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts
index 9ac9057ef..c9e6b84e1 100644
--- a/packages/shared-ai/src/planner/loop.ts
+++ b/packages/shared-ai/src/planner/loop.ts
@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
 
 export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
 
+export interface TokenUsage {
+  readonly promptTokens: number;
+  readonly completionTokens: number;
+  readonly totalTokens: number;
+}
+
 export interface LlmCompletionResponse {
   readonly content: string | null;
   readonly toolCalls: readonly ToolCallRequest[];
   readonly finishReason: LlmFinishReason;
+  /** Token counts for this one call — propagated from the provider
+   * response when available. Summed across rounds in PlannerLoopResult. */
+  readonly usage?: TokenUsage;
 }
 
 export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
    * every assistant/tool turn). Never synced — contains decrypted
    * user content. */
   readonly messages: readonly ChatMessage[];
+  /** Accumulated token usage across every LLM round. Zero counts when
+   * the provider didn't report usage. Consumers use this for budget
+   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
+  readonly usage: TokenUsage;
 }
 
 // ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
   let summary: string | null = null;
   let stopReason: LoopStopReason = 'max-rounds';
   let rounds = 0;
+  let promptTokens = 0;
+  let completionTokens = 0;
 
   while (rounds < maxRounds) {
     rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
       temperature: input.temperature,
     });
 
+    if (response.usage) {
+      promptTokens += response.usage.promptTokens;
+      completionTokens += response.usage.completionTokens;
+    }
+
     // Append the assistant turn to history before we execute any
     // tools — the LLM needs to see its own prior tool_calls alongside
     // the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: { summary, stopReason, messages, + usage: { + promptTokens, + completionTokens, + totalTokens: promptTokens + completionTokens, + }, }; } diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts index 0b2ba708b..fa4db79a1 100644 --- a/services/mana-ai/src/cron/tick.ts +++ b/services/mana-ai/src/cron/tick.ts @@ -343,10 +343,7 @@ async function planOneMission( rationale: '', })), }, - // TODO: extract token usage from the loop's trailing LLM - // message once the client exposes it (currently 0 — budget - // enforcement on the server is effectively disabled). - tokensUsed: 0, + tokensUsed: loopResult.usage.totalTokens, }; } catch (err) { const msg = err instanceof Error ? err.message : String(err); diff --git a/services/mana-ai/src/planner/llm-client.ts b/services/mana-ai/src/planner/llm-client.ts index d1e466bbb..6b37d0a26 100644 --- a/services/mana-ai/src/planner/llm-client.ts +++ b/services/mana-ai/src/planner/llm-client.ts @@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient { const choice = data.choices?.[0]; if (!choice) throw new Error('mana-llm response had no choices'); + const usage = data.usage + ? { + promptTokens: data.usage.prompt_tokens ?? 0, + completionTokens: data.usage.completion_tokens ?? 0, + totalTokens: + data.usage.total_tokens ?? + (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0), + } + : undefined; + return { content: choice.message?.content ?? null, toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall), finishReason: normaliseFinishReason(choice.finish_reason), + usage, }; }, }; @@ -121,6 +132,11 @@ interface ChatCompletionResponseShape { }; finish_reason?: string | null; }>; + usage?: { + prompt_tokens?: number; + completion_tokens?: number; + total_tokens?: number; + }; } function fromWireToolCall(raw: {
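
Reviewer note: the key contract this patch establishes is that
PlannerLoopResult.usage is always an object; rounds whose provider
omitted a usage block contribute zeros. A self-contained TypeScript
sketch of that fold, for reference; the sumRoundUsage helper and the
sample round data are illustrative only, not part of this patch (the
real accumulation is inlined in runPlannerLoop above):

// Mirrors the TokenUsage interface added to packages/shared-ai/src/planner/loop.ts.
interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}

// One entry per LLM round; undefined models a provider that returned no
// usage block. This helper is a reviewer-side illustration of the fold
// that runPlannerLoop performs inline.
function sumRoundUsage(rounds: ReadonlyArray<TokenUsage | undefined>): TokenUsage {
  let promptTokens = 0;
  let completionTokens = 0;
  for (const usage of rounds) {
    if (usage) {
      promptTokens += usage.promptTokens;
      completionTokens += usage.completionTokens;
    }
  }
  // Always an object, never undefined: zero counts when nothing was
  // reported, so downstream budget branches need no null checks.
  return { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
}

const aggregate = sumRoundUsage([
  { promptTokens: 1200, completionTokens: 300, totalTokens: 1500 },
  undefined, // round 2: provider omitted usage
  { promptTokens: 1800, completionTokens: 450, totalTokens: 2250 },
]);
console.assert(aggregate.totalTokens === 3750);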
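
On the consumer side, tick.ts now feeds loopResult.usage.totalTokens
into the budget path. A hedged sketch of what that path presumably looks
like: recordTokenUsage and agentTokenUsage24h are named in the commit
message, but their signatures, the in-memory store, and the
DAILY_TOKEN_BUDGET constant below are assumptions for illustration, not
mana-ai's actual code:

// Assumed signatures: in-memory stand-ins for mana-ai's real helpers.
const usage24h = new Map<string, number>();

async function agentTokenUsage24h(agentId: string): Promise<number> {
  return usage24h.get(agentId) ?? 0;
}

async function recordTokenUsage(agentId: string, tokens: number): Promise<void> {
  // Per the commit message, the real helper also feeds the
  // mana_ai_tokens_used_total Prometheus counter.
  usage24h.set(agentId, (usage24h.get(agentId) ?? 0) + tokens);
}

const DAILY_TOKEN_BUDGET = 500_000; // illustrative limit, not from this patch

// Charge one planner run against the agent's rolling 24h budget.
async function chargePlannerRun(
  agentId: string,
  usage: { readonly totalTokens: number }, // structural subset of TokenUsage
): Promise<boolean> {
  const spent = await agentTokenUsage24h(agentId);
  if (spent + usage.totalTokens > DAILY_TOKEN_BUDGET) {
    return false; // over budget: caller skips or defers further planning
  }
  await recordTokenUsage(agentId, usage.totalTokens);
  return true;
}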