Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-17 10:39:40 +02:00)
feat(ai): thread TokenUsage through runPlannerLoop → mana-ai budget
Carries per-round token counts from the mana-llm response body (prompt_tokens + completion_tokens) back through LlmCompletionResponse → PlannerLoopResult. The loop sums across rounds and exposes a single aggregate on result.usage.

This lets mana-ai's tick re-activate per-agent daily-token budget tracking — tokensUsed was stubbed to 0 in the migration commit (6) because the loop didn't surface usage yet. Now recordTokenUsage + agentTokenUsage24h get real numbers again, and the mana_ai_tokens_used_total Prometheus counter is accurate.

Additive only: consumers without usage needs ignore the new field, and providers that don't return usage produce zeros (not undefined — the loop still exposes the object so downstream branches stay trivial).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent b878ecfe1c
commit 0d613e1846
6 changed files with 59 additions and 5 deletions
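For orientation before the diff: a minimal sketch of the consumer side this re-enables. The TokenUsage shape is copied from the commit itself; the helper signatures and the budget constant are assumptions, since the message names recordTokenUsage and agentTokenUsage24h but the diff does not show their definitions.

    interface TokenUsage {
      readonly promptTokens: number;
      readonly completionTokens: number;
      readonly totalTokens: number;
    }

    // Hypothetical stand-ins for mana-ai's persistence helpers; real signatures may differ.
    declare function recordTokenUsage(agentId: string, tokens: number): Promise<void>;
    declare function agentTokenUsage24h(agentId: string): Promise<number>;

    const DAILY_TOKEN_BUDGET = 200_000; // illustrative limit, not taken from the commit

    async function trackBudget(agentId: string, usage: TokenUsage): Promise<boolean> {
      // usage is always an object (providers that report nothing yield zeros),
      // so no undefined-check is needed before recording.
      await recordTokenUsage(agentId, usage.totalTokens);
      const last24h = await agentTokenUsage24h(agentId);
      return last24h < DAILY_TOKEN_BUDGET; // false means the agent is over budget
    }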
@@ -105,8 +105,17 @@ export function createManaLlmClient(opts: ManaLlmClientOptions = {}): LlmClient
       const content = choice.message?.content ?? null;
       const toolCalls = (choice.message?.tool_calls ?? []).map(fromWireToolCall);
       const finishReason = normaliseFinishReason(choice.finish_reason);
+      const usage = data.usage
+        ? {
+            promptTokens: data.usage.prompt_tokens ?? 0,
+            completionTokens: data.usage.completion_tokens ?? 0,
+            totalTokens:
+              data.usage.total_tokens ??
+              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
+          }
+        : undefined;
-      return { content, toolCalls, finishReason };
+      return { content, toolCalls, finishReason, usage };
     },
   };
 }
 
@@ -153,6 +162,11 @@ interface ChatCompletionResponseShape {
     };
     finish_reason?: string | null;
   }>;
+  usage?: {
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+  };
 }
 
 function fromWireToolCall(raw: {
@@ -76,6 +76,7 @@ export type {
   ResolvedInput,
   SystemPromptInput,
   SystemPromptOutput,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './planner';
@@ -23,6 +23,7 @@ export type {
   LoopStopReason,
   PlannerLoopInput,
   PlannerLoopResult,
+  TokenUsage,
   ToolCallRequest,
   ToolResult,
 } from './loop';
@@ -48,10 +48,19 @@ export interface LlmCompletionRequest {
 
 export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
 
+export interface TokenUsage {
+  readonly promptTokens: number;
+  readonly completionTokens: number;
+  readonly totalTokens: number;
+}
+
 export interface LlmCompletionResponse {
   readonly content: string | null;
   readonly toolCalls: readonly ToolCallRequest[];
   readonly finishReason: LlmFinishReason;
+  /** Token counts for this one call — propagated from the provider
+   * response when available. Summed across rounds in PlannerLoopResult. */
+  readonly usage?: TokenUsage;
 }
 
 export interface LlmClient {
@@ -95,6 +104,10 @@ export interface PlannerLoopResult {
    * every assistant/tool turn). Never synced — contains decrypted
    * user content. */
   readonly messages: readonly ChatMessage[];
+  /** Accumulated token usage across every LLM round. Zero counts when
+   * the provider didn't report usage. Consumers use this for budget
+   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
+  readonly usage: TokenUsage;
 }
 
 // ─── The loop ───────────────────────────────────────────────────────
@@ -124,6 +137,8 @@ export async function runPlannerLoop(opts: {
   let summary: string | null = null;
   let stopReason: LoopStopReason = 'max-rounds';
   let rounds = 0;
+  let promptTokens = 0;
+  let completionTokens = 0;
 
   while (rounds < maxRounds) {
     rounds++;
@@ -134,6 +149,11 @@ export async function runPlannerLoop(opts: {
       temperature: input.temperature,
     });
 
+    if (response.usage) {
+      promptTokens += response.usage.promptTokens;
+      completionTokens += response.usage.completionTokens;
+    }
+
     // Append the assistant turn to history before we execute any
     // tools — the LLM needs to see its own prior tool_calls alongside
     // the tool-message results in the next turn.
@@ -181,5 +201,10 @@ export async function runPlannerLoop(opts: {
     summary,
     stopReason,
     messages,
+    usage: {
+      promptTokens,
+      completionTokens,
+      totalTokens: promptTokens + completionTokens,
+    },
   };
 }
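Because a round with missing usage simply contributes nothing to the two counters, the aggregate is always a well-formed zero object rather than undefined. A standalone sketch of that folding behaviour (the Usage alias mirrors the TokenUsage interface added above; everything else is illustrative, not the loop's actual code):

    type Usage = { promptTokens: number; completionTokens: number; totalTokens: number };

    function sumUsage(rounds: ReadonlyArray<Usage | undefined>): Usage {
      let promptTokens = 0;
      let completionTokens = 0;
      for (const u of rounds) {
        if (u) {
          promptTokens += u.promptTokens;
          completionTokens += u.completionTokens;
        }
      }
      return { promptTokens, completionTokens, totalTokens: promptTokens + completionTokens };
    }

    // sumUsage([undefined, undefined]) yields { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
    // matching the loop's result when a provider never reports usage.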
@@ -343,10 +343,7 @@ async function planOneMission(
           rationale: '',
         })),
       },
-      // TODO: extract token usage from the loop's trailing LLM
-      // message once the client exposes it (currently 0 — budget
-      // enforcement on the server is effectively disabled).
-      tokensUsed: 0,
+      tokensUsed: loopResult.usage.totalTokens,
     };
   } catch (err) {
     const msg = err instanceof Error ? err.message : String(err);
@@ -73,10 +73,21 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
       const choice = data.choices?.[0];
       if (!choice) throw new Error('mana-llm response had no choices');
 
+      const usage = data.usage
+        ? {
+            promptTokens: data.usage.prompt_tokens ?? 0,
+            completionTokens: data.usage.completion_tokens ?? 0,
+            totalTokens:
+              data.usage.total_tokens ??
+              (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
+          }
+        : undefined;
+
       return {
         content: choice.message?.content ?? null,
         toolCalls: (choice.message?.tool_calls ?? []).map(fromWireToolCall),
         finishReason: normaliseFinishReason(choice.finish_reason),
+        usage,
       };
     },
   };
@@ -121,6 +132,11 @@ interface ChatCompletionResponseShape {
     };
     finish_reason?: string | null;
   }>;
+  usage?: {
+    prompt_tokens?: number;
+    completion_tokens?: number;
+    total_tokens?: number;
+  };
 }
 
 function fromWireToolCall(raw: {