diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts
index fa4db79a1..d62ffa24f 100644
--- a/services/mana-ai/src/cron/tick.ts
+++ b/services/mana-ai/src/cron/tick.ts
@@ -28,7 +28,7 @@ import { listDueMissions, type ServerMission } from '../db/missions-projection';
 import { loadActiveAgents, refreshAgentSnapshots, type ServerAgent } from '../db/agents-projection';
 import { appendServerIteration, planToIteration } from '../db/iteration-writer';
 import { refreshSnapshots } from '../db/snapshot-refresh';
-import { createServerLlmClient } from '../planner/llm-client';
+import { createServerLlmClient, ProviderCallError } from '../planner/llm-client';
 import { SERVER_TOOLS } from '../planner/tools';
 import {
   ticksTotal,
@@ -43,6 +43,9 @@
   grantSkipsTotal,
   agentDecisionsTotal,
   tokensUsedTotal,
+  toolCallsTotal,
+  plannerRoundsHistogram,
+  providerErrorsTotal,
 } from '../metrics';
 import { unwrapMissionGrant } from '../crypto/unwrap-grant';
 import { NewsResearchClient } from '../planner/news-research-client';
@@ -333,6 +336,21 @@ async function planOneMission(
       }),
     });
 
+    // Observability: one counter tick per tool_call + one histogram
+    // sample for round consumption. `policy` is pulled off the
+    // catalog entry, so a later change flipping the Gemini default
+    // from auto to propose shows up in the labels without code changes.
+    plannerRoundsHistogram.observe(loopResult.rounds);
+    for (const ec of loopResult.executedCalls) {
+      const catalogEntry = SERVER_TOOLS.find((t) => t.name === ec.call.name);
+      const policy = catalogEntry?.defaultPolicy ?? 'propose';
+      // Server-side execution is always deferred to the client —
+      // the onToolCall stub returns success without running
+      // anything. Real execution metrics will come from the
+      // webapp runner once it emits its own Prom surface.
+      toolCallsTotal.inc({ tool: ec.call.name, policy, outcome: 'deferred' });
+    }
+
     return {
       plan: {
         summary: loopResult.summary ?? '',
@@ -347,11 +365,23 @@
     };
   } catch (err) {
     const msg = err instanceof Error ? err.message : String(err);
+    if (err instanceof ProviderCallError) {
+      const provider = inferProviderFromModel('google/gemini-2.5-flash');
+      providerErrorsTotal.inc({ provider, kind: err.kind });
+    }
     console.warn(`[mana-ai tick] mission=${m.id} planner loop failed: ${msg}`);
     return null;
   }
 }
 
+/** Parse the provider name off a `provider/model` string. Used purely
+ * for metric labelling — falls back to `'unknown'` so a misconfigured
+ * model id doesn't crash the counter. */
+function inferProviderFromModel(model: string): string {
+  const [provider] = model.split('/', 1);
+  return provider || 'unknown';
+}
+
 /**
  * Drop tools the agent's policy denies so the Planner never sees a tool
  * it can't use. `propose` and `auto` stay (but the server only hands the
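The tick only ever emits `outcome: 'deferred'`, since execution happens client-side. As a sketch of the forward-compatibility the `policy`/`outcome` labels buy (hypothetical: the webapp runner has no metrics surface yet, and `recordExecutedCall` is an illustrative name, not code in this diff), an executing runner could reuse the exact same counter shape:

```ts
import { toolCallsTotal } from '../metrics';

// Hypothetical helper for a runner that actually executes tools:
// record `success` / `failure` instead of the tick's `deferred`.
async function recordExecutedCall(
  toolName: string,
  policy: 'auto' | 'propose',
  run: () => Promise<void>
): Promise<void> {
  try {
    await run();
    toolCallsTotal.inc({ tool: toolName, policy, outcome: 'success' });
  } catch (err) {
    toolCallsTotal.inc({ tool: toolName, policy, outcome: 'failure' });
    throw err; // let the caller's error handling see the failure too
  }
}
```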
diff --git a/services/mana-ai/src/metrics.ts b/services/mana-ai/src/metrics.ts
index 32fae4f90..5520e38da 100644
--- a/services/mana-ai/src/metrics.ts
+++ b/services/mana-ai/src/metrics.ts
@@ -154,3 +154,55 @@ export const tokensUsedTotal = new Counter({
   labelNames: ['agent_id'] as const,
   registers: [register],
 });
+
+// ── Function-Calling Planner (post-migration) ────────────
+
+/**
+ * Per-tool outcome counter.
+ *
+ * `policy` is the catalog default (auto / propose) — the server-side
+ * surface offers only propose-tools, so in practice this is always
+ * `propose`, but the label stays for forward-compatibility with a
+ * future web-runner integration.
+ *
+ * `outcome` values:
+ * - `success`  — the onToolCall callback returned `success: true`
+ *                (used in environments that actually execute)
+ * - `failure`  — onToolCall returned `success: false`
+ * - `deferred` — the server-side stub; the tool_call is recorded
+ *                for client-side application on sync (the ONLY
+ *                value the mana-ai tick emits today)
+ */
+export const toolCallsTotal = new Counter({
+  name: 'mana_ai_tool_calls_total',
+  help: 'Tool calls produced by the planner, labelled by how they were handled.',
+  labelNames: ['tool', 'policy', 'outcome'] as const,
+  registers: [register],
+});
+
+/**
+ * Distribution of how many planner rounds a single iteration consumed.
+ * 1 = the LLM went straight to a terminal answer; runs close to the
+ * hard cap (5) mean the planner is struggling. Buckets line up with
+ * the fixed 5-round ceiling so Grafana's heatmap is trivially readable.
+ */
+export const plannerRoundsHistogram = new Histogram({
+  name: 'mana_ai_planner_rounds',
+  help: 'Number of reasoning rounds consumed per iteration.',
+  buckets: [1, 2, 3, 4, 5],
+  registers: [register],
+});
+
+/**
+ * Structured provider errors returned from mana-llm. `kind` mirrors
+ * the ProviderError hierarchy in services/mana-llm/src/providers/errors.py
+ * (blocked / truncated / auth / rate_limit / capability / unknown).
+ * `provider` is inferred from the model id (google / openrouter /
+ * ollama / …).
+ */
+export const providerErrorsTotal = new Counter({
+  name: 'mana_ai_provider_errors_total',
+  help: 'Structured provider errors surfaced from mana-llm.',
+  labelNames: ['provider', 'kind'] as const,
+  registers: [register],
+});
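A quick way to eyeball the new exposition locally (a sketch, assuming the collectors register against prom-client's default registry; swap the `register` import if metrics.ts constructs its own `Registry`, and note the `news_research` label value is illustrative):

```ts
import { register } from 'prom-client';
import {
  toolCallsTotal,
  plannerRoundsHistogram,
  providerErrorsTotal,
} from './metrics';

// Exercise each collector once, then dump the text exposition.
plannerRoundsHistogram.observe(2);
toolCallsTotal.inc({ tool: 'news_research', policy: 'propose', outcome: 'deferred' });
providerErrorsTotal.inc({ provider: 'google', kind: 'rate_limit' });

register.metrics().then((text) => console.log(text));
```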
diff --git a/services/mana-ai/src/planner/llm-client.ts b/services/mana-ai/src/planner/llm-client.ts
index 6b37d0a26..81f12bbbd 100644
--- a/services/mana-ai/src/planner/llm-client.ts
+++ b/services/mana-ai/src/planner/llm-client.ts
@@ -16,6 +16,20 @@
 import type {
   ToolCallRequest,
 } from '@mana/shared-ai';
 
+/** Thrown when mana-llm returns a non-2xx status. `kind` mirrors the
+ * structured ProviderError vocabulary (blocked / truncated / auth /
+ * rate_limit / capability / unknown) so downstream metrics can label
+ * without re-parsing the message. */
+export class ProviderCallError extends Error {
+  constructor(
+    message: string,
+    public readonly kind: string
+  ) {
+    super(message);
+    this.name = 'ProviderCallError';
+  }
+}
+
 export interface ServerLlmClientOptions {
   readonly baseUrl: string;
   readonly serviceKey: string;
@@ -65,8 +79,33 @@ export function createServerLlmClient(opts: ServerLlmClientOptions): LlmClient {
       clearTimeout(timeout);
 
       if (!res.ok) {
-        const detail = await res.text().catch(() => '');
-        throw new Error(`mana-llm ${res.status}: ${detail.slice(0, 500)}`);
+        // mana-llm surfaces structured errors from the provider
+        // layer (see services/mana-llm/src/providers/errors.py):
+        // `{ detail: { kind, message } }` for 400 / 422 / 429 /
+        // 502, plain string detail for everything else. Preserve
+        // `kind` on the thrown error so callers (tick metrics)
+        // can label provider_errors_total without re-parsing.
+        // Read the body once as text, then try JSON: fetch bodies
+        // are single-use, so res.json() followed by a res.text()
+        // fallback would throw on the second read.
+        let kind = 'unknown';
+        let message = `mana-llm ${res.status}`;
+        const raw = await res.text().catch(() => '');
+        try {
+          const body = JSON.parse(raw) as {
+            detail?: string | { kind?: string; message?: string };
+          };
+          if (typeof body.detail === 'string') {
+            message = `${message}: ${body.detail.slice(0, 500)}`;
+          } else if (body.detail && typeof body.detail === 'object') {
+            kind = body.detail.kind ?? 'unknown';
+            message = `${message} (${kind}): ${body.detail.message ?? ''}`;
+          }
+        } catch {
+          // body wasn't JSON — fall back to the raw text
+          if (raw) message = `${message}: ${raw.slice(0, 500)}`;
+        }
+        throw new ProviderCallError(message, kind);
       }
 
       const data = (await res.json()) as ChatCompletionResponseShape;
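Downstream, `kind` makes retry and alerting decisions cheap. A caller-side sketch (hypothetical: `completeWithRetry` and the single-retry policy are illustrative, not part of this diff; only the `instanceof` branch reflects the new API):

```ts
import { ProviderCallError } from './llm-client';

const sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));

// Retry once on rate limits; every other kind propagates to the
// tick's catch block, which labels provider_errors_total by `kind`.
async function completeWithRetry<T>(call: () => Promise<T>): Promise<T> {
  try {
    return await call();
  } catch (err) {
    if (err instanceof ProviderCallError && err.kind === 'rate_limit') {
      await sleep(1_000);
      return call();
    }
    throw err;
  }
}
```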