From 3edf680ea0b555a12109be28cc50d9cb389cfa22 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 15:10:27 +0200 Subject: [PATCH] feat(mana-ai): telemetry for reminder producers (mana_ai_reminders_emitted_total) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Producers now return structured {producer, severity, text} objects instead of raw strings. buildReminderChannel collects them, increments mana_ai_reminders_emitted_total{producer, severity} per emission, and maps back to strings for the shared-ai loop input. Why structured: the Prometheus label "severity" lets dashboards split 75-99% token-budget warnings (severity=warn) from 100%+ escalations (severity=escalate) without NLP on the reminder text. Adding a new producer that emits only info-level state (e.g. stale-sync warning) falls out for free. Active producer labels today: - token-budget (warn, escalate) - retry-loop (warn) With this plus the scrape job (d087b4744), we can finally answer: "does the budget warning actually change LLM behaviour?" — correlate reminders_emitted_total{producer='token-budget'} with tick_duration_seconds or planner_rounds_histogram. 3 tests updated to assert the new {producer, severity, text} shape (16 reminder tests total, all green). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- services/mana-ai/src/metrics.ts | 22 ++++++ .../mana-ai/src/planner/reminders.test.ts | 40 +++++----- services/mana-ai/src/planner/reminders.ts | 78 +++++++++++++------ 3 files changed, 100 insertions(+), 40 deletions(-) diff --git a/services/mana-ai/src/metrics.ts b/services/mana-ai/src/metrics.ts index 7ed3fe84f..747c88820 100644 --- a/services/mana-ai/src/metrics.ts +++ b/services/mana-ai/src/metrics.ts @@ -235,3 +235,25 @@ export const providerErrorsTotal = new Counter({ labelNames: ['provider', 'kind'] as const, registers: [register], }); + +// ── Reminder channel (Claude-Code pattern) ───────── + +/** + * Bumped once per round per producer that returned a non-null reminder. + * Enables dashboards that answer: "is the token-budget warning + * actually getting through?" (correlate with agent-run duration) and + * "how often do missions hit a retry loop?". + * + * Labels: + * - producer: `token-budget` | `retry-loop` (extended as we add more) + * - severity: `warn` | `escalate` | `info` — lets the budget producer + * split 75-99% (warn) from 100%+ (escalate) without adding more + * producers. `info` is reserved for producers that only report + * background state. 
+ */ +export const remindersEmittedTotal = new Counter({ + name: 'mana_ai_reminders_emitted_total', + help: 'Transient reminders injected into the planner loop by producer + severity.', + labelNames: ['producer', 'severity'] as const, + registers: [register], +}); diff --git a/services/mana-ai/src/planner/reminders.test.ts b/services/mana-ai/src/planner/reminders.test.ts index 363cf7a66..c3ba47aa6 100644 --- a/services/mana-ai/src/planner/reminders.test.ts +++ b/services/mana-ai/src/planner/reminders.test.ts @@ -95,28 +95,31 @@ describe('tokenBudgetReminder', () => { expect(tokenBudgetReminder(ctx, 20_000)).toBeNull(); // 70% }); - it('warns at the 75% threshold', () => { + it('warns at the 75% threshold with severity=warn', () => { const ctx: ReminderContext = { agent: makeAgent({ maxTokensPerDay: 100_000 }), mission: makeMission(), pretickUsage24h: 50_000, }; - const msg = tokenBudgetReminder(ctx, 25_000); // 75% - expect(msg).not.toBeNull(); - expect(msg).toContain('75%'); - expect(msg).toContain('Mana'); + const r = tokenBudgetReminder(ctx, 25_000); // 75% + expect(r).not.toBeNull(); + expect(r!.severity).toBe('warn'); + expect(r!.producer).toBe('token-budget'); + expect(r!.text).toContain('75%'); + expect(r!.text).toContain('Mana'); }); - it('emits a stronger message at/above 100%', () => { + it('escalates at/above 100% with severity=escalate', () => { const ctx: ReminderContext = { agent: makeAgent({ maxTokensPerDay: 100_000 }), mission: makeMission(), pretickUsage24h: 90_000, }; - const msg = tokenBudgetReminder(ctx, 15_000); // 105% - expect(msg).not.toBeNull(); - expect(msg).toContain('ausgeschoepft'); - expect(msg).toContain('JETZT'); + const r = tokenBudgetReminder(ctx, 15_000); // 105% + expect(r).not.toBeNull(); + expect(r!.severity).toBe('escalate'); + expect(r!.text).toContain('ausgeschoepft'); + expect(r!.text).toContain('JETZT'); }); it('adds pretick and round usage correctly', () => { @@ -126,10 +129,9 @@ describe('tokenBudgetReminder', () => { 
pretickUsage24h: 80_000, }; // 80k + 0k = 80% → warns - expect(tokenBudgetReminder(ctx, 0)).not.toBeNull(); - // 80k + 20k = 100% → exhausted - const exhausted = tokenBudgetReminder(ctx, 20_000); - expect(exhausted).toContain('ausgeschoepft'); + expect(tokenBudgetReminder(ctx, 0)?.severity).toBe('warn'); + // 80k + 20k = 100% → escalates + expect(tokenBudgetReminder(ctx, 20_000)?.severity).toBe('escalate'); }); }); @@ -145,13 +147,15 @@ describe('retryLoopReminder', () => { ).toBeNull(); }); - it('warns when the last 2 calls failed at round >= 3', () => { - const msg = retryLoopReminder({ + it('warns when the last 2 calls failed at round >= 3 with severity=warn', () => { + const r = retryLoopReminder({ round: 3, recentCalls: [mkExecutedCall(false), mkExecutedCall(false)], }); - expect(msg).not.toBeNull(); - expect(msg).toContain('fehlgeschlagen'); + expect(r).not.toBeNull(); + expect(r!.severity).toBe('warn'); + expect(r!.producer).toBe('retry-loop'); + expect(r!.text).toContain('fehlgeschlagen'); }); it('stays silent when only one of the last 2 failed', () => { diff --git a/services/mana-ai/src/planner/reminders.ts b/services/mana-ai/src/planner/reminders.ts index e9e39ccd6..223f77e05 100644 --- a/services/mana-ai/src/planner/reminders.ts +++ b/services/mana-ai/src/planner/reminders.ts @@ -21,6 +21,7 @@ import type { ReminderChannel } from '@mana/shared-ai'; import type { ServerAgent } from '../db/agents-projection'; import type { ServerMission } from '../db/missions-projection'; +import { remindersEmittedTotal } from '../metrics'; export interface ReminderContext { readonly agent: ServerAgent | null; @@ -31,6 +32,22 @@ export interface ReminderContext { readonly pretickUsage24h: number; } +/** + * Severity conveys urgency. Used for the `severity` metric label so + * dashboards can separate "FYI" from "please change course" without + * NLP on the reminder string. 
+ * - `info`: background state — reader may or may not act + * - `warn`: the LLM should probably change course + * - `escalate`: the LLM must change course or the runner will cut it off + */ +export type ReminderSeverity = 'info' | 'warn' | 'escalate'; + +export interface Reminder { + readonly producer: string; + readonly severity: ReminderSeverity; + readonly text: string; +} + /** * Warn when the agent is nearing its daily token cap. Threshold at 75 % * gives the planner room to wind down cleanly before the hard skip at @@ -41,7 +58,7 @@ export interface ReminderContext { * - agents without a cap (`maxTokensPerDay == null`) * - usage below the warn threshold */ -export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null { +export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): Reminder | null { const cap = ctx.agent?.maxTokensPerDay; if (!ctx.agent || cap == null || cap <= 0) return null; @@ -52,18 +69,24 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s const pctDisplay = Math.round(pct * 100); const agentName = ctx.agent.name; if (pct >= 1.0) { - return ( - `Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` + - `(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` + - `Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` + - `kurz nach diesem Turn vom Runner abgeschnitten.` - ); + return { + producer: 'token-budget', + severity: 'escalate', + text: + `Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` + + `(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` + + `Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` + + `kurz nach diesem Turn vom Runner abgeschnitten.`, + }; } - return ( - `Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` + - `(${total} / ${cap} Tokens). 
Plane sparsam — vermeide redundante ` + - `Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.` - ); + return { + producer: 'token-budget', + severity: 'warn', + text: + `Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` + + `(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` + + `Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`, + }; } /** @@ -81,15 +104,18 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s export function retryLoopReminder(state: { readonly round: number; readonly recentCalls: readonly { readonly result: { readonly success: boolean } }[]; -}): string | null { +}): Reminder | null { if (state.round < 3) return null; const tail = state.recentCalls.slice(-2); if (tail.length === 2 && tail.every((ec) => !ec.result.success)) { - return ( - `Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` + - `Wiederholung ab — formuliere stattdessen einen Summary-Text, ` + - `der dem Nutzer erklaert, was schief lief.` - ); + return { + producer: 'retry-loop', + severity: 'warn', + text: + `Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` + + `Wiederholung ab — formuliere stattdessen einen Summary-Text, ` + + `der dem Nutzer erklaert, was schief lief.`, + }; } return null; } @@ -105,11 +131,19 @@ export function retryLoopReminder(state: { */ export function buildReminderChannel(ctx: ReminderContext): ReminderChannel { return (state) => { - const out: string[] = []; + const reminders: Reminder[] = []; const budget = tokenBudgetReminder(ctx, state.usage.totalTokens); - if (budget) out.push(budget); + if (budget) reminders.push(budget); const retry = retryLoopReminder({ round: state.round, recentCalls: state.recentCalls }); - if (retry) out.push(retry); - return out; + if (retry) reminders.push(retry); + + // Telemetry — one increment per emitted reminder. 
The counter + // is a module-level singleton imported from ../metrics, so it is + // always constructed before this closure runs. + for (const r of reminders) { + remindersEmittedTotal.inc({ producer: r.producer, severity: r.severity }); + } + + return reminders.map((r) => r.text); }; }