feat(mana-ai): telemetry for reminder producers (mana_ai_reminders_emitted_total)

Producers now return structured {producer, severity, text} objects
instead of raw strings. buildReminderChannel collects them, increments
mana_ai_reminders_emitted_total{producer, severity} per emission, and
maps back to strings for the shared-ai loop input.

Why structured: the Prometheus label "severity" lets dashboards split
75-99% token-budget warnings (severity=warn) from 100%+ escalations
(severity=escalate) without NLP on the reminder text. Adding a new
producer that emits only info-level state (e.g. stale-sync warning)
falls out for free.

Active producer labels today:
  - token-budget (warn, escalate)
  - retry-loop (warn)

With this plus the scrape job (d087b4744), we can finally answer:
"does the budget warning actually change LLM behaviour?" — correlate
reminders_emitted_total{producer='token-budget'} with
tick_duration_seconds or planner_rounds_histogram.

3 tests updated to assert the new {producer, severity, text} shape
(16 reminder tests total, all green).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-23 15:10:27 +02:00
parent 638f9c34d6
commit 3edf680ea0
3 changed files with 100 additions and 40 deletions

View file

@ -235,3 +235,25 @@ export const providerErrorsTotal = new Counter({
labelNames: ['provider', 'kind'] as const,
registers: [register],
});
// ── Reminder channel (Claude-Code <system-reminder> pattern) ─────────
/**
* Counts reminders injected into the planner loop. Incremented once for
* every reminder a producer emits, i.e. each time the reminder channel
* runs and a producer returns a non-null reminder.
*
* Enables dashboards that answer: "is the token-budget warning
* actually getting through?" (correlate with agent-run duration) and
* "how often do missions hit a retry loop?".
*
* Labels:
* - producer: `token-budget` | `retry-loop` (extended as we add more)
* - severity: `warn` | `escalate` | `info`. Lets the budget producer
* split 75-99% (warn) from 100%+ (escalate) without adding more
* producers. The retry-loop producer emits `warn`. `info` is available
* for purely informational producers; neither producer listed above
* emits it.
*/
export const remindersEmittedTotal = new Counter({
name: 'mana_ai_reminders_emitted_total',
help: 'Transient reminders injected into the planner loop by producer + severity.',
labelNames: ['producer', 'severity'] as const,
registers: [register],
});

View file

@ -95,28 +95,31 @@ describe('tokenBudgetReminder', () => {
expect(tokenBudgetReminder(ctx, 20_000)).toBeNull(); // 70%
});
it('warns at the 75% threshold', () => {
it('warns at the 75% threshold with severity=warn', () => {
const ctx: ReminderContext = {
agent: makeAgent({ maxTokensPerDay: 100_000 }),
mission: makeMission(),
pretickUsage24h: 50_000,
};
const msg = tokenBudgetReminder(ctx, 25_000); // 75%
expect(msg).not.toBeNull();
expect(msg).toContain('75%');
expect(msg).toContain('Mana');
const r = tokenBudgetReminder(ctx, 25_000); // 75%
expect(r).not.toBeNull();
expect(r!.severity).toBe('warn');
expect(r!.producer).toBe('token-budget');
expect(r!.text).toContain('75%');
expect(r!.text).toContain('Mana');
});
it('emits a stronger message at/above 100%', () => {
it('escalates at/above 100% with severity=escalate', () => {
const ctx: ReminderContext = {
agent: makeAgent({ maxTokensPerDay: 100_000 }),
mission: makeMission(),
pretickUsage24h: 90_000,
};
const msg = tokenBudgetReminder(ctx, 15_000); // 105%
expect(msg).not.toBeNull();
expect(msg).toContain('ausgeschoepft');
expect(msg).toContain('JETZT');
const r = tokenBudgetReminder(ctx, 15_000); // 105%
expect(r).not.toBeNull();
expect(r!.severity).toBe('escalate');
expect(r!.text).toContain('ausgeschoepft');
expect(r!.text).toContain('JETZT');
});
it('adds pretick and round usage correctly', () => {
@ -126,10 +129,9 @@ describe('tokenBudgetReminder', () => {
pretickUsage24h: 80_000,
};
// 80k + 0k = 80% → warns
expect(tokenBudgetReminder(ctx, 0)).not.toBeNull();
// 80k + 20k = 100% → exhausted
const exhausted = tokenBudgetReminder(ctx, 20_000);
expect(exhausted).toContain('ausgeschoepft');
expect(tokenBudgetReminder(ctx, 0)?.severity).toBe('warn');
// 80k + 20k = 100% → escalates
expect(tokenBudgetReminder(ctx, 20_000)?.severity).toBe('escalate');
});
});
@ -145,13 +147,15 @@ describe('retryLoopReminder', () => {
).toBeNull();
});
it('warns when the last 2 calls failed at round >= 3', () => {
const msg = retryLoopReminder({
it('warns when the last 2 calls failed at round >= 3 with severity=warn', () => {
const r = retryLoopReminder({
round: 3,
recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
});
expect(msg).not.toBeNull();
expect(msg).toContain('fehlgeschlagen');
expect(r).not.toBeNull();
expect(r!.severity).toBe('warn');
expect(r!.producer).toBe('retry-loop');
expect(r!.text).toContain('fehlgeschlagen');
});
it('stays silent when only one of the last 2 failed', () => {

View file

@ -21,6 +21,7 @@
import type { ReminderChannel } from '@mana/shared-ai';
import type { ServerAgent } from '../db/agents-projection';
import type { ServerMission } from '../db/missions-projection';
import { remindersEmittedTotal } from '../metrics';
export interface ReminderContext {
readonly agent: ServerAgent | null;
@ -31,6 +32,22 @@ export interface ReminderContext {
readonly pretickUsage24h: number;
}
/**
* Severity conveys urgency. Used for the `severity` metric label so
* dashboards can separate "FYI" from "please change course" without
* NLP on the reminder string.
* - `info`: background state; the reader may or may not act
* - `warn`: the LLM should probably change course
* - `escalate`: the LLM must change course or the runner will cut it off
*/
export type ReminderSeverity = 'info' | 'warn' | 'escalate';
/**
* A single reminder emitted by a producer.
* - `producer`: identifier of the emitting producer, used verbatim as
* the `producer` metric label (e.g. `token-budget`, `retry-loop`)
* - `severity`: urgency of the reminder, used as the `severity` metric
* label
* - `text`: the reminder message; the only field the reminder channel
* passes on in its output
*/
export interface Reminder {
readonly producer: string;
readonly severity: ReminderSeverity;
readonly text: string;
}
/**
* Warn when the agent is nearing its daily token cap. Threshold at 75 %
* gives the planner room to wind down cleanly before the hard skip at
@ -41,7 +58,7 @@ export interface ReminderContext {
* - agents without a cap (`maxTokensPerDay == null`)
* - usage below the warn threshold
*/
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null {
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): Reminder | null {
const cap = ctx.agent?.maxTokensPerDay;
if (!ctx.agent || cap == null || cap <= 0) return null;
@ -52,18 +69,24 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
const pctDisplay = Math.round(pct * 100);
const agentName = ctx.agent.name;
if (pct >= 1.0) {
return (
`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
`kurz nach diesem Turn vom Runner abgeschnitten.`
);
return {
producer: 'token-budget',
severity: 'escalate',
text:
`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
`kurz nach diesem Turn vom Runner abgeschnitten.`,
};
}
return (
`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`
);
return {
producer: 'token-budget',
severity: 'warn',
text:
`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`,
};
}
/**
@ -81,15 +104,18 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
export function retryLoopReminder(state: {
readonly round: number;
readonly recentCalls: readonly { readonly result: { readonly success: boolean } }[];
}): string | null {
}): Reminder | null {
if (state.round < 3) return null;
const tail = state.recentCalls.slice(-2);
if (tail.length === 2 && tail.every((ec) => !ec.result.success)) {
return (
`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
`der dem Nutzer erklaert, was schief lief.`
);
return {
producer: 'retry-loop',
severity: 'warn',
text:
`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
`der dem Nutzer erklaert, was schief lief.`,
};
}
return null;
}
@ -105,11 +131,19 @@ export function retryLoopReminder(state: {
*/
export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
return (state) => {
const out: string[] = [];
const reminders: Reminder[] = [];
const budget = tokenBudgetReminder(ctx, state.usage.totalTokens);
if (budget) out.push(budget);
if (budget) reminders.push(budget);
const retry = retryLoopReminder({ round: state.round, recentCalls: state.recentCalls });
if (retry) out.push(retry);
return out;
if (retry) reminders.push(retry);
// Telemetry — one increment per emitted reminder, labelled by
// producer + severity. The counter is a module-level export of
// '../metrics' that this file imports directly, so it always
// exists here (tests included).
for (const r of reminders) {
remindersEmittedTotal.inc({ producer: r.producer, severity: r.severity });
}
return reminders.map((r) => r.text);
};
}