mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:21:10 +02:00
feat(mana-ai): telemetry for reminder producers (mana_ai_reminders_emitted_total)
Producers now return structured {producer, severity, text} objects
instead of raw strings. buildReminderChannel collects them, increments
mana_ai_reminders_emitted_total{producer, severity} per emission, and
maps back to strings for the shared-ai loop input.
Why structured: the Prometheus label "severity" lets dashboards split
75-99% token-budget warnings (severity=warn) from 100%+ escalations
(severity=escalate) without NLP on the reminder text. Adding a new
producer that emits only info-level state (e.g. stale-sync warning)
falls out for free.
Active producer labels today:
- token-budget (warn, escalate)
- retry-loop (warn)
With this plus the scrape job (d087b4744), we can finally answer:
"does the budget warning actually change LLM behaviour?" — correlate
reminders_emitted_total{producer='token-budget'} with
tick_duration_seconds or planner_rounds_histogram.
4 tests updated to assert the new {producer, severity, text} shape
(16 reminder tests total, all green).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
638f9c34d6
commit
3edf680ea0
3 changed files with 100 additions and 40 deletions
|
|
@ -235,3 +235,25 @@ export const providerErrorsTotal = new Counter({
|
|||
labelNames: ['provider', 'kind'] as const,
|
||||
registers: [register],
|
||||
});
|
||||
|
||||
// ── Reminder channel (Claude-Code <system-reminder> pattern) ─────────
|
||||
|
||||
/**
|
||||
* Bumped once per round per producer that returned a non-null reminder.
|
||||
* Enables dashboards that answer: "is the token-budget warning
|
||||
* actually getting through?" (correlate with agent-run duration) and
|
||||
* "how often do missions hit a retry loop?".
|
||||
*
|
||||
* Labels:
|
||||
* - producer: `token-budget` | `retry-loop` (extended as we add more)
|
||||
* - severity: `warn` | `escalate` | `info` — lets the budget producer
|
||||
* split 75-99% (warn) from 100%+ (escalate) without adding more
|
||||
* producers. `info` is for producers that only report background
|
||||
* state; no producer emits it yet.
|
||||
*/
|
||||
export const remindersEmittedTotal = new Counter({
|
||||
name: 'mana_ai_reminders_emitted_total',
|
||||
help: 'Transient reminders injected into the planner loop by producer + severity.',
|
||||
labelNames: ['producer', 'severity'] as const,
|
||||
registers: [register],
|
||||
});
|
||||
|
|
|
|||
|
|
@ -95,28 +95,31 @@ describe('tokenBudgetReminder', () => {
|
|||
expect(tokenBudgetReminder(ctx, 20_000)).toBeNull(); // 70%
|
||||
});
|
||||
|
||||
it('warns at the 75% threshold', () => {
|
||||
it('warns at the 75% threshold with severity=warn', () => {
|
||||
const ctx: ReminderContext = {
|
||||
agent: makeAgent({ maxTokensPerDay: 100_000 }),
|
||||
mission: makeMission(),
|
||||
pretickUsage24h: 50_000,
|
||||
};
|
||||
const msg = tokenBudgetReminder(ctx, 25_000); // 75%
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg).toContain('75%');
|
||||
expect(msg).toContain('Mana');
|
||||
const r = tokenBudgetReminder(ctx, 25_000); // 75%
|
||||
expect(r).not.toBeNull();
|
||||
expect(r!.severity).toBe('warn');
|
||||
expect(r!.producer).toBe('token-budget');
|
||||
expect(r!.text).toContain('75%');
|
||||
expect(r!.text).toContain('Mana');
|
||||
});
|
||||
|
||||
it('emits a stronger message at/above 100%', () => {
|
||||
it('escalates at/above 100% with severity=escalate', () => {
|
||||
const ctx: ReminderContext = {
|
||||
agent: makeAgent({ maxTokensPerDay: 100_000 }),
|
||||
mission: makeMission(),
|
||||
pretickUsage24h: 90_000,
|
||||
};
|
||||
const msg = tokenBudgetReminder(ctx, 15_000); // 105%
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg).toContain('ausgeschoepft');
|
||||
expect(msg).toContain('JETZT');
|
||||
const r = tokenBudgetReminder(ctx, 15_000); // 105%
|
||||
expect(r).not.toBeNull();
|
||||
expect(r!.severity).toBe('escalate');
|
||||
expect(r!.text).toContain('ausgeschoepft');
|
||||
expect(r!.text).toContain('JETZT');
|
||||
});
|
||||
|
||||
it('adds pretick and round usage correctly', () => {
|
||||
|
|
@ -126,10 +129,9 @@ describe('tokenBudgetReminder', () => {
|
|||
pretickUsage24h: 80_000,
|
||||
};
|
||||
// 80k + 0k = 80% → warns
|
||||
expect(tokenBudgetReminder(ctx, 0)).not.toBeNull();
|
||||
// 80k + 20k = 100% → exhausted
|
||||
const exhausted = tokenBudgetReminder(ctx, 20_000);
|
||||
expect(exhausted).toContain('ausgeschoepft');
|
||||
expect(tokenBudgetReminder(ctx, 0)?.severity).toBe('warn');
|
||||
// 80k + 20k = 100% → escalates
|
||||
expect(tokenBudgetReminder(ctx, 20_000)?.severity).toBe('escalate');
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -145,13 +147,15 @@ describe('retryLoopReminder', () => {
|
|||
).toBeNull();
|
||||
});
|
||||
|
||||
it('warns when the last 2 calls failed at round >= 3', () => {
|
||||
const msg = retryLoopReminder({
|
||||
it('warns when the last 2 calls failed at round >= 3 with severity=warn', () => {
|
||||
const r = retryLoopReminder({
|
||||
round: 3,
|
||||
recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
|
||||
});
|
||||
expect(msg).not.toBeNull();
|
||||
expect(msg).toContain('fehlgeschlagen');
|
||||
expect(r).not.toBeNull();
|
||||
expect(r!.severity).toBe('warn');
|
||||
expect(r!.producer).toBe('retry-loop');
|
||||
expect(r!.text).toContain('fehlgeschlagen');
|
||||
});
|
||||
|
||||
it('stays silent when only one of the last 2 failed', () => {
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
import type { ReminderChannel } from '@mana/shared-ai';
|
||||
import type { ServerAgent } from '../db/agents-projection';
|
||||
import type { ServerMission } from '../db/missions-projection';
|
||||
import { remindersEmittedTotal } from '../metrics';
|
||||
|
||||
export interface ReminderContext {
|
||||
readonly agent: ServerAgent | null;
|
||||
|
|
@ -31,6 +32,22 @@ export interface ReminderContext {
|
|||
readonly pretickUsage24h: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Severity conveys urgency. Used for the `severity` metric label so
|
||||
* dashboards can separate "FYI" from "please change course" without
|
||||
* NLP on the reminder string.
|
||||
* - `info`: background state — reader may or may not act
|
||||
* - `warn`: the LLM should probably change course
|
||||
* - `escalate`: the LLM must change course or the runner will cut it off
|
||||
*/
|
||||
export type ReminderSeverity = 'info' | 'warn' | 'escalate';
|
||||
|
||||
export interface Reminder {
|
||||
readonly producer: string;
|
||||
readonly severity: ReminderSeverity;
|
||||
readonly text: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Warn when the agent is nearing its daily token cap. Threshold at 75 %
|
||||
* gives the planner room to wind down cleanly before the hard skip at
|
||||
|
|
@ -41,7 +58,7 @@ export interface ReminderContext {
|
|||
* - agents without a cap (`maxTokensPerDay == null`)
|
||||
* - usage below the warn threshold
|
||||
*/
|
||||
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null {
|
||||
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): Reminder | null {
|
||||
const cap = ctx.agent?.maxTokensPerDay;
|
||||
if (!ctx.agent || cap == null || cap <= 0) return null;
|
||||
|
||||
|
|
@ -52,18 +69,24 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
|
|||
const pctDisplay = Math.round(pct * 100);
|
||||
const agentName = ctx.agent.name;
|
||||
if (pct >= 1.0) {
|
||||
return (
|
||||
`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
|
||||
`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
|
||||
`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
|
||||
`kurz nach diesem Turn vom Runner abgeschnitten.`
|
||||
);
|
||||
return {
|
||||
producer: 'token-budget',
|
||||
severity: 'escalate',
|
||||
text:
|
||||
`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
|
||||
`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
|
||||
`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
|
||||
`kurz nach diesem Turn vom Runner abgeschnitten.`,
|
||||
};
|
||||
}
|
||||
return (
|
||||
`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
|
||||
`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
|
||||
`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`
|
||||
);
|
||||
return {
|
||||
producer: 'token-budget',
|
||||
severity: 'warn',
|
||||
text:
|
||||
`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
|
||||
`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
|
||||
`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,15 +104,18 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
|
|||
export function retryLoopReminder(state: {
|
||||
readonly round: number;
|
||||
readonly recentCalls: readonly { readonly result: { readonly success: boolean } }[];
|
||||
}): string | null {
|
||||
}): Reminder | null {
|
||||
if (state.round < 3) return null;
|
||||
const tail = state.recentCalls.slice(-2);
|
||||
if (tail.length === 2 && tail.every((ec) => !ec.result.success)) {
|
||||
return (
|
||||
`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
|
||||
`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
|
||||
`der dem Nutzer erklaert, was schief lief.`
|
||||
);
|
||||
return {
|
||||
producer: 'retry-loop',
|
||||
severity: 'warn',
|
||||
text:
|
||||
`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
|
||||
`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
|
||||
`der dem Nutzer erklaert, was schief lief.`,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
@ -105,11 +131,19 @@ export function retryLoopReminder(state: {
|
|||
*/
|
||||
export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
|
||||
return (state) => {
|
||||
const out: string[] = [];
|
||||
const reminders: Reminder[] = [];
|
||||
const budget = tokenBudgetReminder(ctx, state.usage.totalTokens);
|
||||
if (budget) out.push(budget);
|
||||
if (budget) reminders.push(budget);
|
||||
const retry = retryLoopReminder({ round: state.round, recentCalls: state.recentCalls });
|
||||
if (retry) out.push(retry);
|
||||
return out;
|
||||
if (retry) reminders.push(retry);
|
||||
|
||||
// Telemetry — one increment per emitted reminder, labelled with
|
||||
// its producer and severity. The counter is a static import from
|
||||
// ../metrics, so the increment is unconditional.
|
||||
for (const r of reminders) {
|
||||
remindersEmittedTotal.inc({ producer: r.producer, severity: r.severity });
|
||||
}
|
||||
|
||||
return reminders.map((r) => r.text);
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue