feat(mana-ai): first live reminder producers — token budget + retry-loop

Wires the M1 reminderChannel into the mana-ai mission runner with two initial producers in services/mana-ai/src/planner/reminders.ts:

- tokenBudgetReminder — warns at 75% of the agent's daily cap and emits a stronger "wrap up NOW" message at/above 100%. Uses pretick usage plus accumulated round usage, so the warning tracks drift during a long plan.
- retryLoopReminder — shape is in place (round ≥ 3 + last 2 failures), but the channel can currently feed it only the single lastCall that LoopState exposes, so it cannot fire yet. Extends cleanly once LoopState carries the full failure window.

buildReminderChannel composes the active producers; the tick hoists pretickUsage24h so the channel has the baseline. The loop re-evaluates the producers every round, so usage drift across rounds surfaces on the NEXT turn.

Also exports LoopState + ReminderChannel from the @mana/shared-ai top level so consumers don't need to reach into /planner.

Tests: 13 new bun tests covering thresholds, pretick+round summing, composition, and per-round re-evaluation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 20:21:09 +02:00 · 2026-04-23 14:00:04 +02:00 · 2026-04-23 14:00:04 +02:00 · faa472be91
commit faa472be91
parent e5d230e599
4 changed files with 330 additions and 4 deletions
--- a/services/mana-ai/src/cron/tick.ts
+++ b/services/mana-ai/src/cron/tick.ts
@ -50,6 +50,7 @@ import {
 import { unwrapMissionGrant } from '../crypto/unwrap-grant';
 import { detectInjectionMarker } from '@mana/tool-registry';
 import { NewsResearchClient } from '../planner/news-research-client';
+import { buildReminderChannel } from '../planner/reminders';
 import { ManaResearchClient, type DeepResearchProvider } from '../clients/mana-research';
 import {
 	deletePendingResearchJob,
@ -192,6 +193,12 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
 				agentDecisionsTotal.inc({ decision: 'skipped-paused' });
 				continue;
 			}
+			// Pretick token usage is surfaced to the reminder channel so the
+			// planner sees a warning as it approaches the cap, rather than
+			// getting cut off without explanation. Default 0 when the
+			// agent has no cap or the query fails (reminder becomes a
+			// no-op for that mission).
+			let pretickUsage24h = 0;
 			if (agent) {
 				const used = activeRuns.get(agent.id) ?? 0;
 				if (used >= agent.maxConcurrentMissions) {
@ -200,8 +207,8 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
 				}
 				// Budget enforcement: check rolling 24h token usage.
 				if (agent.maxTokensPerDay != null && agent.maxTokensPerDay >= 0) {
-					const windowUsage = await getAgentTokenUsage24h(sql, m.userId, agent.id);
-					if (windowUsage >= agent.maxTokensPerDay) {
+					pretickUsage24h = await getAgentTokenUsage24h(sql, m.userId, agent.id);
+					if (pretickUsage24h >= agent.maxTokensPerDay) {
 						agentDecisionsTotal.inc({ decision: 'skipped-budget' });
 						continue;
 					}
@ -219,7 +226,7 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
 						'agent.id': agent?.id ?? 'legacy',
 						'agent.name': agent?.name ?? 'Mana',
 					},
-					() => planOneMission(m, llm, sql, agent, config)
+					() => planOneMission(m, llm, sql, agent, config, pretickUsage24h)
 				);
 				if (planResult.outcome === 'skipped') {
 					// Deep-research job still running — pick this mission
@ -309,7 +316,8 @@ async function planOneMission(
 	llm: ReturnType<typeof createServerLlmClient>,
 	sql: Sql,
 	agent: ServerAgent | null,
-	config: Config
+	config: Config,
+	pretickUsage24h: number
 ): Promise<PlanMissionOutcome> {
 	const mission = serverMissionToSharedMission(m);
 	// Resolve the mission's Key-Grant (if any) once per tick. An absent
@ -371,6 +379,17 @@ async function planOneMission(

 	const tools = filterToolsByAgentPolicy(SERVER_TOOLS, agent);

+	// Per-round reminder channel: injects transient hints (token-budget
+	// warnings today; retry-loop detection, stale-data signals later)
+	// into the NEXT LLM turn only. See `planner/reminders.ts` for the
+	// individual producers and the Claude-Code <system-reminder>
+	// rationale.
+	const reminderChannel = buildReminderChannel({
+		agent,
+		mission: m,
+		pretickUsage24h,
+	});
+
 	try {
 		const loopResult = await runPlannerLoop({
 			llm,
@ -379,6 +398,7 @@ async function planOneMission(
 				userPrompt,
 				tools,
 				model: 'google/gemini-2.5-flash',
+				reminderChannel,
 			},
 			// Server-side onToolCall: no execution, just acknowledge.
 			// The captured call lands in loopResult.executedCalls and
--- a/services/mana-ai/src/planner/reminders.test.ts
+++ b/services/mana-ai/src/planner/reminders.test.ts
@ -0,0 +1,185 @@
+import { describe, expect, it } from 'bun:test';
+import {
+	buildReminderChannel,
+	retryLoopReminder,
+	tokenBudgetReminder,
+	type ReminderContext,
+} from './reminders';
+import type { ServerAgent } from '../db/agents-projection';
+import type { ServerMission } from '../db/missions-projection';
+import type { LoopState } from '@mana/shared-ai';
+
+// ─── Fixtures ──────────────────────────────────────────────────────
+
+/**
+ * Test fixture: an active agent with a 100k daily token cap. Any field can
+ * be replaced through `overrides`, which win over the defaults.
+ */
+function makeAgent(overrides: Partial<ServerAgent> = {}): ServerAgent {
+	const defaults = {
+		id: 'agent-1',
+		userId: 'user-1',
+		spaceId: 'space-1',
+		name: 'Mana',
+		role: null,
+		systemPrompt: null,
+		memory: null,
+		state: 'active',
+		maxTokensPerDay: 100_000,
+		maxConcurrentMissions: 3,
+		policy: null,
+		updatedAt: '2026-04-23T00:00:00Z',
+	};
+	// Overrides are spread last so a caller-supplied field always wins.
+	return { ...defaults, ...overrides } as ServerAgent;
+}
+
+/**
+ * Test fixture: an active mission owned by `agent-1` with no iterations.
+ * Fields in `overrides` replace the defaults.
+ */
+function makeMission(overrides: Partial<ServerMission> = {}): ServerMission {
+	const defaults = {
+		id: 'mission-1',
+		userId: 'user-1',
+		spaceId: 'space-1',
+		title: 'Test',
+		objective: 'Do the thing',
+		state: 'active',
+		nextRunAt: '2026-04-23T00:00:00Z',
+		iterations: [],
+		agentId: 'agent-1',
+	};
+	// Overrides are spread last so a caller-supplied field always wins.
+	return { ...defaults, ...overrides } as ServerMission;
+}
+
+/**
+ * Test fixture: loop state for round 1 with no tool calls and zero token
+ * usage. Fields in `overrides` replace the defaults.
+ */
+function makeState(overrides: Partial<LoopState> = {}): LoopState {
+	const zeroUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
+	const base: LoopState = { round: 1, toolCallCount: 0, usage: zeroUsage };
+	return { ...base, ...overrides };
+}
+
+// ─── tokenBudgetReminder ──────────────────────────────────────────
+
+// Unit tests for the budget producer in isolation. Each case builds a
+// ReminderContext by hand and passes the round usage as the second
+// argument; utilisation is (pretickUsage24h + roundUsage) / maxTokensPerDay.
+describe('tokenBudgetReminder', () => {
+	it('returns null when agent has no cap', () => {
+		const ctx: ReminderContext = {
+			// NOTE(review): the double cast suggests `maxTokensPerDay` is not
+			// typed as nullable on ServerAgent — confirm against the projection.
+			agent: makeAgent({ maxTokensPerDay: null as unknown as number }),
+			mission: makeMission(),
+			pretickUsage24h: 50_000,
+		};
+		expect(tokenBudgetReminder(ctx, 10_000)).toBeNull();
+	});
+
+	it('returns null when agent is absent (legacy mission)', () => {
+		const ctx: ReminderContext = { agent: null, mission: makeMission(), pretickUsage24h: 0 };
+		expect(tokenBudgetReminder(ctx, 99_000)).toBeNull();
+	});
+
+	it('returns null below 75% utilisation', () => {
+		const ctx: ReminderContext = {
+			agent: makeAgent({ maxTokensPerDay: 100_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 50_000,
+		};
+		expect(tokenBudgetReminder(ctx, 20_000)).toBeNull(); // 70%
+	});
+
+	// Boundary case: exactly 75% must already warn (the threshold is inclusive).
+	it('warns at the 75% threshold', () => {
+		const ctx: ReminderContext = {
+			agent: makeAgent({ maxTokensPerDay: 100_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 50_000,
+		};
+		const msg = tokenBudgetReminder(ctx, 25_000); // 75%
+		expect(msg).not.toBeNull();
+		expect(msg).toContain('75%');
+		// 'Mana' is the default agent name from the makeAgent fixture.
+		expect(msg).toContain('Mana');
+	});
+
+	it('emits a stronger message at/above 100%', () => {
+		const ctx: ReminderContext = {
+			agent: makeAgent({ maxTokensPerDay: 100_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 90_000,
+		};
+		const msg = tokenBudgetReminder(ctx, 15_000); // 105%
+		expect(msg).not.toBeNull();
+		// Both words appear only in the "budget exhausted" variant of the message.
+		expect(msg).toContain('ausgeschoepft');
+		expect(msg).toContain('JETZT');
+	});
+
+	it('adds pretick and round usage correctly', () => {
+		const ctx: ReminderContext = {
+			agent: makeAgent({ maxTokensPerDay: 100_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 80_000,
+		};
+		// 80k + 0k = 80% → warns
+		expect(tokenBudgetReminder(ctx, 0)).not.toBeNull();
+		// 80k + 20k = 100% → exhausted
+		const exhausted = tokenBudgetReminder(ctx, 20_000);
+		expect(exhausted).toContain('ausgeschoepft');
+	});
+});
+
+// ─── retryLoopReminder ────────────────────────────────────────────
+
+// Unit tests for the retry-loop producer in isolation. The failure window
+// is passed in directly as `lastFailures` (true = failed call), so these
+// cases do not depend on what the reminder channel can observe.
+describe('retryLoopReminder', () => {
+	it('is silent before round 3', () => {
+		expect(retryLoopReminder({ round: 2, lastFailures: [true, true] })).toBeNull();
+	});
+
+	it('warns when the last 2 calls failed at round >= 3', () => {
+		const msg = retryLoopReminder({ round: 3, lastFailures: [true, true] });
+		expect(msg).not.toBeNull();
+		expect(msg).toContain('fehlgeschlagen');
+	});
+
+	it('stays silent when only one of the last 2 failed', () => {
+		expect(retryLoopReminder({ round: 4, lastFailures: [false, true] })).toBeNull();
+	});
+
+	// A single recorded failure is not enough, whatever the round number.
+	it('stays silent with fewer than 2 failures recorded', () => {
+		expect(retryLoopReminder({ round: 5, lastFailures: [true] })).toBeNull();
+	});
+});
+
+// ─── buildReminderChannel — composition ───────────────────────────
+
+// Composition tests: build the channel once, then call it with LoopState
+// snapshots the way the planner loop would. Only the budget producer is
+// exercised here; none of the states carries a `lastCall`.
+describe('buildReminderChannel', () => {
+	it('returns an empty array when no producer fires', () => {
+		const channel = buildReminderChannel({
+			agent: makeAgent({ maxTokensPerDay: 100_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 0,
+		});
+		expect(channel(makeState())).toEqual([]);
+	});
+
+	it('surfaces the budget reminder when usage is high', () => {
+		const channel = buildReminderChannel({
+			agent: makeAgent({ maxTokensPerDay: 10_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 8_000,
+		});
+		// 8k pretick + 1k in this run = 9k of a 10k cap → 90%.
+		const out = channel(
+			makeState({ usage: { promptTokens: 500, completionTokens: 500, totalTokens: 1_000 } })
+		);
+		expect(out).toHaveLength(1);
+		expect(out[0]).toContain('90%');
+	});
+
+	// The same channel instance is called twice with different usage to show
+	// that the result is computed from the state passed in on each call.
+	it('uses the updated totalTokens each round (re-evaluated)', () => {
+		const channel = buildReminderChannel({
+			agent: makeAgent({ maxTokensPerDay: 10_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 5_000,
+		});
+		// Round 1 — 50% → silent
+		expect(channel(makeState())).toEqual([]);
+		// Round 2 — 5k + 3k = 80% → warns
+		const round2 = channel(
+			makeState({
+				round: 2,
+				usage: { promptTokens: 1500, completionTokens: 1500, totalTokens: 3_000 },
+			})
+		);
+		expect(round2).toHaveLength(1);
+		expect(round2[0]).toContain('80%');
+	});
+});
--- a/services/mana-ai/src/planner/reminders.ts
+++ b/services/mana-ai/src/planner/reminders.ts
@ -0,0 +1,119 @@
+/**
+ * Per-round reminder producers for the mana-ai mission runner.
+ *
+ * Each producer is a small pure function that reads some snapshot (agent
+ * state, mission metadata, tick-level usage counters) and returns a short
+ * German string to inject as a `<reminder>` tag on the next LLM call.
+ * Producers return `null` when there's nothing to say so the caller can
+ * cleanly filter them out.
+ *
+ * Composition happens in `buildReminderChannel()` which wires the active
+ * producers into a single `ReminderChannel` callback compatible with
+ * `runPlannerLoop`'s new reminderChannel input. The loop invokes the
+ * channel once per round; we re-evaluate every producer each round so
+ * usage drift across rounds (rounds can accumulate 10k+ completion
+ * tokens) shows up in the NEXT reminder.
+ *
+ * See `docs/plans/agent-loop-improvements-m1.md` §2 for the
+ * Claude-Code `<system-reminder>` pattern this implements.
+ */
+
+import type { ReminderChannel } from '@mana/shared-ai';
+import type { ServerAgent } from '../db/agents-projection';
+import type { ServerMission } from '../db/missions-projection';
+
+/**
+ * Snapshot handed to the reminder producers. `buildReminderChannel`
+ * captures one instance in its closure and reuses it on every call of the
+ * returned channel.
+ */
+export interface ReminderContext {
+	/** Agent the mission runs under, or `null` when there is none.
+	 *  `tokenBudgetReminder` stays silent for a `null` agent. */
+	readonly agent: ServerAgent | null;
+	/** Mission being planned. Neither producer in this file reads it. */
+	readonly mission: ServerMission;
+	/** Tokens already charged to this agent in the rolling 24h window
+	 *  BEFORE the current mission run started. Round-level usage
+	 *  accrual is tracked separately by the loop and added on top. */
+	readonly pretickUsage24h: number;
+}
+
+/**
+ * Warn when the agent is nearing its daily token cap. Threshold at 75 %
+ * gives the planner room to wind down cleanly before the hard skip at
+ * 100 % (enforced at tick-level, not here).
+ *
+ * Utilisation is `(ctx.pretickUsage24h + roundUsage) / cap`. At or above
+ * the cap a stronger "wrap up now" message is returned instead of the
+ * regular warning.
+ *
+ * Returns null for:
+ *   - missions without an agent (legacy one-off missions)
+ *   - agents without a usable cap (`maxTokensPerDay` null, non-finite or <= 0)
+ *   - a non-finite usage total (e.g. a missing usage counter upstream)
+ *   - usage below the warn threshold
+ *
+ * @param ctx        agent + baseline usage for this mission run
+ * @param roundUsage tokens consumed so far by the current run
+ * @returns the German reminder text, or `null` when there is nothing to say
+ */
+export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null {
+	const cap = ctx.agent?.maxTokensPerDay;
+	if (!ctx.agent || cap == null || !Number.isFinite(cap) || cap <= 0) return null;
+
+	const total = ctx.pretickUsage24h + roundUsage;
+	// NaN compares false against every threshold, so without this guard a
+	// broken counter would fall through to the warning and print "NaN%".
+	if (!Number.isFinite(total)) return null;
+
+	const pct = total / cap;
+	if (pct < 0.75) return null;
+
+	// Round down so the regular warning never claims "100%" while the cap
+	// has not actually been reached (99.6 % used to display as 100 %).
+	const pctDisplay = Math.floor((total * 100) / cap);
+	const agentName = ctx.agent.name;
+	if (total >= cap) {
+		return (
+			`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
+			`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
+			`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
+			`kurz nach diesem Turn vom Runner abgeschnitten.`
+		);
+	}
+	return (
+		`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
+		`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
+		`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`
+	);
+}
+
+/**
+ * Retry-loop guard: tells the planner to stop re-trying and write a
+ * summary instead. A heuristic against loops where the LLM keeps calling
+ * the same failing tool with slightly different arguments.
+ *
+ * Fires only when both conditions hold:
+ *   - the loop is in round 3 or later, and
+ *   - `lastFailures` has at least two entries and the last two are `true`.
+ *
+ * @param state.round        current round number of the planner loop
+ * @param state.lastFailures failure flags (`true` = failed tool call); only
+ *                           the last two entries are inspected
+ * @returns the German reminder text, or `null` when there is nothing to say
+ */
+export function retryLoopReminder(state: {
+	readonly round: number;
+	readonly lastFailures: readonly boolean[];
+}): string | null {
+	const { round, lastFailures } = state;
+	const tail = lastFailures.slice(-2);
+	const stuck = round >= 3 && tail.length === 2 && !tail.includes(false);
+	if (!stuck) return null;
+
+	return (
+		`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
+		`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
+		`der dem Nutzer erklaert, was schief lief.`
+	);
+}
+
+/**
+ * Compose the active producers into one ReminderChannel. The returned
+ * callback closes over `ctx`, evaluates every producer against the loop
+ * state it is given, and returns the non-empty messages in a fixed order.
+ *
+ * Ordering: token-budget first (most actionable), retry-loop second.
+ * Additional producers should slot in before retry-loop unless they
+ * explicitly supersede it.
+ *
+ * @param ctx agent, mission and baseline usage for this mission run
+ * @returns a channel yielding zero or more reminder strings per call
+ */
+export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
+	return (state) => {
+		// LoopState exposes only `lastCall`, not the full list of executed
+		// calls (intentional — the channel is meant to be cheap). The
+		// failure window therefore holds at most one entry, while
+		// retryLoopReminder needs two, so that producer is effectively
+		// dormant until LoopState is extended. It stays wired in so the
+		// shape is right for M2 follow-ups.
+		const lastFailures: boolean[] = state.lastCall ? [!state.lastCall.result.success] : [];
+
+		const candidates = [
+			tokenBudgetReminder(ctx, state.usage.totalTokens),
+			retryLoopReminder({ round: state.round, lastFailures }),
+		];
+		// Drop producers that had nothing to say.
+		return candidates.filter((msg): msg is string => Boolean(msg));
+	};
+}