feat(mana-ai): first live reminder producers — token budget + retry-loop

Wires the M1 reminderChannel into the mana-ai mission runner with two
initial producers in services/mana-ai/src/planner/reminders.ts:

- tokenBudgetReminder — warns at 75% of the agent's daily cap, emits a
  stronger "wrap up NOW" message at/above 100%. Uses pretick usage +
  accumulated round usage so the warning tracks drift during a long
  plan.
- retryLoopReminder — shape is in place (round ≥ 3 + last 2 tool calls
  failed), but effectively dormant for now: LoopState exposes only the
  single lastCall, so the two-failure condition cannot be met yet.
  Extends cleanly once LoopState carries the full failure window.

buildReminderChannel composes active producers; the tick hoists
pretickUsage24h so the channel has the baseline. Each round the loop
re-evaluates the producers, so usage drift across rounds surfaces on
the NEXT turn.

Also exports LoopState + ReminderChannel from @mana/shared-ai top-level
so consumers don't need to reach into /planner.

Tests: 13 new bun tests covering thresholds, pretick+round summing,
composition, and per-round re-evaluation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-23 14:00:04 +02:00
parent e5d230e599
commit faa472be91
4 changed files with 330 additions and 4 deletions

View file

@ -50,6 +50,7 @@ import {
import { unwrapMissionGrant } from '../crypto/unwrap-grant';
import { detectInjectionMarker } from '@mana/tool-registry';
import { NewsResearchClient } from '../planner/news-research-client';
import { buildReminderChannel } from '../planner/reminders';
import { ManaResearchClient, type DeepResearchProvider } from '../clients/mana-research';
import {
deletePendingResearchJob,
@ -192,6 +193,12 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
agentDecisionsTotal.inc({ decision: 'skipped-paused' });
continue;
}
// Pretick token usage is surfaced to the reminder channel so the
// planner sees a warning as it approaches the cap, rather than
// getting cut off without explanation. Default 0 when the
// agent has no cap or the query fails (reminder becomes a
// no-op for that mission).
let pretickUsage24h = 0;
if (agent) {
const used = activeRuns.get(agent.id) ?? 0;
if (used >= agent.maxConcurrentMissions) {
@ -200,8 +207,8 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
}
// Budget enforcement: check rolling 24h token usage.
if (agent.maxTokensPerDay != null && agent.maxTokensPerDay >= 0) {
const windowUsage = await getAgentTokenUsage24h(sql, m.userId, agent.id);
if (windowUsage >= agent.maxTokensPerDay) {
pretickUsage24h = await getAgentTokenUsage24h(sql, m.userId, agent.id);
if (pretickUsage24h >= agent.maxTokensPerDay) {
agentDecisionsTotal.inc({ decision: 'skipped-budget' });
continue;
}
@ -219,7 +226,7 @@ export async function runTickOnce(config: Config): Promise<TickStats> {
'agent.id': agent?.id ?? 'legacy',
'agent.name': agent?.name ?? 'Mana',
},
() => planOneMission(m, llm, sql, agent, config)
() => planOneMission(m, llm, sql, agent, config, pretickUsage24h)
);
if (planResult.outcome === 'skipped') {
// Deep-research job still running — pick this mission
@ -309,7 +316,8 @@ async function planOneMission(
llm: ReturnType<typeof createServerLlmClient>,
sql: Sql,
agent: ServerAgent | null,
config: Config
config: Config,
pretickUsage24h: number
): Promise<PlanMissionOutcome> {
const mission = serverMissionToSharedMission(m);
// Resolve the mission's Key-Grant (if any) once per tick. An absent
@ -371,6 +379,17 @@ async function planOneMission(
const tools = filterToolsByAgentPolicy(SERVER_TOOLS, agent);
// Per-round reminder channel: injects transient hints (token-budget
// warnings today; retry-loop detection, stale-data signals later)
// into the NEXT LLM turn only. See `planner/reminders.ts` for the
// individual producers and the Claude-Code <system-reminder>
// rationale.
const reminderChannel = buildReminderChannel({
agent,
mission: m,
pretickUsage24h,
});
try {
const loopResult = await runPlannerLoop({
llm,
@ -379,6 +398,7 @@ async function planOneMission(
userPrompt,
tools,
model: 'google/gemini-2.5-flash',
reminderChannel,
},
// Server-side onToolCall: no execution, just acknowledge.
// The captured call lands in loopResult.executedCalls and

View file

@ -0,0 +1,185 @@
import { describe, expect, it } from 'bun:test';
import {
buildReminderChannel,
retryLoopReminder,
tokenBudgetReminder,
type ReminderContext,
} from './reminders';
import type { ServerAgent } from '../db/agents-projection';
import type { ServerMission } from '../db/missions-projection';
import type { LoopState } from '@mana/shared-ai';
// ─── Fixtures ──────────────────────────────────────────────────────
/**
 * Fixture factory: an active agent named "Mana" with a 100k daily token
 * cap. Any field passed in `overrides` replaces the default value.
 */
function makeAgent(overrides: Partial<ServerAgent> = {}): ServerAgent {
  const defaults = {
    id: 'agent-1',
    userId: 'user-1',
    spaceId: 'space-1',
    name: 'Mana',
    role: null,
    systemPrompt: null,
    memory: null,
    state: 'active',
    maxTokensPerDay: 100_000,
    maxConcurrentMissions: 3,
    policy: null,
    updatedAt: '2026-04-23T00:00:00Z',
  };
  // Overrides are applied last so they win over the defaults.
  const agent = { ...defaults, ...overrides };
  return agent as ServerAgent;
}
/**
 * Fixture factory: an active mission owned by `agent-1` with no
 * iterations yet. Any field passed in `overrides` replaces the default.
 */
function makeMission(overrides: Partial<ServerMission> = {}): ServerMission {
  const defaults = {
    id: 'mission-1',
    userId: 'user-1',
    spaceId: 'space-1',
    title: 'Test',
    objective: 'Do the thing',
    state: 'active',
    nextRunAt: '2026-04-23T00:00:00Z',
    iterations: [],
    agentId: 'agent-1',
  };
  // Overrides are applied last so they win over the defaults.
  const mission = { ...defaults, ...overrides };
  return mission as ServerMission;
}
/**
 * Fixture factory: loop state at round 1 with no tool calls and zero
 * token usage. Any field passed in `overrides` replaces the default.
 */
function makeState(overrides: Partial<LoopState> = {}): LoopState {
  const zeroUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  const firstRound: LoopState = { round: 1, toolCallCount: 0, usage: zeroUsage };
  return { ...firstRound, ...overrides };
}
// ─── tokenBudgetReminder ──────────────────────────────────────────
describe('tokenBudgetReminder', () => {
  const DAILY_CAP = 100_000;

  /** Context around `agent` with the given usage already spent before the tick. */
  function contextFor(agent: ServerAgent | null, pretickUsage24h: number): ReminderContext {
    return { agent, mission: makeMission(), pretickUsage24h };
  }

  /** Context for an agent capped at DAILY_CAP tokens per day. */
  function cappedContext(pretickUsage24h: number): ReminderContext {
    return contextFor(makeAgent({ maxTokensPerDay: DAILY_CAP }), pretickUsage24h);
  }

  it('returns null when agent has no cap', () => {
    const uncapped = makeAgent({ maxTokensPerDay: null as unknown as number });
    const reminder = tokenBudgetReminder(contextFor(uncapped, 50_000), 10_000);
    expect(reminder).toBeNull();
  });

  it('returns null when agent is absent (legacy mission)', () => {
    const reminder = tokenBudgetReminder(contextFor(null, 0), 99_000);
    expect(reminder).toBeNull();
  });

  it('returns null below 75% utilisation', () => {
    // 50k + 20k = 70% of the cap
    const reminder = tokenBudgetReminder(cappedContext(50_000), 20_000);
    expect(reminder).toBeNull();
  });

  it('warns at the 75% threshold', () => {
    // 50k + 25k = exactly 75% of the cap
    const reminder = tokenBudgetReminder(cappedContext(50_000), 25_000);
    expect(reminder).not.toBeNull();
    expect(reminder).toContain('75%');
    expect(reminder).toContain('Mana');
  });

  it('emits a stronger message at/above 100%', () => {
    // 90k + 15k = 105% of the cap
    const reminder = tokenBudgetReminder(cappedContext(90_000), 15_000);
    expect(reminder).not.toBeNull();
    expect(reminder).toContain('ausgeschoepft');
    expect(reminder).toContain('JETZT');
  });

  it('adds pretick and round usage correctly', () => {
    const ctx = cappedContext(80_000);
    // Pretick usage alone is already 80% → soft warning.
    const pretickOnly = tokenBudgetReminder(ctx, 0);
    expect(pretickOnly).not.toBeNull();
    // Another 20k from the round reaches 100% → exhausted wording.
    const withRound = tokenBudgetReminder(ctx, 20_000);
    expect(withRound).toContain('ausgeschoepft');
  });
});
// ─── retryLoopReminder ────────────────────────────────────────────
describe('retryLoopReminder', () => {
  // Failure window in which both of the most recent calls failed.
  const bothFailed = [true, true] as const;

  it('is silent before round 3', () => {
    const reminder = retryLoopReminder({ round: 2, lastFailures: bothFailed });
    expect(reminder).toBeNull();
  });

  it('warns when the last 2 calls failed at round >= 3', () => {
    const reminder = retryLoopReminder({ round: 3, lastFailures: bothFailed });
    expect(reminder).not.toBeNull();
    expect(reminder).toContain('fehlgeschlagen');
  });

  it('stays silent when only one of the last 2 failed', () => {
    const mixed = [false, true];
    const reminder = retryLoopReminder({ round: 4, lastFailures: mixed });
    expect(reminder).toBeNull();
  });

  it('stays silent with fewer than 2 failures recorded', () => {
    const single = [true];
    const reminder = retryLoopReminder({ round: 5, lastFailures: single });
    expect(reminder).toBeNull();
  });
});
// ─── buildReminderChannel — composition ───────────────────────────
describe('buildReminderChannel', () => {
  /** Channel for an agent with the given daily cap and usage before the tick. */
  function channelFor(maxTokensPerDay: number, pretickUsage24h: number) {
    const agent = makeAgent({ maxTokensPerDay });
    return buildReminderChannel({ agent, mission: makeMission(), pretickUsage24h });
  }

  /** Usage record whose total is split evenly between prompt and completion. */
  function usageOf(totalTokens: number) {
    const half = totalTokens / 2;
    return { promptTokens: half, completionTokens: half, totalTokens };
  }

  it('returns an empty array when no producer fires', () => {
    const channel = channelFor(100_000, 0);
    const reminders = channel(makeState());
    expect(reminders).toEqual([]);
  });

  it('surfaces the budget reminder when usage is high', () => {
    const channel = channelFor(10_000, 8_000);
    // 8k before the tick + 1k this run = 90% of the cap
    const reminders = channel(makeState({ usage: usageOf(1_000) }));
    expect(reminders).toHaveLength(1);
    expect(reminders[0]).toContain('90%');
  });

  it('uses the updated totalTokens each round (re-evaluated)', () => {
    const channel = channelFor(10_000, 5_000);

    // First round: only the 5k baseline counts (50%) → nothing emitted.
    const firstRound = channel(makeState());
    expect(firstRound).toEqual([]);

    // Second round: 5k baseline + 3k accumulated = 80% → warning emitted.
    const secondRound = channel(makeState({ round: 2, usage: usageOf(3_000) }));
    expect(secondRound).toHaveLength(1);
    expect(secondRound[0]).toContain('80%');
  });
});

View file

@ -0,0 +1,119 @@
/**
* Per-round reminder producers for the mana-ai mission runner.
*
* Each producer is a small pure function that reads some snapshot (agent
* state, mission metadata, tick-level usage counters) and returns a short
* German string to inject as a `<reminder>` tag on the next LLM call.
* Producers return `null` when there's nothing to say so the caller can
* cleanly filter them out.
*
* Composition happens in `buildReminderChannel()` which wires the active
* producers into a single `ReminderChannel` callback compatible with
* `runPlannerLoop`'s new reminderChannel input. The loop invokes the
* channel once per round; we re-evaluate every producer each round so
* usage drift across rounds (rounds can accumulate 10k+ completion
* tokens) shows up in the NEXT reminder.
*
* See `docs/plans/agent-loop-improvements-m1.md` §2 for the
* Claude-Code `<system-reminder>` pattern this implements.
*/
import type { ReminderChannel } from '@mana/shared-ai';
import type { ServerAgent } from '../db/agents-projection';
import type { ServerMission } from '../db/missions-projection';
/**
 * Snapshot handed to `buildReminderChannel()` and passed on to the
 * producers. All fields are readonly; the context is fixed for the
 * lifetime of the channel it was bound into.
 */
export interface ReminderContext {
/** Agent the mission runs under, or `null` for legacy missions that
 * have no agent (the token-budget producer stays silent then). */
readonly agent: ServerAgent | null;
/** Mission the channel is built for. */
readonly mission: ServerMission;
/** Tokens already charged to this agent in the rolling 24h window
 * BEFORE the current mission run started. Round-level usage
 * accrual is tracked separately by the loop and added on top. */
readonly pretickUsage24h: number;
}
/**
 * Warn when the agent is nearing its daily token cap. Threshold at 75 %
 * gives the planner room to wind down cleanly before the hard skip at
 * 100 % (enforced at tick-level, not here).
 *
 * Two wordings exist: a soft "plan sparingly" warning from 75 % up to
 * (but excluding) 100 %, and a stronger "wrap up NOW" message at or
 * above 100 %.
 *
 * Returns null for:
 * - missions without an agent (legacy one-off missions)
 * - agents without a usable cap (`maxTokensPerDay` null/undefined or <= 0)
 * - usage below the warn threshold
 * - usage that is not a number (NaN), so the planner never sees "NaN%"
 *
 * @param ctx - Bound context; supplies the agent (name + cap) and the
 *   usage already spent before the current run.
 * @param roundUsage - Tokens consumed by the current run so far; added
 *   on top of `ctx.pretickUsage24h`.
 * @returns German reminder text, or `null` when there is nothing to say.
 */
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null {
  const agent = ctx.agent;
  const cap = agent?.maxTokensPerDay;
  if (!agent || cap == null || cap <= 0) return null;

  const total = ctx.pretickUsage24h + roundUsage;
  const pct = total / cap;
  // NaN compares false against every threshold, so without this guard an
  // unknown usage figure would fall through to the soft warning.
  if (Number.isNaN(pct) || pct < 0.75) return null;

  const pctDisplay = Math.round(pct * 100);
  const agentName = agent.name;

  if (pct >= 1.0) {
    return (
      `Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
      `(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
      `Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
      `kurz nach diesem Turn vom Runner abgeschnitten.`
    );
  }

  // Usage in [99.5 %, 100 %) would round up to "100%" even though the cap
  // is not reached yet; keep the soft warning strictly below 100.
  const warnPct = Math.min(pctDisplay, 99);
  return (
    `Agent ${agentName} hat ${warnPct}% des Tagesbudgets verbraucht ` +
    `(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
    `Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`
  );
}
/**
 * Heuristic guard against retry loops: once the planner is at round 3
 * or later and both of the two most recent tool-calls failed, tell it
 * to stop retrying and write a summary explaining what went wrong.
 *
 * `lastFailures` holds one flag per recorded tool-call, oldest first;
 * `true` marks a failed call. Only the final two entries are inspected.
 *
 * @returns German reminder text, or `null` when the loop is too young,
 *   fewer than two calls are recorded, or either recent call succeeded.
 */
export function retryLoopReminder(state: {
  readonly round: number;
  readonly lastFailures: readonly boolean[];
}): string | null {
  const { round, lastFailures } = state;
  if (round < 3) return null;

  const lastTwo = lastFailures.slice(-2);
  const anySucceeded = lastTwo.some((failed) => !failed);
  if (lastTwo.length !== 2 || anySucceeded) return null;

  return [
    'Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die',
    'Wiederholung ab — formuliere stattdessen einen Summary-Text,',
    'der dem Nutzer erklaert, was schief lief.',
  ].join(' ');
}
/**
 * Bind `ctx` into a ReminderChannel. On every invocation the channel
 * runs each producer against the supplied loop state and returns the
 * reminders that fired, in a fixed order.
 *
 * Ordering: token-budget first (most actionable), retry-loop second.
 * Additional producers should slot in before retry-loop unless they
 * explicitly supersede it.
 */
export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
  return (state) => {
    // LoopState deliberately omits the full executedCalls list (the
    // channel is meant to be cheap) and only exposes `lastCall`. The
    // failure window therefore holds at most one entry, while the
    // retry-loop producer needs two — so it cannot fire yet. It stays
    // wired in so the shape is right once LoopState carries the last N
    // results (M2 follow-up).
    const lastFailures = state.lastCall ? [!state.lastCall.result.success] : [];

    const candidates = [
      tokenBudgetReminder(ctx, state.usage.totalTokens),
      retryLoopReminder({ round: state.round, lastFailures }),
    ];
    // Producers answer null when they have nothing to say; drop those.
    return candidates.filter((text): text is string => Boolean(text));
  };
}