diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts index e4ca0bc05..8c52ebcff 100644 --- a/packages/shared-ai/src/index.ts +++ b/packages/shared-ai/src/index.ts @@ -67,12 +67,14 @@ export type { LlmCompletionRequest, LlmCompletionResponse, LlmFinishReason, + LoopState, LoopStopReason, ParseResult, PlannedStep, PlannerLoopInput, PlannerLoopResult, PlannerMessages, + ReminderChannel, ResolvedInput, SystemPromptInput, SystemPromptOutput, diff --git a/services/mana-ai/src/cron/tick.ts b/services/mana-ai/src/cron/tick.ts index 22cb70fa7..b304ffbbc 100644 --- a/services/mana-ai/src/cron/tick.ts +++ b/services/mana-ai/src/cron/tick.ts @@ -50,6 +50,7 @@ import { import { unwrapMissionGrant } from '../crypto/unwrap-grant'; import { detectInjectionMarker } from '@mana/tool-registry'; import { NewsResearchClient } from '../planner/news-research-client'; +import { buildReminderChannel } from '../planner/reminders'; import { ManaResearchClient, type DeepResearchProvider } from '../clients/mana-research'; import { deletePendingResearchJob, @@ -192,6 +193,12 @@ export async function runTickOnce(config: Config): Promise { agentDecisionsTotal.inc({ decision: 'skipped-paused' }); continue; } + // Pretick token usage is surfaced to the reminder channel so the + // planner sees a warning as it approaches the cap, rather than + // getting cut off without explanation. Default 0 when the + // agent has no cap or the query fails (reminder becomes a + // no-op for that mission). + let pretickUsage24h = 0; if (agent) { const used = activeRuns.get(agent.id) ?? 0; if (used >= agent.maxConcurrentMissions) { @@ -200,8 +207,8 @@ export async function runTickOnce(config: Config): Promise { } // Budget enforcement: check rolling 24h token usage. 
if (agent.maxTokensPerDay != null && agent.maxTokensPerDay >= 0) { - const windowUsage = await getAgentTokenUsage24h(sql, m.userId, agent.id); - if (windowUsage >= agent.maxTokensPerDay) { + pretickUsage24h = await getAgentTokenUsage24h(sql, m.userId, agent.id); + if (pretickUsage24h >= agent.maxTokensPerDay) { agentDecisionsTotal.inc({ decision: 'skipped-budget' }); continue; } @@ -219,7 +226,7 @@ export async function runTickOnce(config: Config): Promise { 'agent.id': agent?.id ?? 'legacy', 'agent.name': agent?.name ?? 'Mana', }, - () => planOneMission(m, llm, sql, agent, config) + () => planOneMission(m, llm, sql, agent, config, pretickUsage24h) ); if (planResult.outcome === 'skipped') { // Deep-research job still running — pick this mission @@ -309,7 +316,8 @@ async function planOneMission( llm: ReturnType, sql: Sql, agent: ServerAgent | null, - config: Config + config: Config, + pretickUsage24h: number ): Promise { const mission = serverMissionToSharedMission(m); // Resolve the mission's Key-Grant (if any) once per tick. An absent @@ -371,6 +379,17 @@ async function planOneMission( const tools = filterToolsByAgentPolicy(SERVER_TOOLS, agent); + // Per-round reminder channel: injects transient hints (token-budget + // warnings today; retry-loop detection, stale-data signals later) + // into the NEXT LLM turn only. See `planner/reminders.ts` for the + // individual producers and the Claude-Code + // rationale. + const reminderChannel = buildReminderChannel({ + agent, + mission: m, + pretickUsage24h, + }); + try { const loopResult = await runPlannerLoop({ llm, @@ -379,6 +398,7 @@ async function planOneMission( userPrompt, tools, model: 'google/gemini-2.5-flash', + reminderChannel, }, // Server-side onToolCall: no execution, just acknowledge. 
// The captured call lands in loopResult.executedCalls and diff --git a/services/mana-ai/src/planner/reminders.test.ts b/services/mana-ai/src/planner/reminders.test.ts new file mode 100644 index 000000000..1bcabcff3 --- /dev/null +++ b/services/mana-ai/src/planner/reminders.test.ts @@ -0,0 +1,185 @@ +import { describe, expect, it } from 'bun:test'; +import { + buildReminderChannel, + retryLoopReminder, + tokenBudgetReminder, + type ReminderContext, +} from './reminders'; +import type { ServerAgent } from '../db/agents-projection'; +import type { ServerMission } from '../db/missions-projection'; +import type { LoopState } from '@mana/shared-ai'; + +// ─── Fixtures ────────────────────────────────────────────────────── + +function makeAgent(overrides: Partial = {}): ServerAgent { + return { + id: 'agent-1', + userId: 'user-1', + spaceId: 'space-1', + name: 'Mana', + role: null, + systemPrompt: null, + memory: null, + state: 'active', + maxTokensPerDay: 100_000, + maxConcurrentMissions: 3, + policy: null, + updatedAt: '2026-04-23T00:00:00Z', + ...overrides, + } as ServerAgent; +} + +function makeMission(overrides: Partial = {}): ServerMission { + return { + id: 'mission-1', + userId: 'user-1', + spaceId: 'space-1', + title: 'Test', + objective: 'Do the thing', + state: 'active', + nextRunAt: '2026-04-23T00:00:00Z', + iterations: [], + agentId: 'agent-1', + ...overrides, + } as ServerMission; +} + +function makeState(overrides: Partial = {}): LoopState { + return { + round: 1, + toolCallCount: 0, + usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 }, + ...overrides, + }; +} + +// ─── tokenBudgetReminder ────────────────────────────────────────── + +describe('tokenBudgetReminder', () => { + it('returns null when agent has no cap', () => { + const ctx: ReminderContext = { + agent: makeAgent({ maxTokensPerDay: null as unknown as number }), + mission: makeMission(), + pretickUsage24h: 50_000, + }; + expect(tokenBudgetReminder(ctx, 10_000)).toBeNull(); + }); + + 
it('returns null when agent is absent (legacy mission)', () => { + const ctx: ReminderContext = { agent: null, mission: makeMission(), pretickUsage24h: 0 }; + expect(tokenBudgetReminder(ctx, 99_000)).toBeNull(); + }); + + it('returns null below 75% utilisation', () => { + const ctx: ReminderContext = { + agent: makeAgent({ maxTokensPerDay: 100_000 }), + mission: makeMission(), + pretickUsage24h: 50_000, + }; + expect(tokenBudgetReminder(ctx, 20_000)).toBeNull(); // 70% + }); + + it('warns at the 75% threshold', () => { + const ctx: ReminderContext = { + agent: makeAgent({ maxTokensPerDay: 100_000 }), + mission: makeMission(), + pretickUsage24h: 50_000, + }; + const msg = tokenBudgetReminder(ctx, 25_000); // 75% + expect(msg).not.toBeNull(); + expect(msg).toContain('75%'); + expect(msg).toContain('Mana'); + }); + + it('emits a stronger message at/above 100%', () => { + const ctx: ReminderContext = { + agent: makeAgent({ maxTokensPerDay: 100_000 }), + mission: makeMission(), + pretickUsage24h: 90_000, + }; + const msg = tokenBudgetReminder(ctx, 15_000); // 105% + expect(msg).not.toBeNull(); + expect(msg).toContain('ausgeschoepft'); + expect(msg).toContain('JETZT'); + }); + + it('adds pretick and round usage correctly', () => { + const ctx: ReminderContext = { + agent: makeAgent({ maxTokensPerDay: 100_000 }), + mission: makeMission(), + pretickUsage24h: 80_000, + }; + // 80k + 0k = 80% → warns + expect(tokenBudgetReminder(ctx, 0)).not.toBeNull(); + // 80k + 20k = 100% → exhausted + const exhausted = tokenBudgetReminder(ctx, 20_000); + expect(exhausted).toContain('ausgeschoepft'); + }); +}); + +// ─── retryLoopReminder ──────────────────────────────────────────── + +describe('retryLoopReminder', () => { + it('is silent before round 3', () => { + expect(retryLoopReminder({ round: 2, lastFailures: [true, true] })).toBeNull(); + }); + + it('warns when the last 2 calls failed at round >= 3', () => { + const msg = retryLoopReminder({ round: 3, lastFailures: [true, true] }); 
+ expect(msg).not.toBeNull(); + expect(msg).toContain('fehlgeschlagen'); + }); + + it('stays silent when only one of the last 2 failed', () => { + expect(retryLoopReminder({ round: 4, lastFailures: [false, true] })).toBeNull(); + }); + + it('stays silent with fewer than 2 failures recorded', () => { + expect(retryLoopReminder({ round: 5, lastFailures: [true] })).toBeNull(); + }); +}); + +// ─── buildReminderChannel — composition ─────────────────────────── + +describe('buildReminderChannel', () => { + it('returns an empty array when no producer fires', () => { + const channel = buildReminderChannel({ + agent: makeAgent({ maxTokensPerDay: 100_000 }), + mission: makeMission(), + pretickUsage24h: 0, + }); + expect(channel(makeState())).toEqual([]); + }); + + it('surfaces the budget reminder when usage is high', () => { + const channel = buildReminderChannel({ + agent: makeAgent({ maxTokensPerDay: 10_000 }), + mission: makeMission(), + pretickUsage24h: 8_000, + }); + const out = channel( + makeState({ usage: { promptTokens: 500, completionTokens: 500, totalTokens: 1_000 } }) + ); + expect(out).toHaveLength(1); + expect(out[0]).toContain('90%'); + }); + + it('uses the updated totalTokens each round (re-evaluated)', () => { + const channel = buildReminderChannel({ + agent: makeAgent({ maxTokensPerDay: 10_000 }), + mission: makeMission(), + pretickUsage24h: 5_000, + }); + // Round 1 — 50% → silent + expect(channel(makeState())).toEqual([]); + // Round 2 — 5k + 3k = 80% → warns + const round2 = channel( + makeState({ + round: 2, + usage: { promptTokens: 1500, completionTokens: 1500, totalTokens: 3_000 }, + }) + ); + expect(round2).toHaveLength(1); + expect(round2[0]).toContain('80%'); + }); +}); diff --git a/services/mana-ai/src/planner/reminders.ts b/services/mana-ai/src/planner/reminders.ts new file mode 100644 index 000000000..2036d214a --- /dev/null +++ b/services/mana-ai/src/planner/reminders.ts @@ -0,0 +1,119 @@ +/** + * Per-round reminder producers for the mana-ai 
mission runner. + * + * Each producer is a small pure function that reads some snapshot (agent + * state, mission metadata, tick-level usage counters) and returns a short + * German string to inject as a `` tag on the next LLM call. + * Producers return `null` when there's nothing to say so the caller can + * cleanly filter them out. + * + * Composition happens in `buildReminderChannel()` which wires the active + * producers into a single `ReminderChannel` callback compatible with + * `runPlannerLoop`'s new reminderChannel input. The loop invokes the + * channel once per round; we re-evaluate every producer each round so + * usage drift across rounds (rounds can accumulate 10k+ completion + * tokens) shows up in the NEXT reminder. + * + * See `docs/plans/agent-loop-improvements-m1.md` §2 for the + * Claude-Code `` pattern this implements. + */ + +import type { ReminderChannel } from '@mana/shared-ai'; +import type { ServerAgent } from '../db/agents-projection'; +import type { ServerMission } from '../db/missions-projection'; + +export interface ReminderContext { + readonly agent: ServerAgent | null; + readonly mission: ServerMission; + /** Tokens already charged to this agent in the rolling 24h window + * BEFORE the current mission run started. Round-level usage + * accrual is tracked separately by the loop and added on top. */ + readonly pretickUsage24h: number; +} + +/** + * Warn when the agent is nearing its daily token cap. Threshold at 75 % + * gives the planner room to wind down cleanly before the hard skip at + * 100 % (enforced at tick-level, not here). 
/**
 * Build the token-budget warning for the next planner turn.
 *
 * Usage is `ctx.pretickUsage24h` (tokens charged before this run started)
 * plus `roundUsage` (tokens accrued by the current loop so far), compared
 * against the agent's `maxTokensPerDay`.
 *
 * Returns `null` when:
 *  - the mission has no agent (`ctx.agent` is null),
 *  - the agent has no usable cap (null/undefined, non-positive or NaN),
 *  - combined usage is below 75 % of the cap.
 *
 * At or above 100 % a stronger "wrap up now" message is returned instead
 * of the regular warning.
 *
 * @param ctx        Agent, mission and pre-run usage snapshot.
 * @param roundUsage Tokens consumed by the current planner run so far.
 * @returns German reminder text, or `null` when there is nothing to say.
 */
export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): string | null {
	const agent = ctx.agent;
	const cap = agent?.maxTokensPerDay;
	// `!(cap > 0)` also rejects NaN, which `cap <= 0` would let through.
	if (!agent || cap == null || !(cap > 0)) return null;

	// A missing or non-finite counter counts as zero so a broken usage
	// report can never surface as "NaN%" in the prompt.
	const finiteOrZero = (n: number): number => (Number.isFinite(n) ? n : 0);
	const total = finiteOrZero(ctx.pretickUsage24h) + finiteOrZero(roundUsage);

	const pct = total / cap;
	if (pct < 0.75) return null;

	// Multiply before dividing (exact for integer token counts) and round
	// DOWN: the below-cap warning must never claim "100%" while the
	// exhausted branch has not been reached.
	const pctDisplay = Math.floor((total * 100) / cap);
	const agentName = agent.name;

	if (pct >= 1.0) {
		return (
			`Agent ${agentName} hat das Tagesbudget komplett ausgeschoepft ` +
			`(${total} / ${cap} Tokens = ${pctDisplay}%). Schliesse die ` +
			`Mission JETZT mit einer Summary ab — weitere Tool-Calls werden ` +
			`kurz nach diesem Turn vom Runner abgeschnitten.`
		);
	}
	return (
		`Agent ${agentName} hat ${pctDisplay}% des Tagesbudgets verbraucht ` +
		`(${total} / ${cap} Tokens). Plane sparsam — vermeide redundante ` +
		`Tool-Calls und liefere zuegig eine abschliessende Plan-Summary.`
	);
}

/**
 * Nudge the planner to stop retrying when it is iterating without
 * progress: the loop is in round 3 or later AND the two most recent
 * entries of `lastFailures` are both `true`.
 *
 * @param state.round        Current loop round.
 * @param state.lastFailures Failure flags of recent tool calls, oldest
 *                           first; only the last two entries are examined.
 * @returns German reminder text, or `null` when the heuristic does not fire.
 */
export function retryLoopReminder(state: {
	readonly round: number;
	readonly lastFailures: readonly boolean[];
}): string | null {
	if (state.round < 3) return null;

	const recent = state.lastFailures.slice(-2);
	const bothFailed = recent.length === 2 && recent.every((failed) => failed);
	if (!bothFailed) return null;

	return (
		`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
		`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
		`der dem Nutzer erklaert, was schief lief.`
	);
}

/**
 * Bind `ctx` into a `ReminderChannel` callback. On every invocation all
 * producers are re-evaluated against the passed loop state and their
 * non-null outputs are returned in order: token-budget first, retry-loop
 * second.
 *
 * @param ctx Agent, mission and pre-run usage snapshot captured by the closure.
 * @returns Callback that maps a loop state to a (possibly empty) list of
 *          reminder strings.
 */
export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
	return (state) => {
		// Only the single most recent call is visible here, so at most one
		// failure flag is ever supplied. `retryLoopReminder` needs two, which
		// keeps that producer dormant until more call history is available.
		const failures: boolean[] = state.lastCall ? [!state.lastCall.result.success] : [];

		const out: string[] = [];

		const budget = tokenBudgetReminder(ctx, state.usage.totalTokens);
		if (budget) out.push(budget);

		const retry = retryLoopReminder({ round: state.round, lastFailures: failures });
		if (retry) out.push(retry);

		return out;
	};
}