diff --git a/packages/shared-ai/src/planner/index.ts b/packages/shared-ai/src/planner/index.ts
index 3c36de0e7..ae837a314 100644
--- a/packages/shared-ai/src/planner/index.ts
+++ b/packages/shared-ai/src/planner/index.ts
@@ -9,7 +9,7 @@ export type { AiPlanInput, AiPlanOutput, AvailableTool, PlannedStep, ResolvedInp
// coexist within the atomic PR.
export { buildSystemPrompt } from './system-prompt';
export type { SystemPromptInput, SystemPromptOutput } from './system-prompt';
-export { runPlannerLoop } from './loop';
+export { runPlannerLoop, LOOP_STATE_RECENT_CALLS_WINDOW, PARALLEL_TOOL_BATCH_SIZE } from './loop';
export { MockLlmClient } from './mock-llm';
export type { MockLlmTurn } from './mock-llm';
export type {
diff --git a/packages/shared-ai/src/planner/loop.test.ts b/packages/shared-ai/src/planner/loop.test.ts
index 1d535b6ae..82263a0e9 100644
--- a/packages/shared-ai/src/planner/loop.test.ts
+++ b/packages/shared-ai/src/planner/loop.test.ts
@@ -396,6 +396,62 @@ describe('runPlannerLoop — reminderChannel', () => {
expect(reminders[0].content).toBe('round-2');
});
+ it('exposes recentCalls as a sliding window, oldest-first', async () => {
+ // 7 rounds, each with one tool call, so by round 7 we have 6 prior
+ // results — the window must cap at LOOP_STATE_RECENT_CALLS_WINDOW = 5.
+ const llm = new MockLlmClient();
+ for (let i = 0; i < 7; i++) {
+ llm.enqueueToolCalls([{ name: 'list_things', args: { i } }]);
+ }
+ llm.enqueueStop();
+
+ const windowsSeen: Array<Array<{ i: unknown; ok: boolean }>> = [];
+ await runPlannerLoop({
+ llm,
+ input: {
+ systemPrompt: 's',
+ userPrompt: 'u',
+ tools,
+ model: 'm',
+ maxRounds: 10,
+ reminderChannel: (state) => {
+ windowsSeen.push(
+ state.recentCalls.map((ec) => ({
+ i: ec.call.arguments.i,
+ ok: ec.result.success,
+ }))
+ );
+ return [];
+ },
+ },
+ onToolCall: async (call) => ({
+ success: true,
+ message: `ok-${call.arguments.i}`,
+ }),
+ });
+
+ // Round 1 → window empty
+ expect(windowsSeen[0]).toEqual([]);
+ // Round 2 → one prior call
+ expect(windowsSeen[1]).toEqual([{ i: 0, ok: true }]);
+ // Round 6 → five prior calls, oldest-first
+ expect(windowsSeen[5]).toEqual([
+ { i: 0, ok: true },
+ { i: 1, ok: true },
+ { i: 2, ok: true },
+ { i: 3, ok: true },
+ { i: 4, ok: true },
+ ]);
+ // Round 7 → window slides; i=0 drops off, i=5 is newest
+ expect(windowsSeen[6]).toEqual([
+ { i: 1, ok: true },
+ { i: 2, ok: true },
+ { i: 3, ok: true },
+ { i: 4, ok: true },
+ { i: 5, ok: true },
+ ]);
+ });
+
it('surfaces loop state — toolCallCount and lastCall — to the channel', async () => {
const llm = new MockLlmClient()
.enqueueToolCalls([{ name: 'list_things', args: {} }])
diff --git a/packages/shared-ai/src/planner/loop.ts b/packages/shared-ai/src/planner/loop.ts
index afd6648f2..747cb247f 100644
--- a/packages/shared-ai/src/planner/loop.ts
+++ b/packages/shared-ai/src/planner/loop.ts
@@ -69,6 +69,12 @@ export interface LlmClient {
// ─── Loop input / result ────────────────────────────────────────────
+/** Sliding-window size for `LoopState.recentCalls`. Capped so the
+ * per-round snapshot handed to the reminder channel stays cheap; the
+ * last handful of calls is all that retry-loop-style heuristics
+ * need. */
+export const LOOP_STATE_RECENT_CALLS_WINDOW = 5;
+
/**
* Transient loop state surfaced to the reminderChannel. The reminder
* callback is pure — it reads this snapshot and returns hints; it does
@@ -86,6 +92,14 @@ export interface LoopState {
/** The most recent ExecutedCall, or undefined in round 1. Handy for
* "the last tool failed — warn the LLM" producers. */
readonly lastCall?: ExecutedCall;
+ /**
+ * Sliding window of the last N (= `LOOP_STATE_RECENT_CALLS_WINDOW`)
+ * ExecutedCalls in source order, oldest first. Used by producers
+ * that need more than the single-last signal — retry-loop detection
+ * (N consecutive failures), burst detection (many calls to the same
+ * tool), and similar. Empty in round 1; grows up to the cap.
+ */
+ readonly recentCalls: readonly ExecutedCall[];
}
/**
@@ -202,6 +216,7 @@ export async function runPlannerLoop(opts: {
// — the reminders are ephemeral steering, not conversation.
let requestMessages: readonly ChatMessage[] = messages;
if (input.reminderChannel) {
+ const recentCalls = executedCalls.slice(-LOOP_STATE_RECENT_CALLS_WINDOW);
const state: LoopState = {
round: rounds,
toolCallCount: executedCalls.length,
@@ -211,6 +226,7 @@ export async function runPlannerLoop(opts: {
totalTokens: promptTokens + completionTokens,
},
lastCall: executedCalls[executedCalls.length - 1],
+ recentCalls,
};
const reminders = input.reminderChannel(state);
if (reminders.length > 0) {
diff --git a/services/mana-ai/CLAUDE.md b/services/mana-ai/CLAUDE.md
index 1b27ac918..7d083565b 100644
--- a/services/mana-ai/CLAUDE.md
+++ b/services/mana-ai/CLAUDE.md
@@ -101,8 +101,8 @@ Details zum Deep-Research-Flow: [`docs/reports/gemini-deep-research.md`](../../d
Claude-Code-inspirierte Primitive in `runPlannerLoop` (live in `@mana/shared-ai`, siehe [`docs/plans/agent-loop-improvements-m1.md`](../../docs/plans/agent-loop-improvements-m1.md)) und deren Konsumierung hier:
-- [x] `reminderChannel` wired via `buildReminderChannel()` in `src/planner/reminders.ts`. Erster Live-Producer: `tokenBudgetReminder` — warnt ab 75% Tagesbudget, eskaliert ab 100% mit "JETZT abschliessen"-Prompt. Round-usage wird on-the-fly drauf addiert, so dass der Warn-Level mitwandert.
-- [x] `retryLoopReminder` — Shape fertig, aber dormant: LoopState exponiert heute nur `lastCall`, nicht ein Failure-Window. Aktiviert automatisch sobald shared-ai LoopState um `recentResults` erweitert.
+- [x] `reminderChannel` wired via `buildReminderChannel()` in `src/planner/reminders.ts`. Live-Producer 1: `tokenBudgetReminder` — warnt ab 75% Tagesbudget, eskaliert ab 100% mit "JETZT abschliessen"-Prompt. Round-usage wird on-the-fly drauf addiert, so dass der Warn-Level mitwandert.
+- [x] `retryLoopReminder` live — feuert ab Round 3, wenn die letzten 2 Tool-Calls beide fehlschlugen. Liest das `recentCalls`-Sliding-Window (max. 5 Einträge, oldest-first) aus `LoopState`.
- [x] `POLICY_MODE` env (off/log-only/enforce, default log-only) für die mana-ai-seitige Freitext-Inspection (`detectInjectionMarker`). Rate-Limit und destructive-opt-in sind hier NICHT aktiv — tools werden nur als PlanSteps aufgezeichnet, die echte Enforcement passiert im Webapp-Client.
- [ ] Parallel-Reads im Server-Tick haben keinen Effekt, weil `SERVER_TOOLS` per Konstruktion propose-only ist. Könnte relevant werden sobald mana-ai die vollständige tool-registry absorbiert (M4 des Personas-Plans).
diff --git a/services/mana-ai/src/planner/reminders.test.ts b/services/mana-ai/src/planner/reminders.test.ts
index 1bcabcff3..363cf7a66 100644
--- a/services/mana-ai/src/planner/reminders.test.ts
+++ b/services/mana-ai/src/planner/reminders.test.ts
@@ -49,10 +49,26 @@ function makeState(overrides: Partial = {}): LoopState {
round: 1,
toolCallCount: 0,
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+ recentCalls: [],
...overrides,
};
}
+function mkExecutedCall(
+ success: boolean,
+ toolName = 'create_thing'
+): {
+ round: number;
+ call: { id: string; name: string; arguments: Record<string, unknown> };
+ result: { success: boolean; message: string };
+} {
+ return {
+ round: 1,
+ call: { id: crypto.randomUUID(), name: toolName, arguments: {} },
+ result: { success, message: success ? 'ok' : 'boom' },
+ };
+}
+
// ─── tokenBudgetReminder ──────────────────────────────────────────
describe('tokenBudgetReminder', () => {
@@ -121,21 +137,50 @@ describe('tokenBudgetReminder', () => {
describe('retryLoopReminder', () => {
it('is silent before round 3', () => {
- expect(retryLoopReminder({ round: 2, lastFailures: [true, true] })).toBeNull();
+ expect(
+ retryLoopReminder({
+ round: 2,
+ recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+ })
+ ).toBeNull();
});
it('warns when the last 2 calls failed at round >= 3', () => {
- const msg = retryLoopReminder({ round: 3, lastFailures: [true, true] });
+ const msg = retryLoopReminder({
+ round: 3,
+ recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+ });
expect(msg).not.toBeNull();
expect(msg).toContain('fehlgeschlagen');
});
it('stays silent when only one of the last 2 failed', () => {
- expect(retryLoopReminder({ round: 4, lastFailures: [false, true] })).toBeNull();
+ expect(
+ retryLoopReminder({
+ round: 4,
+ recentCalls: [mkExecutedCall(true), mkExecutedCall(false)],
+ })
+ ).toBeNull();
});
- it('stays silent with fewer than 2 failures recorded', () => {
- expect(retryLoopReminder({ round: 5, lastFailures: [true] })).toBeNull();
+ it('stays silent with fewer than 2 calls recorded', () => {
+ expect(retryLoopReminder({ round: 5, recentCalls: [mkExecutedCall(false)] })).toBeNull();
+ });
+
+ it('looks only at the TAIL 2 — a flaky run with intermittent success is not a retry loop', () => {
+ // 5 calls: F, F, F, OK, F → tail-2 is [OK, F] → silent
+ expect(
+ retryLoopReminder({
+ round: 5,
+ recentCalls: [
+ mkExecutedCall(false),
+ mkExecutedCall(false),
+ mkExecutedCall(false),
+ mkExecutedCall(true),
+ mkExecutedCall(false),
+ ],
+ })
+ ).toBeNull();
});
});
@@ -164,6 +209,40 @@ describe('buildReminderChannel', () => {
expect(out[0]).toContain('90%');
});
+ it('fires retryLoopReminder end-to-end through the channel', () => {
+ const channel = buildReminderChannel({
+ agent: makeAgent({ maxTokensPerDay: 1_000_000 }), // budget silent
+ mission: makeMission(),
+ pretickUsage24h: 0,
+ });
+ const out = channel(
+ makeState({
+ round: 4,
+ recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+ })
+ );
+ expect(out).toHaveLength(1);
+ expect(out[0]).toContain('fehlgeschlagen');
+ });
+
+ it('can fire budget + retry together (composition)', () => {
+ const channel = buildReminderChannel({
+ agent: makeAgent({ maxTokensPerDay: 10_000 }),
+ mission: makeMission(),
+ pretickUsage24h: 9_000,
+ });
+ const out = channel(
+ makeState({
+ round: 3,
+ usage: { promptTokens: 500, completionTokens: 500, totalTokens: 1_000 },
+ recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+ })
+ );
+ expect(out).toHaveLength(2);
+ expect(out[0]).toContain('ausgeschoepft'); // budget first
+ expect(out[1]).toContain('fehlgeschlagen'); // retry second
+ });
+
it('uses the updated totalTokens each round (re-evaluated)', () => {
const channel = buildReminderChannel({
agent: makeAgent({ maxTokensPerDay: 10_000 }),
diff --git a/services/mana-ai/src/planner/reminders.ts b/services/mana-ai/src/planner/reminders.ts
index 2036d214a..e9e39ccd6 100644
--- a/services/mana-ai/src/planner/reminders.ts
+++ b/services/mana-ai/src/planner/reminders.ts
@@ -69,17 +69,22 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
/**
* Nudge the planner to end when it is clearly iterating without new
* information: 3+ rounds in and the last 2 tool-calls returned
- * `success: false`. This is a heuristic guard against infinite re-try
- * loops where the LLM keeps calling the same failing tool with slightly
- * different arguments.
+ * `success: false`. Heuristic guard against infinite retry loops where
+ * the LLM keeps calling the same failing tool with slightly different
+ * arguments.
+ *
+ * Reads the `recentCalls` sliding window from LoopState — up to the
+ * last 5 executed calls, oldest first. Only the tail 2 are inspected:
+ * a run that mixes failures and successes is not a true retry loop,
+ * just flaky tools.
*/
export function retryLoopReminder(state: {
readonly round: number;
- readonly lastFailures: readonly boolean[];
+ readonly recentCalls: readonly { readonly result: { readonly success: boolean } }[];
}): string | null {
if (state.round < 3) return null;
- const recent = state.lastFailures.slice(-2);
- if (recent.length === 2 && recent.every((f) => f)) {
+ const tail = state.recentCalls.slice(-2);
+ if (tail.length === 2 && tail.every((ec) => !ec.result.success)) {
return (
`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
@@ -100,19 +105,10 @@ export function retryLoopReminder(state: {
*/
export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
return (state) => {
- const failures: boolean[] = [];
- // We don't get the full executedCalls in LoopState (intentional —
- // the channel is meant to be cheap), but `lastCall` is exposed.
- // For retry-loop detection we'd ideally track the last N; for now
- // the single lastCall is enough to skip 2-round miss signals, so
- // this producer is effectively dormant until we extend LoopState.
- // Left in place so the shape is right for M2 follow-ups.
- if (state.lastCall) failures.push(!state.lastCall.result.success);
-
const out: string[] = [];
const budget = tokenBudgetReminder(ctx, state.usage.totalTokens);
if (budget) out.push(budget);
- const retry = retryLoopReminder({ round: state.round, lastFailures: failures });
+ const retry = retryLoopReminder({ round: state.round, recentCalls: state.recentCalls });
if (retry) out.push(retry);
return out;
};