feat(agent-loop): activate retryLoopReminder via LoopState.recentCalls

Extends LoopState with a sliding window of the last N ExecutedCalls (oldest-first), capped at LOOP_STATE_RECENT_CALLS_WINDOW = 5. The loop maintains the window automatically; reminderChannel producers read it without touching internal state. This activates retryLoopReminder, which was shape-only in faa472be9. The guard now fires end-to-end: when round >= 3 and the tail-2 calls both returned success:false, the LLM sees a "stop retrying, write a summary instead" <reminder> on the next turn. Checking only the tail 2 rather than the whole window is deliberate — a flaky run with intermittent success (F, F, F, OK, F) is not a retry loop, just flaky tools. Why window=5: retry loops usually manifest within 2-3 consecutive rounds; a 5-deep window gives room for burst-detection and stale-tool heuristics without bloating the reminder channel. The cap keeps the reminder producers O(5) regardless of loop length. Tests: 3 new (sliding-window cap + slide + order in shared-ai, retry composition + budget+retry chain + tail-only heuristic in mana-ai). Total agent-loop tests now 74 across both packages. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 20:21:09 +02:00 · 2026-04-23 15:02:40 +02:00 · 2026-04-23 15:02:40 +02:00 · 8f283726b1
commit 8f283726b1
parent fc635f9830
6 changed files with 171 additions and 24 deletions
--- a/services/mana-ai/CLAUDE.md
+++ b/services/mana-ai/CLAUDE.md
@ -101,8 +101,8 @@ Details zum Deep-Research-Flow: [`docs/reports/gemini-deep-research.md`](../../d

 Claude-Code-inspirierte Primitive in `runPlannerLoop` (live in `@mana/shared-ai`, siehe [`docs/plans/agent-loop-improvements-m1.md`](../../docs/plans/agent-loop-improvements-m1.md)) und deren Konsumierung hier:

- [x] `reminderChannel` wired via `buildReminderChannel()` in `src/planner/reminders.ts`. Erster Live-Producer: `tokenBudgetReminder` — warnt ab 75% Tagesbudget, eskaliert ab 100% mit "JETZT abschliessen"-Prompt. Round-usage wird on-the-fly drauf addiert, so dass der Warn-Level mitwandert.
- [x] `retryLoopReminder` — Shape fertig, aber dormant: LoopState exponiert heute nur `lastCall`, nicht ein Failure-Window. Aktiviert automatisch sobald shared-ai LoopState um `recentResults` erweitert.
+- [x] `reminderChannel` wired via `buildReminderChannel()` in `src/planner/reminders.ts`. Live-Producer 1: `tokenBudgetReminder` — warnt ab 75% Tagesbudget, eskaliert ab 100% mit "JETZT abschliessen"-Prompt. Round-usage wird on-the-fly drauf addiert, so dass der Warn-Level mitwandert.
+- [x] `retryLoopReminder` live — feuert ab Round 3, wenn die letzten 2 Tool-Calls beide fehlschlugen. Liest das `recentCalls`-Sliding-Window (5 Einträge, oldest-first) aus `LoopState`.
 - [x] `POLICY_MODE` env (off/log-only/enforce, default log-only) für die mana-ai-seitige Freitext-Inspection (`detectInjectionMarker`). Rate-Limit und destructive-opt-in sind hier NICHT aktiv — tools werden nur als PlanSteps aufgezeichnet, die echte Enforcement passiert im Webapp-Client.
 - [ ] Parallel-Reads im Server-Tick haben keinen Effekt, weil `SERVER_TOOLS` per Konstruktion propose-only ist. Könnte relevant werden sobald mana-ai die vollständige tool-registry absorbiert (M4 des Personas-Plans).

--- a/services/mana-ai/src/planner/reminders.test.ts
+++ b/services/mana-ai/src/planner/reminders.test.ts
@ -49,10 +49,26 @@ function makeState(overrides: Partial<LoopState> = {}): LoopState {
 		round: 1,
 		toolCallCount: 0,
 		usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
+		recentCalls: [],
 		...overrides,
 	};
 }

+function mkExecutedCall(
+	success: boolean,
+	toolName = 'create_thing'
+): {
+	round: number;
+	call: { id: string; name: string; arguments: Record<string, unknown> };
+	result: { success: boolean; message: string };
+} {
+	return {
+		round: 1,
+		call: { id: crypto.randomUUID(), name: toolName, arguments: {} },
+		result: { success, message: success ? 'ok' : 'boom' },
+	};
+}
+
 // ─── tokenBudgetReminder ──────────────────────────────────────────

 describe('tokenBudgetReminder', () => {
@ -121,21 +137,50 @@ describe('tokenBudgetReminder', () => {

 describe('retryLoopReminder', () => {
 	it('is silent before round 3', () => {
-		expect(retryLoopReminder({ round: 2, lastFailures: [true, true] })).toBeNull();
+		expect(
+			retryLoopReminder({
+				round: 2,
+				recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+			})
+		).toBeNull();
 	});

 	it('warns when the last 2 calls failed at round >= 3', () => {
-		const msg = retryLoopReminder({ round: 3, lastFailures: [true, true] });
+		const msg = retryLoopReminder({
+			round: 3,
+			recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+		});
 		expect(msg).not.toBeNull();
 		expect(msg).toContain('fehlgeschlagen');
 	});

 	it('stays silent when only one of the last 2 failed', () => {
-		expect(retryLoopReminder({ round: 4, lastFailures: [false, true] })).toBeNull();
+		expect(
+			retryLoopReminder({
+				round: 4,
+				recentCalls: [mkExecutedCall(true), mkExecutedCall(false)],
+			})
+		).toBeNull();
 	});

-	it('stays silent with fewer than 2 failures recorded', () => {
-		expect(retryLoopReminder({ round: 5, lastFailures: [true] })).toBeNull();
+	it('stays silent with fewer than 2 calls recorded', () => {
+		expect(retryLoopReminder({ round: 5, recentCalls: [mkExecutedCall(false)] })).toBeNull();
+	});
+
+	it('looks only at the TAIL 2 — a flaky run with intermittent success is not a retry loop', () => {
+		// 5 calls: F, F, F, OK, F → tail-2 is [OK, F] → silent
+		expect(
+			retryLoopReminder({
+				round: 5,
+				recentCalls: [
+					mkExecutedCall(false),
+					mkExecutedCall(false),
+					mkExecutedCall(false),
+					mkExecutedCall(true),
+					mkExecutedCall(false),
+				],
+			})
+		).toBeNull();
 	});
 });

@ -164,6 +209,40 @@ describe('buildReminderChannel', () => {
 		expect(out[0]).toContain('90%');
 	});

+	it('fires retryLoopReminder end-to-end through the channel', () => {
+		const channel = buildReminderChannel({
+			agent: makeAgent({ maxTokensPerDay: 1_000_000 }), // budget silent
+			mission: makeMission(),
+			pretickUsage24h: 0,
+		});
+		const out = channel(
+			makeState({
+				round: 4,
+				recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+			})
+		);
+		expect(out).toHaveLength(1);
+		expect(out[0]).toContain('fehlgeschlagen');
+	});
+
+	it('can fire budget + retry together (composition)', () => {
+		const channel = buildReminderChannel({
+			agent: makeAgent({ maxTokensPerDay: 10_000 }),
+			mission: makeMission(),
+			pretickUsage24h: 9_000,
+		});
+		const out = channel(
+			makeState({
+				round: 3,
+				usage: { promptTokens: 500, completionTokens: 500, totalTokens: 1_000 },
+				recentCalls: [mkExecutedCall(false), mkExecutedCall(false)],
+			})
+		);
+		expect(out).toHaveLength(2);
+		expect(out[0]).toContain('ausgeschoepft'); // budget first
+		expect(out[1]).toContain('fehlgeschlagen'); // retry second
+	});
+
 	it('uses the updated totalTokens each round (re-evaluated)', () => {
 		const channel = buildReminderChannel({
 			agent: makeAgent({ maxTokensPerDay: 10_000 }),
--- a/services/mana-ai/src/planner/reminders.ts
+++ b/services/mana-ai/src/planner/reminders.ts
@ -69,17 +69,22 @@ export function tokenBudgetReminder(ctx: ReminderContext, roundUsage: number): s
 /**
 * Nudge the planner to end when it is clearly iterating without new
 * information: 3+ rounds in and the last 2 tool-calls returned
- * `success: false`. This is a heuristic guard against infinite re-try
- * loops where the LLM keeps calling the same failing tool with slightly
- * different arguments.
+ * `success: false`. Heuristic guard against infinite retry loops where
+ * the LLM keeps calling the same failing tool with slightly different
+ * arguments.
+ *
+ * Reads the `recentCalls` sliding window from LoopState — up to the
+ * last 5 executed calls in oldest-first order. We only look at the
+ * tail 2 because a run that mixes failures and successes is not a
+ * true retry loop; it's just flaky tools.
 */
 export function retryLoopReminder(state: {
 	readonly round: number;
-	readonly lastFailures: readonly boolean[];
+	readonly recentCalls: readonly { readonly result: { readonly success: boolean } }[];
 }): string | null {
 	if (state.round < 3) return null;
-	const recent = state.lastFailures.slice(-2);
-	if (recent.length === 2 && recent.every((f) => f)) {
+	const tail = state.recentCalls.slice(-2);
+	if (tail.length === 2 && tail.every((ec) => !ec.result.success)) {
 		return (
 			`Die letzten 2 Tool-Calls sind fehlgeschlagen. Brich die ` +
 			`Wiederholung ab — formuliere stattdessen einen Summary-Text, ` +
@ -100,19 +105,10 @@ export function retryLoopReminder(state: {
 */
 export function buildReminderChannel(ctx: ReminderContext): ReminderChannel {
 	return (state) => {
-		const failures: boolean[] = [];
-		// We don't get the full executedCalls in LoopState (intentional —
-		// the channel is meant to be cheap), but `lastCall` is exposed.
-		// For retry-loop detection we'd ideally track the last N; for now
-		// the single lastCall is enough to skip 2-round miss signals, so
-		// this producer is effectively dormant until we extend LoopState.
-		// Left in place so the shape is right for M2 follow-ups.
-		if (state.lastCall) failures.push(!state.lastCall.result.success);
-
 		const out: string[] = [];
 		const budget = tokenBudgetReminder(ctx, state.usage.totalTokens);
 		if (budget) out.push(budget);
-		const retry = retryLoopReminder({ round: state.round, lastFailures: failures });
+		const retry = retryLoopReminder({ round: state.round, recentCalls: state.recentCalls });
 		if (retry) out.push(retry);
 		return out;
 	};