managarten/packages/shared-ai/src/planner/loop.ts
Till JS 72f7978ed4 feat(agent-loop): expose compactionsDone + compactedReminder producer
Closes the loop on M2: when the compactor fires, the LLM needs to know
it's now seeing a <compact-summary> instead of raw turns so it
doesn't waste a turn asking about lost details or re-executing tools
whose responses are gone.

shared-ai:
  - LoopState grows `compactionsDone: number` (capped at 1 by the
    current loop policy, but kept as a count so future multi-compact
    cycles don't need a shape change).
  - runPlannerLoop populates it on each reminder-channel call. A new
    loop test asserts the [0, 1] sequence: round 1 before compaction,
    round 2 after.

mana-ai:
  - New producer `compactedReminder` — fires severity=info when
    compactionsDone >= 1, wrapped in a German one-liner ("frag nicht
    nach verlorenen Details": "don't ask about lost details").
  - Injected FIRST in buildReminderChannel so the LLM frames the rest
    of the round with "I'm looking at a summary" context. The metric
    surface stays `{producer='compacted', severity='info'}`.
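
  Producer shape, roughly (an illustrative sketch, not the actual
  mana-ai source; `COMPACTED_HINT_DE` stands in for the German
  one-liner):

    const compactedReminder = (state: LoopState) =>
      state.compactionsDone >= 1 ? [COMPACTED_HINT_DE] : [];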

4 new reminder tests (3 pure producer + 1 composition-ordering) plus
1 loop-wiring test. 77 tests in shared-ai, 20 in reminders.test.ts — green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 15:36:21 +02:00


/**
 * Multi-turn tool-calling loop shared between the webapp runner and the
 * server-side mana-ai tick. Replaces the text-JSON planner pipeline:
 * we hand the LLM a tool catalog, it emits native tool_calls, we
 * execute them and feed the results back as tool-messages until the
 * LLM has nothing more to call (or we hit the round budget).
 *
 * Environment-specific concerns (HTTP transport, auth, actor
 * attribution) live in the caller-provided `LlmClient` and
 * `onToolCall` callback. The loop itself stays pure.
 */
import type { ToolSchema } from '../tools/schemas';
import type { ToolSpec } from '../tools/function-schema';
import { toolsToFunctionSchemas } from '../tools/function-schema';
// ─── Chat-message contract ──────────────────────────────────────────
export interface ToolCallRequest {
  readonly id: string;
  readonly name: string;
  readonly arguments: Record<string, unknown>;
}
export interface ToolResult {
  readonly success: boolean;
  readonly data?: unknown;
  readonly message: string;
}
export type ChatRole = 'system' | 'user' | 'assistant' | 'tool';
export interface ChatMessage {
  readonly role: ChatRole;
  readonly content?: string | null;
  readonly toolCalls?: readonly ToolCallRequest[];
  readonly toolCallId?: string;
}
// ─── LLM client contract ────────────────────────────────────────────
export interface LlmCompletionRequest {
  readonly messages: readonly ChatMessage[];
  readonly tools: readonly ToolSpec[];
  readonly model: string;
  readonly temperature?: number;
}
export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';
export interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}
export interface LlmCompletionResponse {
  readonly content: string | null;
  readonly toolCalls: readonly ToolCallRequest[];
  readonly finishReason: LlmFinishReason;
  /** Token counts for this one call — propagated from the provider
   * response when available. Summed across rounds in PlannerLoopResult. */
  readonly usage?: TokenUsage;
}
export interface LlmClient {
  complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse>;
}
// ─── Loop input / result ────────────────────────────────────────────
/** Sliding-window size for `LoopState.recentCalls`. Capped so the
 * reminder channel stays cheap and hint-producers can only reason
 * over the last handful of calls, which is what retry-loop-style
 * heuristics need. */
export const LOOP_STATE_RECENT_CALLS_WINDOW = 5;
/**
 * Transient loop state surfaced to the reminderChannel. The reminder
 * callback is pure — it reads this snapshot and returns hints; it does
 * not mutate anything.
 */
export interface LoopState {
  /** 1-based round index for the CURRENT LLM call (before it runs). */
  readonly round: number;
  /** Number of tool calls executed across all prior rounds. */
  readonly toolCallCount: number;
  /** Accumulated tokens reported by the provider, up to (but not
   * including) the current round's call. Zero when the provider
   * hasn't reported usage. */
  readonly usage: TokenUsage;
  /** The most recent ExecutedCall, or undefined in round 1. Handy for
   * "the last tool failed — warn the LLM" producers. */
  readonly lastCall?: ExecutedCall;
  /**
   * Sliding window of the last N (= `LOOP_STATE_RECENT_CALLS_WINDOW`)
   * ExecutedCalls in source order, oldest first. Used by producers
   * that need more than the single-last signal — retry-loop detection
   * (N consecutive failures), burst detection (many calls to the same
   * tool), and similar. Empty in round 1; grows up to the cap.
   */
  readonly recentCalls: readonly ExecutedCall[];
  /**
   * Number of times the compactor has folded the message history in
   * this loop run. Capped at 1 by the loop itself (fire-once policy),
   * but still exposed as a count rather than a boolean so future
   * policies (e.g. multi-compact cycles) don't need a breaking API
   * change. A producer can use this to inject a "just compacted"
   * reminder on the round immediately after compaction.
   */
  readonly compactionsDone: number;
}
/**
 * Callback that yields transient system-message strings to attach to the
 * NEXT LLM request only. Returned strings are wrapped in `<reminder>…
 * </reminder>` tags and injected as system messages AFTER the persistent
 * `messages` history. They are NEVER written back to `messages[]` and
 * therefore NEVER appear in `PlannerLoopResult.messages`.
 *
 * This is the Claude-Code `<system-reminder>` pattern: steering the model
 * per-turn without polluting the persisted conversation log or
 * invalidating the provider's KV-cache on stable prefixes.
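 *
 * @example
 * Two producer sketches under assumed policies (the real mana-ai
 * producers may differ): one injects a "just compacted" hint, the
 * other detects a retry loop via `recentCalls`.
 * ```ts
 * const exampleChannel: ReminderChannel = (state) => {
 *   const hints: string[] = [];
 *   if (state.compactionsDone >= 1) {
 *     hints.push(
 *       'Earlier turns were folded into a <compact-summary>; do not ' +
 *         'ask about lost details or re-run tools whose output is gone.'
 *     );
 *   }
 *   const tail = state.recentCalls.slice(-3);
 *   if (tail.length === 3 && tail.every((c) => !c.result.success)) {
 *     hints.push('The last three tool calls failed; change approach.');
 *   }
 *   return hints;
 * };
 * ```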
 */
export type ReminderChannel = (state: LoopState) => readonly string[];
export interface PlannerLoopInput {
  readonly systemPrompt: string;
  readonly userPrompt: string;
  /** Optional prior conversation turns inserted between the system
   * prompt and the new user turn. Used by the companion chat to
   * preserve multi-turn history; missions leave this empty. */
  readonly priorMessages?: readonly ChatMessage[];
  readonly tools: readonly ToolSchema[];
  readonly model: string;
  readonly temperature?: number;
  /** Hard ceiling on planner rounds. Each round = one LLM call plus
   * whatever tool executions its output triggered. Defaults to 5. */
  readonly maxRounds?: number;
  /** Optional per-round reminder producer — see ReminderChannel docs. */
  readonly reminderChannel?: ReminderChannel;
  /**
   * Predicate that decides whether a tool is safe to execute in parallel
   * with other tools of the same stripe. Claude-Code `gW5` pattern: when
   * every tool_call in a round is parallel-safe, they run via Promise.all
   * in batches of PARALLEL_TOOL_BATCH_SIZE; if any call is NOT
   * parallel-safe, the whole batch falls back to sequential (preserves
   * ordering invariants for write-after-read chains).
   *
   * Default: `() => false` → fully sequential, matching pre-M1 behaviour.
   *
   * The predicate is called once per tool_call per round, so cheap
   * constant-time lookups are expected (registry hit, name-prefix check).
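   *
   * @example
   * A minimal sketch, assuming a read-only tool registry — `READ_ONLY`
   * and the tool names are illustrative, not part of this package:
   * ```ts
   * const READ_ONLY = new Set(['memory_search', 'calendar_read']);
   * const isParallelSafe = (toolName: string) => READ_ONLY.has(toolName);
   * ```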
   */
  readonly isParallelSafe?: (toolName: string) => boolean;
  /**
   * Context-window compactor wiring (Claude-Code `wU2` pattern).
   *
   * When set AND usage crosses the threshold, the loop replaces the
   * middle of the message history with a compact summary before the
   * next LLM call. The compact summary is persisted in the returned
   * `messages` — unlike reminders, this IS part of the canonical
   * history because raw turns got dropped.
   *
   * Contract:
   * - `maxContextTokens`: provider ceiling; the compactor skips unless
   *   it's a positive number (matches `shouldCompact()`'s safe-bail
   *   behaviour).
   * - `compact`: async callback that performs the compaction. Pass
   *   `compactHistory` from this package or an adapter that uses a
   *   cheaper model (e.g. Haiku) for the compactor's LLM call.
   * - `threshold`: optional override, default 0.92.
   *
   * Compaction fires at MOST once per loop run — once a round has been
   * compacted, we don't re-trigger until the next run, even if the
   * fresh history hits the threshold again (defence-in-depth against
   * a runaway tool that keeps bloating turns).
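   *
   * @example
   * A hedged wiring sketch (assumes `compactHistory` from this package
   * is in scope and matches the `compact` signature):
   * ```ts
   * const compactor = {
   *   maxContextTokens: 200_000,
   *   threshold: 0.92,
   *   compact: (msgs: readonly ChatMessage[]) => compactHistory(msgs),
   * };
   * ```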
   */
  readonly compactor?: {
    readonly maxContextTokens: number;
    readonly threshold?: number;
    readonly compact: (
      messages: readonly ChatMessage[]
    ) => Promise<{ readonly messages: readonly ChatMessage[]; readonly compactedTurns: number }>;
  };
}
/** Max concurrent tool executions per round. Mirrors Claude Code's gW5
 * ceiling. Keeps tail latency bounded when the LLM requests many reads
 * at once and protects downstream services from unbounded fan-out. */
export const PARALLEL_TOOL_BATCH_SIZE = 10;
export interface ExecutedCall {
  readonly round: number;
  readonly call: ToolCallRequest;
  readonly result: ToolResult;
}
export type LoopStopReason = 'assistant-stop' | 'max-rounds' | 'no-tool-calls' | 'llm-error';
export interface PlannerLoopResult {
  readonly rounds: number;
  readonly executedCalls: readonly ExecutedCall[];
  /** Final assistant text when the LLM stopped instead of calling a
   * tool. `null` when the last turn was a tool-call burst that we
   * cut off via round budget. */
  readonly summary: string | null;
  readonly stopReason: LoopStopReason;
  /** Complete chat history for debug-log capture (system + user +
   * every assistant/tool turn). Never synced — contains decrypted
   * user content. */
  readonly messages: readonly ChatMessage[];
  /** Accumulated token usage across every LLM round. Zero counts when
   * the provider didn't report usage. Consumers use this for budget
   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
  readonly usage: TokenUsage;
}
// ─── The loop ───────────────────────────────────────────────────────
const DEFAULT_MAX_ROUNDS = 5;
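/**
 * Drives the multi-turn tool-calling loop described in the file header.
 *
 * @example
 * A hedged usage sketch — `myLlmClient`, `TOOL_SCHEMAS`, and
 * `dispatchTool` are illustrative caller-side wiring, not exports of
 * this package:
 * ```ts
 * const result = await runPlannerLoop({
 *   llm: myLlmClient,
 *   input: {
 *     systemPrompt: 'You are the mission planner.',
 *     userPrompt: 'Plan my week.',
 *     tools: TOOL_SCHEMAS,
 *     model: 'provider-model-id',
 *   },
 *   onToolCall: async (call) => dispatchTool(call), // must not throw
 * });
 * console.log(result.stopReason, result.usage.totalTokens);
 * ```
 */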
export async function runPlannerLoop(opts: {
  readonly llm: LlmClient;
  readonly input: PlannerLoopInput;
  /** Execute a tool call and return the result that should be fed back
   * to the LLM as a tool-message. Must not throw — convert errors to
   * `{ success: false, message }`. The loop injects the result
   * verbatim so the LLM can reason over failures (e.g. "vault locked
   * → ask user to unlock"). */
  readonly onToolCall: (call: ToolCallRequest) => Promise<ToolResult>;
}): Promise<PlannerLoopResult> {
  const { llm, input, onToolCall } = opts;
  const maxRounds = input.maxRounds ?? DEFAULT_MAX_ROUNDS;
  const toolSpecs = toolsToFunctionSchemas(input.tools);
  const messages: ChatMessage[] = [
    { role: 'system', content: input.systemPrompt },
    ...(input.priorMessages ?? []),
    { role: 'user', content: input.userPrompt },
  ];
  const executedCalls: ExecutedCall[] = [];
  let summary: string | null = null;
  let stopReason: LoopStopReason = 'max-rounds';
  let rounds = 0;
  let promptTokens = 0;
  let completionTokens = 0;
  let compactionsDone = 0;
  while (rounds < maxRounds) {
    rounds++;
    // Context-window compactor (Claude-Code `wU2`): check BEFORE the
    // next LLM call whether the usage accumulated so far crossed the
    // threshold; if so, replace the middle of `messages` with a
    // compact summary. Fire at most once per loop run so a runaway
    // tool can't keep re-triggering.
    if (input.compactor && compactionsDone === 0) {
      const total = promptTokens + completionTokens;
      const cap = input.compactor.maxContextTokens;
      const threshold = input.compactor.threshold ?? 0.92;
      if (cap > 0 && total > 0 && total / cap >= threshold) {
        const compactResult = await input.compactor.compact(messages);
        if (compactResult.compactedTurns > 0) {
          messages.length = 0;
          for (const m of compactResult.messages) messages.push(m);
          compactionsDone++;
        }
      }
    }
    // Per-round reminder injection: ask the channel for transient
    // hints, wrap each in <reminder> tags, and append them as system
    // messages to THIS request only. Nothing gets pushed to `messages`
    // — the reminders are ephemeral steering, not conversation.
    let requestMessages: readonly ChatMessage[] = messages;
    if (input.reminderChannel) {
      const recentCalls = executedCalls.slice(-LOOP_STATE_RECENT_CALLS_WINDOW);
      const state: LoopState = {
        round: rounds,
        toolCallCount: executedCalls.length,
        usage: {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
        },
        lastCall: executedCalls[executedCalls.length - 1],
        recentCalls,
        compactionsDone,
      };
      const reminders = input.reminderChannel(state);
      if (reminders.length > 0) {
        const reminderMessages: ChatMessage[] = reminders.map((text) => ({
          role: 'system',
          content: `<reminder>${text}</reminder>`,
        }));
        requestMessages = [...messages, ...reminderMessages];
      }
    }
    const response = await llm.complete({
      messages: requestMessages,
      tools: toolSpecs,
      model: input.model,
      temperature: input.temperature,
    });
    if (response.usage) {
      promptTokens += response.usage.promptTokens;
      completionTokens += response.usage.completionTokens;
    }
    // Append the assistant turn to history before we execute any
    // tools — the LLM needs to see its own prior tool_calls alongside
    // the tool-message results in the next turn.
    messages.push({
      role: 'assistant',
      content: response.content,
      toolCalls: response.toolCalls.length > 0 ? response.toolCalls : undefined,
    });
    if (response.toolCalls.length === 0) {
      summary = response.content;
      stopReason = response.finishReason === 'stop' ? 'assistant-stop' : 'no-tool-calls';
      break;
    }
    // Tool execution.
    //
    // Sequential by default. When the caller supplies `isParallelSafe`
    // and EVERY call in this round passes it, we dispatch in batches
    // of PARALLEL_TOOL_BATCH_SIZE via Promise.all. A single unsafe
    // call in the batch downgrades the whole round to sequential —
    // this preserves semantics for write-after-read chains without
    // pushing the decision onto the model.
    //
    // In both modes we append to `messages` in the LLM's original
    // call order, not completion order, so the debug-log stays linear.
    const calls = response.toolCalls;
    const parallelSafePredicate = input.isParallelSafe;
    const allParallelSafe =
      !!parallelSafePredicate &&
      calls.length > 1 &&
      calls.every((c) => parallelSafePredicate(c.name));
    if (allParallelSafe) {
      for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {
        const batch = calls.slice(i, i + PARALLEL_TOOL_BATCH_SIZE);
        const results = await Promise.all(batch.map((call) => onToolCall(call)));
        for (let j = 0; j < batch.length; j++) {
          const call = batch[j];
          const result = results[j];
          executedCalls.push({ round: rounds, call, result });
          messages.push({
            role: 'tool',
            toolCallId: call.id,
            content: JSON.stringify({
              success: result.success,
              message: result.message,
              ...(result.data !== undefined ? { data: result.data } : {}),
            }),
          });
        }
      }
    } else {
      for (const call of calls) {
        const result = await onToolCall(call);
        executedCalls.push({ round: rounds, call, result });
        messages.push({
          role: 'tool',
          toolCallId: call.id,
          content: JSON.stringify({
            success: result.success,
            message: result.message,
            ...(result.data !== undefined ? { data: result.data } : {}),
          }),
        });
      }
    }
    // If the round limit is about to hit, surface it as the reason —
    // the outer consumer can mark the iteration as incomplete.
    if (rounds >= maxRounds) {
      stopReason = 'max-rounds';
      break;
    }
  }
  return {
    rounds,
    executedCalls,
    summary,
    stopReason,
    messages,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens,
    },
  };
}