mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 10:21:10 +02:00
Three Claude-Code-inspired primitives for runPlannerLoop, derived from the
reverse-engineering reports in docs/reports/:
1. **Policy gate** (@mana/tool-registry) — evaluatePolicy() gates every tool
   dispatch: denies admin-scoped tools, denies destructive tools not in the
   user's opt-in list, rate-limits per tool (30 calls/60s by default), and
   flags prompt-injection markers in freetext without blocking. Wired into
   mana-mcp with a per-user rolling invocation log and a POLICY_MODE env var
   (off|log-only|enforce, default log-only). mana-ai uses detectInjectionMarker
   only — tool dispatch there is plan-only, so the rate-limit and destructive
   checks don't apply yet.
2. **Reminder channel** (packages/shared-ai/src/planner/loop.ts) — new
   reminderChannel callback in PlannerLoopInput. Called once per round with a
   LoopState snapshot (round, toolCallCount, usage, lastCall); returned
   strings are wrapped in <reminder> tags and injected as transient system
   messages into THIS LLM request only. They are never pushed to messages[]
   — the Claude-Code <system-reminder> pattern that keeps the KV-cache
   prefix stable.
3. **Parallel reads** (loop.ts) — isParallelSafe predicate enables
   Promise.all dispatch when every tool_call in a round is parallel-safe,
   in batches of PARALLEL_TOOL_BATCH_SIZE=10. Any non-safe call downgrades
   the whole round to sequential. messages[] is always appended in the
   LLM's original call order, never completion order, so the debug log
   stays linear. Default-off (undefined predicate) preserves pre-M1
   behaviour.
Tests: 21 new in tool-registry (policy), 9 new in shared-ai (5 parallel,
4 reminder). All 74 green, type-check clean across 4 packages.
Design/plan: docs/plans/agent-loop-improvements-m1.md
Reports: docs/reports/claude-code-architecture.md,
docs/reports/mana-agent-improvements-from-claude-code.md
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
324 lines · 12 KiB · TypeScript
/**
 * Multi-turn tool-calling loop shared between the webapp runner and the
 * server-side mana-ai tick. Replaces the text-JSON planner pipeline:
 * we hand the LLM a tool catalog, it emits native tool_calls, we
 * execute them and feed the results back as tool-messages until the
 * LLM has nothing more to call (or we hit the round budget).
 *
 * Environment-specific concerns (HTTP transport, auth, actor
 * attribution) live in the caller-provided ``LlmClient`` and
 * ``onToolCall`` callback. The loop itself stays pure.
 */

import type { ToolSchema } from '../tools/schemas';
import type { ToolSpec } from '../tools/function-schema';
import { toolsToFunctionSchemas } from '../tools/function-schema';

// ─── Chat-message contract ──────────────────────────────────────────

export interface ToolCallRequest {
  readonly id: string;
  readonly name: string;
  readonly arguments: Record<string, unknown>;
}

export interface ToolResult {
  readonly success: boolean;
  readonly data?: unknown;
  readonly message: string;
}

export type ChatRole = 'system' | 'user' | 'assistant' | 'tool';

export interface ChatMessage {
  readonly role: ChatRole;
  readonly content?: string | null;
  readonly toolCalls?: readonly ToolCallRequest[];
  readonly toolCallId?: string;
}

// ─── LLM client contract ────────────────────────────────────────────

export interface LlmCompletionRequest {
  readonly messages: readonly ChatMessage[];
  readonly tools: readonly ToolSpec[];
  readonly model: string;
  readonly temperature?: number;
}

export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';

export interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}

export interface LlmCompletionResponse {
  readonly content: string | null;
  readonly toolCalls: readonly ToolCallRequest[];
  readonly finishReason: LlmFinishReason;
  /** Token counts for this one call — propagated from the provider
   * response when available. Summed across rounds in PlannerLoopResult. */
  readonly usage?: TokenUsage;
}

export interface LlmClient {
  complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse>;
}
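
// A minimal LlmClient sketch, assuming no real provider: it answers every
// request with a fixed assistant turn and synthetic usage numbers. The
// `stubLlm` name is illustrative and not part of this module; real adapters
// (HTTP transport, auth, retries) live in the callers.
const stubLlm: LlmClient = {
  async complete(_req: LlmCompletionRequest): Promise<LlmCompletionResponse> {
    return {
      content: 'nothing left to do',
      toolCalls: [],
      finishReason: 'stop',
      usage: { promptTokens: 12, completionTokens: 4, totalTokens: 16 },
    };
  },
};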

// ─── Loop input / result ────────────────────────────────────────────

/**
 * Transient loop state surfaced to the reminderChannel. The reminder
 * callback is pure — it reads this snapshot and returns hints; it does
 * not mutate anything.
 */
export interface LoopState {
  /** 1-based round index for the CURRENT LLM call (before it runs). */
  readonly round: number;
  /** Number of tool calls executed across all prior rounds. */
  readonly toolCallCount: number;
  /** Accumulated tokens reported by the provider, up to (but not
   * including) the current round's call. Zero when the provider
   * hasn't reported usage. */
  readonly usage: TokenUsage;
  /** The most recent ExecutedCall, or undefined in round 1. Handy for
   * "the last tool failed — warn the LLM" producers. */
  readonly lastCall?: ExecutedCall;
}

/**
 * Callback that yields transient system-message strings to attach to the
 * NEXT LLM request only. Returned strings are wrapped in `<reminder>…
 * </reminder>` tags and injected as system messages AFTER the persistent
 * `messages` history. They are NEVER written back to `messages[]` and
 * therefore NEVER appear in `PlannerLoopResult.messages`.
 *
 * This is the Claude-Code `<system-reminder>` pattern: steering the model
 * per-turn without polluting the persisted conversation log or
 * invalidating the provider's KV-cache on stable prefixes.
 */
export type ReminderChannel = (state: LoopState) => readonly string[];
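
// A minimal ReminderChannel sketch (`exampleReminders` is illustrative and
// caller-defined): nudge the model when the round budget is nearly spent,
// and surface the last failure so it can change course. Producers return
// plain strings; the loop adds the <reminder> wrapping itself.
const exampleReminders: ReminderChannel = (state) => {
  const hints: string[] = [];
  if (state.round >= 4) {
    hints.push(`Round ${state.round} of 5: wrap up or answer directly.`);
  }
  const last = state.lastCall;
  if (last && !last.result.success) {
    hints.push(`The last tool call (${last.call.name}) failed: ${last.result.message}`);
  }
  return hints;
};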

export interface PlannerLoopInput {
  readonly systemPrompt: string;
  readonly userPrompt: string;
  /** Optional prior conversation turns inserted between the system
   * prompt and the new user turn. Used by the companion chat to
   * preserve multi-turn history; missions leave this empty. */
  readonly priorMessages?: readonly ChatMessage[];
  readonly tools: readonly ToolSchema[];
  readonly model: string;
  readonly temperature?: number;
  /** Hard ceiling on planner rounds. Each round = one LLM call plus
   * whatever tool executions its output triggered. Defaults to 5. */
  readonly maxRounds?: number;
  /** Optional per-round reminder producer — see ReminderChannel docs. */
  readonly reminderChannel?: ReminderChannel;
  /**
   * Predicate that decides whether a tool is safe to execute in parallel
   * with other tools of the same stripe. Claude-Code `gW5` pattern: when
   * every tool_call in a round is parallel-safe, they run via Promise.all
   * in batches of 10; if any call is NOT parallel-safe, the whole batch
   * falls back to sequential (preserves ordering invariants for
   * write-after-read chains).
   *
   * Default: `() => false` → fully sequential, matching pre-M1 behaviour.
   *
   * The predicate is called once per tool_call per round, so cheap
   * constant-time lookups are expected (registry hit, name-prefix check).
   */
  readonly isParallelSafe?: (toolName: string) => boolean;
}
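
// A minimal isParallelSafe sketch, assuming read-only tools follow a naming
// convention. The prefix set here is hypothetical; real callers would consult
// the tool registry instead. Anything unrecognised stays sequential, which
// is the safe default.
const exampleIsParallelSafe = (toolName: string): boolean =>
  ['get_', 'list_', 'search_'].some((prefix) => toolName.startsWith(prefix));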

/** Max concurrent tool executions per round. Mirrors Claude Code's gW5
 * ceiling. Keeps tail latency bounded when the LLM requests many reads
 * at once and protects downstream services from unbounded fan-out. */
export const PARALLEL_TOOL_BATCH_SIZE = 10;

export interface ExecutedCall {
  readonly round: number;
  readonly call: ToolCallRequest;
  readonly result: ToolResult;
}

export type LoopStopReason = 'assistant-stop' | 'max-rounds' | 'no-tool-calls' | 'llm-error';

export interface PlannerLoopResult {
  readonly rounds: number;
  readonly executedCalls: readonly ExecutedCall[];
  /** Final assistant text when the LLM stopped instead of calling a
   * tool. ``null`` when the last turn was a tool-call burst that we
   * cut off via round budget. */
  readonly summary: string | null;
  readonly stopReason: LoopStopReason;
  /** Complete chat history for debug-log capture (system + user +
   * every assistant/tool turn). Never synced — contains decrypted
   * user content. */
  readonly messages: readonly ChatMessage[];
  /** Accumulated token usage across every LLM round. Zero counts when
   * the provider didn't report usage. Consumers use this for budget
   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
  readonly usage: TokenUsage;
}

// ─── The loop ───────────────────────────────────────────────────────

const DEFAULT_MAX_ROUNDS = 5;

export async function runPlannerLoop(opts: {
  readonly llm: LlmClient;
  readonly input: PlannerLoopInput;
  /** Execute a tool call and return the result that should be fed back
   * to the LLM as a tool-message. Must not throw — convert errors to
   * ``{ success: false, message }``. The loop injects the result
   * verbatim so the LLM can reason over failures (e.g. "vault locked
   * → ask user to unlock"). */
  readonly onToolCall: (call: ToolCallRequest) => Promise<ToolResult>;
}): Promise<PlannerLoopResult> {
  const { llm, input, onToolCall } = opts;
  const maxRounds = input.maxRounds ?? DEFAULT_MAX_ROUNDS;
  const toolSpecs = toolsToFunctionSchemas(input.tools);

  const messages: ChatMessage[] = [
    { role: 'system', content: input.systemPrompt },
    ...(input.priorMessages ?? []),
    { role: 'user', content: input.userPrompt },
  ];
  const executedCalls: ExecutedCall[] = [];
  let summary: string | null = null;
  let stopReason: LoopStopReason = 'max-rounds';
  let rounds = 0;
  let promptTokens = 0;
  let completionTokens = 0;

  while (rounds < maxRounds) {
    rounds++;

    // Per-round reminder injection: ask the channel for transient
    // hints, wrap each in <reminder> tags, and append them as system
    // messages to THIS request only. Nothing gets pushed to `messages`
    // — the reminders are ephemeral steering, not conversation.
    let requestMessages: readonly ChatMessage[] = messages;
    if (input.reminderChannel) {
      const state: LoopState = {
        round: rounds,
        toolCallCount: executedCalls.length,
        usage: {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
        },
        lastCall: executedCalls[executedCalls.length - 1],
      };
      const reminders = input.reminderChannel(state);
      if (reminders.length > 0) {
        const reminderMessages: ChatMessage[] = reminders.map((text) => ({
          role: 'system',
          content: `<reminder>${text}</reminder>`,
        }));
        requestMessages = [...messages, ...reminderMessages];
      }
    }

    const response = await llm.complete({
      messages: requestMessages,
      tools: toolSpecs,
      model: input.model,
      temperature: input.temperature,
    });

    if (response.usage) {
      promptTokens += response.usage.promptTokens;
      completionTokens += response.usage.completionTokens;
    }

    // Append the assistant turn to history before we execute any
    // tools — the LLM needs to see its own prior tool_calls alongside
    // the tool-message results in the next turn.
    messages.push({
      role: 'assistant',
      content: response.content,
      toolCalls: response.toolCalls.length > 0 ? response.toolCalls : undefined,
    });

    if (response.toolCalls.length === 0) {
      summary = response.content;
      stopReason = response.finishReason === 'stop' ? 'assistant-stop' : 'no-tool-calls';
      break;
    }

    // Tool execution.
    //
    // Sequential by default. When the caller supplies `isParallelSafe`
    // and EVERY call in this round passes it, we dispatch in batches
    // of PARALLEL_TOOL_BATCH_SIZE via Promise.all. A single unsafe
    // call in the batch downgrades the whole round to sequential —
    // this preserves semantics for write-after-read chains without
    // pushing the decision onto the model.
    //
    // In both modes we append to `messages` in the LLM's original
    // call order, not completion order, so the debug-log stays linear.
    const calls = response.toolCalls;
    const allParallelSafe =
      !!input.isParallelSafe &&
      calls.length > 1 &&
      calls.every((c) => input.isParallelSafe!(c.name));

    if (allParallelSafe) {
      for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {
        const batch = calls.slice(i, i + PARALLEL_TOOL_BATCH_SIZE);
        const results = await Promise.all(batch.map((call) => onToolCall(call)));
        for (let j = 0; j < batch.length; j++) {
          const call = batch[j];
          const result = results[j];
          executedCalls.push({ round: rounds, call, result });
          messages.push({
            role: 'tool',
            toolCallId: call.id,
            content: JSON.stringify({
              success: result.success,
              message: result.message,
              ...(result.data !== undefined ? { data: result.data } : {}),
            }),
          });
        }
      }
    } else {
      for (const call of calls) {
        const result = await onToolCall(call);
        executedCalls.push({ round: rounds, call, result });
        messages.push({
          role: 'tool',
          toolCallId: call.id,
          content: JSON.stringify({
            success: result.success,
            message: result.message,
            ...(result.data !== undefined ? { data: result.data } : {}),
          }),
        });
      }
    }

    // If the round limit is about to be hit, surface it as the reason —
    // the outer consumer can mark the iteration as incomplete.
    if (rounds >= maxRounds) {
      stopReason = 'max-rounds';
      break;
    }
  }

  return {
    rounds,
    executedCalls,
    summary,
    stopReason,
    messages,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens,
    },
  };
}
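
// Usage sketch wiring the pieces above together. `stubLlm`, `exampleReminders`,
// `exampleIsParallelSafe` and the `dispatch` parameter are all illustrative,
// not part of this module. Note how onToolCall converts throws into
// { success: false, ... } instead of propagating, as the contract requires.
async function exampleRun(
  dispatch: (call: ToolCallRequest) => Promise<ToolResult>,
): Promise<PlannerLoopResult> {
  return runPlannerLoop({
    llm: stubLlm,
    input: {
      systemPrompt: 'You are a planner.',
      userPrompt: 'Tidy my inbox.',
      tools: [],
      model: 'example-model',
      reminderChannel: exampleReminders,
      isParallelSafe: exampleIsParallelSafe,
    },
    onToolCall: async (call) => {
      try {
        return await dispatch(call);
      } catch (err) {
        return {
          success: false,
          message: err instanceof Error ? err.message : String(err),
        };
      }
    },
  });
}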