/**
 * Multi-turn tool-calling loop shared between the webapp runner and the
 * server-side mana-ai tick. Replaces the text-JSON planner pipeline:
 * we hand the LLM a tool catalog, it emits native tool_calls, we
 * execute them and feed the results back as tool-messages until the
 * LLM has nothing more to call (or we hit the round budget).
 *
 * Environment-specific concerns (HTTP transport, auth, actor
 * attribution) live in the caller-provided ``LlmClient`` and
 * ``onToolCall`` callback. The loop itself stays pure.
 */

import type { ToolSchema } from '../tools/schemas';
import type { ToolSpec } from '../tools/function-schema';
import { toolsToFunctionSchemas } from '../tools/function-schema';

// ─── Chat-message contract ──────────────────────────────────────────

export interface ToolCallRequest {
  readonly id: string;
  readonly name: string;
  readonly arguments: Record<string, unknown>;
}

export interface ToolResult {
  readonly success: boolean;
  readonly data?: unknown;
  readonly message: string;
}

export type ChatRole = 'system' | 'user' | 'assistant' | 'tool';

export interface ChatMessage {
  readonly role: ChatRole;
  readonly content?: string | null;
  readonly toolCalls?: readonly ToolCallRequest[];
  readonly toolCallId?: string;
}

// ─── LLM client contract ────────────────────────────────────────────

export interface LlmCompletionRequest {
  readonly messages: readonly ChatMessage[];
  readonly tools: readonly ToolSpec[];
  readonly model: string;
  readonly temperature?: number;
}

export type LlmFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter';

export interface TokenUsage {
  readonly promptTokens: number;
  readonly completionTokens: number;
  readonly totalTokens: number;
}

export interface LlmCompletionResponse {
  readonly content: string | null;
  readonly toolCalls: readonly ToolCallRequest[];
  readonly finishReason: LlmFinishReason;
  /** Token counts for this one call — propagated from the provider
   * response when available. Summed across rounds in PlannerLoopResult. */
  readonly usage?: TokenUsage;
}

export interface LlmClient {
  complete(req: LlmCompletionRequest): Promise<LlmCompletionResponse>;
}

// ─── Loop input / result ────────────────────────────────────────────

/**
 * Transient loop state surfaced to the reminderChannel. The reminder
 * callback is pure — it reads this snapshot and returns hints; it does
 * not mutate anything.
 */
export interface LoopState {
  /** 1-based round index for the CURRENT LLM call (before it runs). */
  readonly round: number;
  /** Number of tool calls executed across all prior rounds. */
  readonly toolCallCount: number;
  /** Accumulated tokens reported by the provider, up to (but not
   * including) the current round's call. Zero when the provider
   * hasn't reported usage. */
  readonly usage: TokenUsage;
  /** The most recent ExecutedCall, or undefined in round 1. Handy for
   * "the last tool failed — warn the LLM" producers. */
  readonly lastCall?: ExecutedCall;
}

/**
 * Callback that yields transient system-message strings to attach to the
 * NEXT LLM request only. Returned strings are wrapped in
 * `<system-reminder>…</system-reminder>` tags and injected as system
 * messages AFTER the persistent `messages` history. They are NEVER
 * written back to `messages[]` and therefore NEVER appear in
 * `PlannerLoopResult.messages`.
 *
 * This is the Claude-Code `<system-reminder>` pattern: steering the model
 * per-turn without polluting the persisted conversation log or
 * invalidating the provider's KV-cache on stable prefixes.
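 *
 * A minimal sketch of one channel (illustrative only, not part of this
 * module): it nudges the model after a failed tool call and when the
 * round budget is nearly spent. The hint wording and the round threshold
 * are assumptions, not prescribed by the loop.
 *
 * @example
 * const reminderChannel: ReminderChannel = (state) => {
 *   const hints: string[] = [];
 *   if (state.lastCall && !state.lastCall.result.success) {
 *     hints.push(
 *       `Previous tool call ${state.lastCall.call.name} failed: ` +
 *         `${state.lastCall.result.message}. Recover or explain before retrying.`,
 *     );
 *   }
 *   // Threshold assumes the default 5-round budget.
 *   if (state.round >= 4) {
 *     hints.push('Round budget nearly spent. Wrap up with a final answer.');
 *   }
 *   return hints;
 * };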
 */
export type ReminderChannel = (state: LoopState) => readonly string[];

export interface PlannerLoopInput {
  readonly systemPrompt: string;
  readonly userPrompt: string;
  /** Optional prior conversation turns inserted between the system
   * prompt and the new user turn. Used by the companion chat to
   * preserve multi-turn history; missions leave this empty. */
  readonly priorMessages?: readonly ChatMessage[];
  readonly tools: readonly ToolSchema[];
  readonly model: string;
  readonly temperature?: number;
  /** Hard ceiling on planner rounds. Each round = one LLM call plus
   * whatever tool executions its output triggered. Defaults to 5. */
  readonly maxRounds?: number;
  /** Optional per-round reminder producer — see ReminderChannel docs. */
  readonly reminderChannel?: ReminderChannel;
  /**
   * Predicate that decides whether a tool is safe to execute in parallel
   * with other tools of the same stripe. Claude-Code `gW5` pattern: when
   * every tool_call in a round is parallel-safe, they run via Promise.all
   * in batches of 10; if any call is NOT parallel-safe, the whole batch
   * falls back to sequential (preserves ordering invariants for
   * write-after-read chains).
   *
   * Default: `() => false` → fully sequential, matching pre-M1 behaviour.
   *
   * The predicate is called once per tool_call per round, so cheap
   * constant-time lookups are expected (registry hit, name-prefix check).
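   *
   * A sketch of one such predicate (illustrative only: the tool names
   * below are hypothetical, not taken from this codebase). An allow-list
   * of read-only tools backed by a Set gives the constant-time lookup
   * described above.
   *
   * @example
   * const READ_ONLY_TOOLS = new Set(['memory_search', 'profile_get']);
   * const isParallelSafe = (toolName: string) => READ_ONLY_TOOLS.has(toolName);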
   */
  readonly isParallelSafe?: (toolName: string) => boolean;
}

/** Max concurrent tool executions per round. Mirrors Claude Code's gW5
 * ceiling. Keeps tail latency bounded when the LLM requests many reads
 * at once and protects downstream services from unbounded fan-out. */
export const PARALLEL_TOOL_BATCH_SIZE = 10;

export interface ExecutedCall {
  readonly round: number;
  readonly call: ToolCallRequest;
  readonly result: ToolResult;
}

export type LoopStopReason = 'assistant-stop' | 'max-rounds' | 'no-tool-calls' | 'llm-error';

export interface PlannerLoopResult {
  readonly rounds: number;
  readonly executedCalls: readonly ExecutedCall[];
  /** Final assistant text when the LLM stopped instead of calling a
   * tool. ``null`` when the last turn was a tool-call burst that we
   * cut off via round budget. */
  readonly summary: string | null;
  readonly stopReason: LoopStopReason;
  /** Complete chat history for debug-log capture (system + user +
   * every assistant/tool turn). Never synced — contains decrypted
   * user content. */
  readonly messages: readonly ChatMessage[];
  /** Accumulated token usage across every LLM round. Zero counts when
   * the provider didn't report usage. Consumers use this for budget
   * tracking (mana-ai's per-agent daily limit) and cost telemetry. */
  readonly usage: TokenUsage;
}

// ─── The loop ───────────────────────────────────────────────────────

const DEFAULT_MAX_ROUNDS = 5;
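
// The `onToolCall` executor passed to runPlannerLoop must never throw (see
// the parameter doc below). A defensive wrapper might look like this
// sketch; `executeTool` is a hypothetical dispatcher, not something this
// module provides:
//
//   const onToolCall = async (call: ToolCallRequest): Promise<ToolResult> => {
//     try {
//       return await executeTool(call.name, call.arguments);
//     } catch (err) {
//       return {
//         success: false,
//         message: err instanceof Error ? err.message : String(err),
//       };
//     }
//   };
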
export async function runPlannerLoop(opts: {
  readonly llm: LlmClient;
  readonly input: PlannerLoopInput;
  /** Execute a tool call and return the result that should be fed back
   * to the LLM as a tool-message. Must not throw — convert errors to
   * ``{ success: false, message }``. The loop injects the result
   * verbatim so the LLM can reason over failures (e.g. "vault locked
   * → ask user to unlock"). */
  readonly onToolCall: (call: ToolCallRequest) => Promise<ToolResult>;
}): Promise<PlannerLoopResult> {
  const { llm, input, onToolCall } = opts;
  const maxRounds = input.maxRounds ?? DEFAULT_MAX_ROUNDS;
  const toolSpecs = toolsToFunctionSchemas(input.tools);

  const messages: ChatMessage[] = [
    { role: 'system', content: input.systemPrompt },
    ...(input.priorMessages ?? []),
    { role: 'user', content: input.userPrompt },
  ];

  const executedCalls: ExecutedCall[] = [];
  let summary: string | null = null;
  let stopReason: LoopStopReason = 'max-rounds';
  let rounds = 0;
  let promptTokens = 0;
  let completionTokens = 0;

  while (rounds < maxRounds) {
    rounds++;

    // Per-round reminder injection: ask the channel for transient
    // hints, wrap each in <system-reminder> tags, and append them as
    // system messages to THIS request only. Nothing gets pushed to
    // `messages` — the reminders are ephemeral steering, not conversation.
    let requestMessages: readonly ChatMessage[] = messages;
    if (input.reminderChannel) {
      const state: LoopState = {
        round: rounds,
        toolCallCount: executedCalls.length,
        usage: {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
        },
        lastCall: executedCalls[executedCalls.length - 1],
      };
      const reminders = input.reminderChannel(state);
      if (reminders.length > 0) {
        const reminderMessages: ChatMessage[] = reminders.map((text) => ({
          role: 'system',
          content: `<system-reminder>${text}</system-reminder>`,
        }));
        requestMessages = [...messages, ...reminderMessages];
      }
    }

    const response = await llm.complete({
      messages: requestMessages,
      tools: toolSpecs,
      model: input.model,
      temperature: input.temperature,
    });

    if (response.usage) {
      promptTokens += response.usage.promptTokens;
      completionTokens += response.usage.completionTokens;
    }

    // Append the assistant turn to history before we execute any
    // tools — the LLM needs to see its own prior tool_calls alongside
    // the tool-message results in the next turn.
    messages.push({
      role: 'assistant',
      content: response.content,
      toolCalls: response.toolCalls.length > 0 ? response.toolCalls : undefined,
    });

    if (response.toolCalls.length === 0) {
      summary = response.content;
      stopReason = response.finishReason === 'stop' ? 'assistant-stop' : 'no-tool-calls';
      break;
    }

    // Tool execution.
    //
    // Sequential by default. When the caller supplies `isParallelSafe`
    // and EVERY call in this round passes it, we dispatch in batches
    // of PARALLEL_TOOL_BATCH_SIZE via Promise.all. A single unsafe
    // call in the batch downgrades the whole round to sequential —
    // this preserves semantics for write-after-read chains without
    // pushing the decision onto the model.
    //
    // In both modes we append to `messages` in the LLM's original
    // call order, not completion order, so the debug-log stays linear.
    const calls = response.toolCalls;
    const allParallelSafe =
      !!input.isParallelSafe &&
      calls.length > 1 &&
      calls.every((c) => input.isParallelSafe!(c.name));

    if (allParallelSafe) {
      for (let i = 0; i < calls.length; i += PARALLEL_TOOL_BATCH_SIZE) {
        const batch = calls.slice(i, i + PARALLEL_TOOL_BATCH_SIZE);
        const results = await Promise.all(batch.map((call) => onToolCall(call)));
        for (let j = 0; j < batch.length; j++) {
          const call = batch[j];
          const result = results[j];
          executedCalls.push({ round: rounds, call, result });
          messages.push({
            role: 'tool',
            toolCallId: call.id,
            content: JSON.stringify({
              success: result.success,
              message: result.message,
              ...(result.data !== undefined ? { data: result.data } : {}),
            }),
          });
        }
      }
    } else {
      for (const call of calls) {
        const result = await onToolCall(call);
        executedCalls.push({ round: rounds, call, result });
        messages.push({
          role: 'tool',
          toolCallId: call.id,
          content: JSON.stringify({
            success: result.success,
            message: result.message,
            ...(result.data !== undefined ? { data: result.data } : {}),
          }),
        });
      }
    }

    // If the round limit is about to hit, surface it as the reason —
    // the outer consumer can mark the iteration as incomplete.
    if (rounds >= maxRounds) {
      stopReason = 'max-rounds';
      break;
    }
  }

  return {
    rounds,
    executedCalls,
    summary,
    stopReason,
    messages,
    usage: {
      promptTokens,
      completionTokens,
      totalTokens: promptTokens + completionTokens,
    },
  };
}
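
// End-to-end wiring, for orientation (illustrative only: `llm`, `input`,
// `onToolCall`, `dailyTokenBudget`, and `persistDebugLog` are hypothetical
// caller-side pieces, not exports of this module):
//
//   const result = await runPlannerLoop({ llm, input, onToolCall });
//   if (result.stopReason === 'max-rounds') {
//     // Round budget exhausted mid tool-burst: mark the iteration incomplete.
//   }
//   dailyTokenBudget -= result.usage.totalTokens;
//   persistDebugLog(result.messages); // keep local: contains decrypted user content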