From 66bfcb3996f1682b98af14de177fe5cf7771ca4b Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 19:14:36 +0200 Subject: [PATCH] feat(webapp): wire `task` tool into Companion chat + Mission runner (M3.3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the M3 sub-agent loop. Both webapp consumers of runPlannerLoop now expose the `task` tool to their planner LLM and route matching calls to a session-bound sub-agent handler. Pattern (identical in both files): 1. Hoist the regular tool dispatcher into a local `dispatchTool` so both the main loop AND the sub-agent executor can share it. The parent's guardrail, executor, actor attribution, and domain-event emission happen exactly once — sub-agent tool calls route through the same function. 2. Build a per-session taskHandler via createTaskToolHandler() with parentDepth=0 (sub-agents themselves refuse to recurse) and model=google/gemini-2.5-flash-lite (cheap tier — sub-agents are summarisation-heavy, no reason to burn primary budget on them). 3. toolsWithTask = [...regular tools, TASK_TOOL_SCHEMA]. 4. onToolCall branches on `call.name === TASK_TOOL_NAME` → taskHandler.handle; else dispatchTool. Both return ToolResult, so the loop doesn't care which route was taken. Companion: - parentTools = AI_TOOL_CATALOG (full catalog) - Token tracking via taskHandler.cumulativeUsage() available if we later want to attribute sub-agent tokens to a companion-session counter Mission runner: - parentTools = availableTools (agent-policy-filtered) - Sub-agent inherits the same filter — a research sub-agent in a mission that already had policy:deny on `list_events` still can't see `list_events`, defense-in-depth - runToolCall still gets aiActor → sub-agent tool executions are attributed to the same mission/iteration as the parent mana-ai deliberately NOT wired: its onToolCall is a no-op recorder (plans get staged, executed client-side on sync). 
Sub-agents there would produce no value since the sub-agent couldn't execute tools either, just plan. When the tool-registry fully absorbs AI_TOOL_CATALOG (Personas-plan M4), mana-ai will get sub-agent support in that same migration. No new tests — shared-ai's 107 tests cover the primitive + handler exhaustively. Existing 31 companion+mission tests remain green; svelte-check clean across 7427 files. Completes M3. runPlannerLoop now has Claude-Code's five big patterns: policy-gate (M1) / reminder-channel (M1) / parallel-reads (M1) / compactor (M2) / sub-agents (M3). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../web/src/lib/data/ai/missions/runner.ts | 77 +++++++++++++------ .../web/src/lib/modules/companion/engine.ts | 66 ++++++++++++---- 2 files changed, 102 insertions(+), 41 deletions(-) diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts index 72c0c7d4e..c54205ac7 100644 --- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts +++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts @@ -43,9 +43,12 @@ import { AI_TOOL_CATALOG_BY_NAME, buildSystemPrompt, compactHistory, + createTaskToolHandler, runPlannerLoop, runPrePlanGuardrails, runPreExecuteGuardrails, + TASK_TOOL_NAME, + TASK_TOOL_SCHEMA, type ChatMessage, type LlmClient, type ResolvedInput, @@ -266,12 +269,54 @@ async function runMissionInner( agentMemory: owningAgent?.memory ?? null, }); + // Regular tool dispatcher — shared between the planner loop + // and any sub-agent spawned via the `task` tool, so both + // routes go through the same guardrail + executor + actor + // attribution path. 
+ const dispatchTool = async (call: ToolCallRequest): Promise => { + await checkCancel(); + await enterPhase('staging-proposals', call.name); + + const execCheck = runPreExecuteGuardrails({ + summary: call.name, + toolName: call.name, + params: call.arguments, + rationale: mission!.objective, + }); + if (!execCheck.passed) { + return { success: false, message: `Guardrail: ${execCheck.blockReason}` }; + } + + try { + return await runToolCall(call.name, call.arguments, aiActor); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.error(`[MissionRunner] tool ${call.name} threw:`, err); + return { success: false, message: `Tool execution failed: ${msg}` }; + } + }; + + // `task` tool handler: the mission planner can spawn a + // research/plan/general sub-agent, which returns one string + // summary. parentDepth=0 here; the sub-agent itself refuses + // to launch another one. Cheap-tier model since sub-agents + // are summarisation-heavy by construction. + const taskHandler = createTaskToolHandler({ + llm: deps.llm, + model: 'google/gemini-2.5-flash-lite', + parentDepth: 0, + parentTools: availableTools, + parentOnToolCall: dispatchTool, + }); + + const toolsWithTask = [...availableTools, TASK_TOOL_SCHEMA]; + const loopResult = await runPlannerLoop({ llm: deps.llm, input: { systemPrompt, userPrompt, - tools: availableTools, + tools: toolsWithTask, model: deps.model ?? 'google/gemini-2.5-flash', maxRounds: MAX_PLANNER_ROUNDS, // Fan-out read tools when the planner requests several in @@ -295,31 +340,13 @@ async function runMissionInner( }, }, onToolCall: async (call: ToolCallRequest): Promise => { - await checkCancel(); - await enterPhase('staging-proposals', call.name); - - // Pre-execute guardrail per call. Failures come back as - // tool-messages so the LLM can choose a different path. 
- const execCheck = runPreExecuteGuardrails({ - summary: call.name, - toolName: call.name, - params: call.arguments, - rationale: mission!.objective, - }); - if (!execCheck.passed) { - return { - success: false, - message: `Guardrail: ${execCheck.blockReason}`, - }; - } - - try { - return await runToolCall(call.name, call.arguments, aiActor); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - console.error(`[MissionRunner] tool ${call.name} threw:`, err); - return { success: false, message: `Tool execution failed: ${msg}` }; + // Route `task` calls to the sub-agent handler; everything + // else goes through the regular dispatcher (guardrail + + // executor + actor attribution). + if (call.name === TASK_TOOL_NAME) { + return taskHandler.handle(call); } + return dispatchTool(call); }, }); diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts index a3693914d..e87517e63 100644 --- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts +++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts @@ -16,6 +16,9 @@ import { AI_TOOL_CATALOG, AI_TOOL_CATALOG_BY_NAME, compactHistory, + createTaskToolHandler, + TASK_TOOL_NAME, + TASK_TOOL_SCHEMA, type ChatMessage, type ToolCallRequest, type ToolResult, @@ -106,6 +109,44 @@ export async function runCompanionChat( const priorMessages = historyToChatMessages(history); const toolCalls: EngineResult['toolCalls'] = []; + // The parent tool catalog the sub-agent may filter down from. + // TASK_TOOL_SCHEMA itself is NOT in parentTools — a sub-agent can't + // launch a nested sub-agent by construction (recursion guard). + const toolsWithTask = [...AI_TOOL_CATALOG, TASK_TOOL_SCHEMA]; + + // Local dispatcher the planner's loop invokes and — via the task + // handler's parentOnToolCall — also any sub-agent. Hoisted so the + // task handler can close over it before the loop sets the branch. 
+ const dispatchTool = async (call: ToolCallRequest): Promise => { + const startedAt = Date.now(); + const toolResult = await executeTool(call.name, call.arguments); + const latencyMs = Date.now() - startedAt; + + const toolDef = getTool(call.name); + emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, { + tool: call.name, + module: toolDef?.module ?? 'unknown', + success: toolResult.success, + latencyMs, + errorMessage: toolResult.success ? undefined : toolResult.message, + }); + + toolCalls.push({ name: call.name, params: call.arguments, result: toolResult }); + return toolResult; + }; + + // Sub-agent handler bound to this session. parentDepth = 0 means + // the sub-agent itself will refuse to launch another one (recursion + // guard inside runSubAgent). Model is the cheap tier — sub-agents + // are summarisation-heavy so flash-lite is the right default. + const taskHandler = createTaskToolHandler({ + llm, + model: 'google/gemini-2.5-flash-lite', + parentDepth: 0, + parentTools: AI_TOOL_CATALOG, + parentOnToolCall: dispatchTool, + }); + try { const result = await runPlannerLoop({ llm, @@ -113,7 +154,7 @@ export async function runCompanionChat( systemPrompt, userPrompt: userMessage, priorMessages, - tools: AI_TOOL_CATALOG, + tools: toolsWithTask, model: 'google/gemini-2.5-flash', maxRounds: MAX_TOOL_ROUNDS, temperature: 0.7, @@ -136,21 +177,14 @@ export async function runCompanionChat( }, }, onToolCall: async (call: ToolCallRequest): Promise => { - const startedAt = Date.now(); - const toolResult = await executeTool(call.name, call.arguments); - const latencyMs = Date.now() - startedAt; - - const toolDef = getTool(call.name); - emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, { - tool: call.name, - module: toolDef?.module ?? 'unknown', - success: toolResult.success, - latencyMs, - errorMessage: toolResult.success ? 
undefined : toolResult.message, - }); - - toolCalls.push({ name: call.name, params: call.arguments, result: toolResult }); - return toolResult; + // Route `task` calls into the sub-agent handler. Everything + // else goes through the regular executor. + if (call.name === TASK_TOOL_NAME) { + const result = await taskHandler.handle(call); + toolCalls.push({ name: call.name, params: call.arguments, result }); + return result; + } + return dispatchTool(call); }, });