From 66bfcb3996f1682b98af14de177fe5cf7771ca4b Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Thu, 23 Apr 2026 19:14:36 +0200
Subject: [PATCH] feat(webapp): wire `task` tool into Companion chat + Mission
 runner (M3.3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the M3 sub-agent loop. Both webapp consumers of runPlannerLoop
now expose the `task` tool to their planner LLM and route matching
calls to a session-bound sub-agent handler.

Pattern (identical in both files):

  1. Hoist the regular tool dispatcher into a local `dispatchTool`
     so both the main loop AND the sub-agent executor can share it.
     The parent's guardrail, executor, actor attribution, and
     domain-event emission happen exactly once — sub-agent tool
     calls route through the same function.

  2. Build a per-session taskHandler via createTaskToolHandler()
     with parentDepth=0 (sub-agents themselves refuse to recurse)
     and model=google/gemini-2.5-flash-lite (cheap tier —
     sub-agents are summarisation-heavy, no reason to burn primary
     budget on them).

  3. toolsWithTask = [...regular tools, TASK_TOOL_SCHEMA].

  4. onToolCall branches on `call.name === TASK_TOOL_NAME` →
     taskHandler.handle; else dispatchTool. Both return
     ToolResult, so the loop doesn't care which route was taken.

Companion:
  - parentTools = AI_TOOL_CATALOG (full catalog)
  - Token tracking via taskHandler.cumulativeUsage() available if
    we later want to attribute sub-agent tokens to a companion-
    session counter

Mission runner:
  - parentTools = availableTools (agent-policy-filtered)
  - Sub-agent inherits the same filter — a research sub-agent in a
    mission that already had policy:deny on `list_events` still
    can't see `list_events`, defense-in-depth
  - runToolCall still gets aiActor → sub-agent tool executions are
    attributed to the same mission/iteration as the parent

mana-ai deliberately NOT wired: its onToolCall is a no-op recorder
(plans get staged, executed client-side on sync). Sub-agents there
would produce no value since the sub-agent couldn't execute tools
either, just plan. When the tool-registry fully absorbs AI_TOOL_CATALOG
(Personas-plan M4), mana-ai will get sub-agent support in that same
migration.

No new tests — shared-ai's 107 tests cover the primitive + handler
exhaustively. Existing 31 companion+mission tests remain green;
svelte-check clean across 7427 files.

Completes M3. runPlannerLoop now has Claude-Code's five big patterns:
policy-gate (M1) / reminder-channel (M1) / parallel-reads (M1) /
compactor (M2) / sub-agents (M3).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../web/src/lib/data/ai/missions/runner.ts    | 77 +++++++++++++------
 .../web/src/lib/modules/companion/engine.ts   | 66 ++++++++++++----
 2 files changed, 102 insertions(+), 41 deletions(-)
diff --git a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
index 72c0c7d4e..c54205ac7 100644
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
@@ -43,9 +43,12 @@ import {
 	AI_TOOL_CATALOG_BY_NAME,
 	buildSystemPrompt,
 	compactHistory,
+	createTaskToolHandler,
 	runPlannerLoop,
 	runPrePlanGuardrails,
 	runPreExecuteGuardrails,
+	TASK_TOOL_NAME,
+	TASK_TOOL_SCHEMA,
 	type ChatMessage,
 	type LlmClient,
 	type ResolvedInput,
@@ -266,12 +269,54 @@ async function runMissionInner(
 			agentMemory: owningAgent?.memory ?? null,
 		});
 
+		// Regular tool dispatcher — shared between the planner loop
+		// and any sub-agent spawned via the `task` tool, so both
+		// routes go through the same guardrail + executor + actor
+		// attribution path.
+		const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
+			await checkCancel();
+			await enterPhase('staging-proposals', call.name);
+
+			const execCheck = runPreExecuteGuardrails({
+				summary: call.name,
+				toolName: call.name,
+				params: call.arguments,
+				rationale: mission!.objective,
+			});
+			if (!execCheck.passed) {
+				return { success: false, message: `Guardrail: ${execCheck.blockReason}` };
+			}
+
+			try {
+				return await runToolCall(call.name, call.arguments, aiActor);
+			} catch (err) {
+				const msg = err instanceof Error ? err.message : String(err);
+				console.error(`[MissionRunner] tool ${call.name} threw:`, err);
+				return { success: false, message: `Tool execution failed: ${msg}` };
+			}
+		};
+
+		// `task` tool handler: the mission planner can spawn a
+		// research/plan/general sub-agent, which returns one string
+		// summary. parentDepth=0 here; the sub-agent itself refuses
+		// to launch another one. Cheap-tier model since sub-agents
+		// are summarisation-heavy by construction.
+		const taskHandler = createTaskToolHandler({
+			llm: deps.llm,
+			model: 'google/gemini-2.5-flash-lite',
+			parentDepth: 0,
+			parentTools: availableTools,
+			parentOnToolCall: dispatchTool,
+		});
+
+		const toolsWithTask = [...availableTools, TASK_TOOL_SCHEMA];
+
 		const loopResult = await runPlannerLoop({
 			llm: deps.llm,
 			input: {
 				systemPrompt,
 				userPrompt,
-				tools: availableTools,
+				tools: toolsWithTask,
 				model: deps.model ?? 'google/gemini-2.5-flash',
 				maxRounds: MAX_PLANNER_ROUNDS,
 				// Fan-out read tools when the planner requests several in
@@ -295,31 +340,13 @@ async function runMissionInner(
 				},
 			},
 			onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
-				await checkCancel();
-				await enterPhase('staging-proposals', call.name);
-
-				// Pre-execute guardrail per call. Failures come back as
-				// tool-messages so the LLM can choose a different path.
-				const execCheck = runPreExecuteGuardrails({
-					summary: call.name,
-					toolName: call.name,
-					params: call.arguments,
-					rationale: mission!.objective,
-				});
-				if (!execCheck.passed) {
-					return {
-						success: false,
-						message: `Guardrail: ${execCheck.blockReason}`,
-					};
-				}
-
-				try {
-					return await runToolCall(call.name, call.arguments, aiActor);
-				} catch (err) {
-					const msg = err instanceof Error ? err.message : String(err);
-					console.error(`[MissionRunner] tool ${call.name} threw:`, err);
-					return { success: false, message: `Tool execution failed: ${msg}` };
+				// Route `task` calls to the sub-agent handler; everything
+				// else goes through the regular dispatcher (guardrail +
+				// executor + actor attribution).
+				if (call.name === TASK_TOOL_NAME) {
+					return taskHandler.handle(call);
 				}
+				return dispatchTool(call);
 			},
 		});
 
diff --git a/apps/mana/apps/web/src/lib/modules/companion/engine.ts b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
index a3693914d..e87517e63 100644
--- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts
+++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
@@ -16,6 +16,9 @@ import {
 	AI_TOOL_CATALOG,
 	AI_TOOL_CATALOG_BY_NAME,
 	compactHistory,
+	createTaskToolHandler,
+	TASK_TOOL_NAME,
+	TASK_TOOL_SCHEMA,
 	type ChatMessage,
 	type ToolCallRequest,
 	type ToolResult,
@@ -106,6 +109,44 @@ export async function runCompanionChat(
 	const priorMessages = historyToChatMessages(history);
 	const toolCalls: EngineResult['toolCalls'] = [];
 
+	// Tool list offered to the main planner: the full catalog plus `task`.
+	// The sub-agent's parentTools is AI_TOOL_CATALOG without TASK_TOOL_SCHEMA,
+	// so a sub-agent can't launch a nested sub-agent (recursion guard).
+	const toolsWithTask = [...AI_TOOL_CATALOG, TASK_TOOL_SCHEMA];
+
+	// Local dispatcher the planner's loop invokes and — via the task
+	// handler's parentOnToolCall — also any sub-agent. Hoisted so the
+	// task handler can close over it before the loop sets the branch.
+	const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
+		const startedAt = Date.now();
+		const toolResult = await executeTool(call.name, call.arguments);
+		const latencyMs = Date.now() - startedAt;
+
+		const toolDef = getTool(call.name);
+		emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
+			tool: call.name,
+			module: toolDef?.module ?? 'unknown',
+			success: toolResult.success,
+			latencyMs,
+			errorMessage: toolResult.success ? undefined : toolResult.message,
+		});
+
+		toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
+		return toolResult;
+	};
+
+	// Sub-agent handler bound to this session. parentDepth = 0 means
+	// the sub-agent itself will refuse to launch another one (recursion
+	// guard inside runSubAgent). Model is the cheap tier — sub-agents
+	// are summarisation-heavy so flash-lite is the right default.
+	const taskHandler = createTaskToolHandler({
+		llm,
+		model: 'google/gemini-2.5-flash-lite',
+		parentDepth: 0,
+		parentTools: AI_TOOL_CATALOG,
+		parentOnToolCall: dispatchTool,
+	});
+
 	try {
 		const result = await runPlannerLoop({
 			llm,
@@ -113,7 +154,7 @@ export async function runCompanionChat(
 				systemPrompt,
 				userPrompt: userMessage,
 				priorMessages,
-				tools: AI_TOOL_CATALOG,
+				tools: toolsWithTask,
 				model: 'google/gemini-2.5-flash',
 				maxRounds: MAX_TOOL_ROUNDS,
 				temperature: 0.7,
@@ -136,21 +177,14 @@ export async function runCompanionChat(
 				},
 			},
 			onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
-				const startedAt = Date.now();
-				const toolResult = await executeTool(call.name, call.arguments);
-				const latencyMs = Date.now() - startedAt;
-
-				const toolDef = getTool(call.name);
-				emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
-					tool: call.name,
-					module: toolDef?.module ?? 'unknown',
-					success: toolResult.success,
-					latencyMs,
-					errorMessage: toolResult.success ? undefined : toolResult.message,
-				});
-
-				toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
-				return toolResult;
+				// Route `task` calls into the sub-agent handler. Everything
+				// else goes through the regular executor.
+				if (call.name === TASK_TOOL_NAME) {
+					const result = await taskHandler.handle(call);
+					toolCalls.push({ name: call.name, params: call.arguments, result });
+					return result;
+				}
+				return dispatchTool(call);
 			},
 		});