feat(webapp): wire task tool into Companion chat + Mission runner (M3.3)

Closes the M3 sub-agent loop. Both webapp consumers of runPlannerLoop now expose the `task` tool to their planner LLM and route matching calls to a session-bound sub-agent handler. Pattern (identical in both files): 1. Hoist the regular tool dispatcher into a local `dispatchTool` so both the main loop AND the sub-agent executor can share it. The parent's guardrail, executor, actor attribution, and domain-event emission are implemented exactly once — sub-agent tool calls route through the same function. 2. Build a per-session taskHandler via createTaskToolHandler() with parentDepth=0 (sub-agents themselves refuse to recurse) and model=google/gemini-2.5-flash-lite (cheap tier — sub-agents are summarisation-heavy, no reason to burn primary budget on them). 3. toolsWithTask = [...regular tools, TASK_TOOL_SCHEMA]. 4. onToolCall branches on `call.name === TASK_TOOL_NAME` → taskHandler.handle; else dispatchTool. Both return ToolResult, so the loop doesn't care which route was taken. Companion: - parentTools = AI_TOOL_CATALOG (full catalog). - Token tracking via taskHandler.cumulativeUsage() is available if we later want to attribute sub-agent tokens to a companion-session counter. Mission runner: - parentTools = availableTools (agent-policy-filtered). - Sub-agent inherits the same filter — a research sub-agent in a mission that already had policy:deny on `list_events` still can't see `list_events` (defense-in-depth). - runToolCall still gets aiActor → sub-agent tool executions are attributed to the same mission/iteration as the parent. mana-ai is deliberately NOT wired: its onToolCall is a no-op recorder (plans get staged, then executed client-side on sync). Sub-agents there would produce no value since the sub-agent couldn't execute tools either, just plan. When the tool-registry fully absorbs AI_TOOL_CATALOG (Personas-plan M4), mana-ai will get sub-agent support in that same migration. No new tests — shared-ai's 107 tests cover the primitive + handler exhaustively. 
Existing 31 companion+mission tests remain green; svelte-check clean across 7427 files. Completes M3. runPlannerLoop now has Claude-Code's five big patterns: policy-gate (M1) / reminder-channel (M1) / parallel-reads (M1) / compactor (M2) / sub-agents (M3). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 20:01:09 +02:00 · 2026-04-23 19:14:36 +02:00 · 2026-04-23 19:14:36 +02:00 · 66bfcb3996
commit 66bfcb3996
parent 101af462a8
2 changed files with 102 additions and 41 deletions
--- a/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
+++ b/apps/mana/apps/web/src/lib/data/ai/missions/runner.ts
@ -43,9 +43,12 @@ import {
 	AI_TOOL_CATALOG_BY_NAME,
 	buildSystemPrompt,
 	compactHistory,
+	createTaskToolHandler,
 	runPlannerLoop,
 	runPrePlanGuardrails,
 	runPreExecuteGuardrails,
+	TASK_TOOL_NAME,
+	TASK_TOOL_SCHEMA,
 	type ChatMessage,
 	type LlmClient,
 	type ResolvedInput,
@ -266,12 +269,54 @@ async function runMissionInner(
 			agentMemory: owningAgent?.memory ?? null,
 		});

+		// Regular tool dispatcher — shared between the planner loop
+		// and any sub-agent spawned via the `task` tool, so both
+		// routes go through the same guardrail + executor + actor
+		// attribution path.
+		const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
+			await checkCancel();
+			await enterPhase('staging-proposals', call.name);
+
+			const execCheck = runPreExecuteGuardrails({
+				summary: call.name,
+				toolName: call.name,
+				params: call.arguments,
+				rationale: mission!.objective,
+			});
+			if (!execCheck.passed) {
+				return { success: false, message: `Guardrail: ${execCheck.blockReason}` };
+			}
+
+			try {
+				return await runToolCall(call.name, call.arguments, aiActor);
+			} catch (err) {
+				const msg = err instanceof Error ? err.message : String(err);
+				console.error(`[MissionRunner] tool ${call.name} threw:`, err);
+				return { success: false, message: `Tool execution failed: ${msg}` };
+			}
+		};
+
+		// `task` tool handler: the mission planner can spawn a
+		// research/plan/general sub-agent, which returns one string
+		// summary. parentDepth=0 here; the sub-agent itself refuses
+		// to launch another one. Cheap-tier model since sub-agents
+		// are summarisation-heavy by construction.
+		const taskHandler = createTaskToolHandler({
+			llm: deps.llm,
+			model: 'google/gemini-2.5-flash-lite',
+			parentDepth: 0,
+			parentTools: availableTools,
+			parentOnToolCall: dispatchTool,
+		});
+
+		const toolsWithTask = [...availableTools, TASK_TOOL_SCHEMA];
+
 		const loopResult = await runPlannerLoop({
 			llm: deps.llm,
 			input: {
 				systemPrompt,
 				userPrompt,
-				tools: availableTools,
+				tools: toolsWithTask,
 				model: deps.model ?? 'google/gemini-2.5-flash',
 				maxRounds: MAX_PLANNER_ROUNDS,
 				// Fan-out read tools when the planner requests several in
@ -295,31 +340,13 @@ async function runMissionInner(
 				},
 			},
 			onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
-				await checkCancel();
-				await enterPhase('staging-proposals', call.name);
-
-				// Pre-execute guardrail per call. Failures come back as
-				// tool-messages so the LLM can choose a different path.
-				const execCheck = runPreExecuteGuardrails({
-					summary: call.name,
-					toolName: call.name,
-					params: call.arguments,
-					rationale: mission!.objective,
-				});
-				if (!execCheck.passed) {
-					return {
-						success: false,
-						message: `Guardrail: ${execCheck.blockReason}`,
-					};
-				}
-
-				try {
-					return await runToolCall(call.name, call.arguments, aiActor);
-				} catch (err) {
-					const msg = err instanceof Error ? err.message : String(err);
-					console.error(`[MissionRunner] tool ${call.name} threw:`, err);
-					return { success: false, message: `Tool execution failed: ${msg}` };
+				// Route `task` calls to the sub-agent handler; everything
+				// else goes through the regular dispatcher (guardrail +
+				// executor + actor attribution).
+				if (call.name === TASK_TOOL_NAME) {
+					return taskHandler.handle(call);
 				}
+				return dispatchTool(call);
 			},
 		});

--- a/apps/mana/apps/web/src/lib/modules/companion/engine.ts
+++ b/apps/mana/apps/web/src/lib/modules/companion/engine.ts
@ -16,6 +16,9 @@ import {
 	AI_TOOL_CATALOG,
 	AI_TOOL_CATALOG_BY_NAME,
 	compactHistory,
+	createTaskToolHandler,
+	TASK_TOOL_NAME,
+	TASK_TOOL_SCHEMA,
 	type ChatMessage,
 	type ToolCallRequest,
 	type ToolResult,
@ -106,6 +109,44 @@ export async function runCompanionChat(
 	const priorMessages = historyToChatMessages(history);
 	const toolCalls: EngineResult['toolCalls'] = [];

+	// Tool list offered to the planner: the full catalog plus `task`.
+	// The sub-agent handler below is given AI_TOOL_CATALOG alone as its
+	// parentTools, so TASK_TOOL_SCHEMA is not in the sub-agent's tool set.
+	const toolsWithTask = [...AI_TOOL_CATALOG, TASK_TOOL_SCHEMA];
+
+	// Local dispatcher the planner's loop invokes and — via the task
+	// handler's parentOnToolCall — also any sub-agent. Hoisted so the
+	// task handler can close over it before the loop sets the branch.
+	const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
+		const startedAt = Date.now();
+		const toolResult = await executeTool(call.name, call.arguments);
+		const latencyMs = Date.now() - startedAt;
+
+		const toolDef = getTool(call.name);
+		emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
+			tool: call.name,
+			module: toolDef?.module ?? 'unknown',
+			success: toolResult.success,
+			latencyMs,
+			errorMessage: toolResult.success ? undefined : toolResult.message,
+		});
+
+		toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
+		return toolResult;
+	};
+
+	// Sub-agent handler bound to this session. parentDepth = 0 means
+	// the sub-agent itself will refuse to launch another one (recursion
+	// guard inside runSubAgent). Model is the cheap tier — sub-agents
+	// are summarisation-heavy so flash-lite is the right default.
+	const taskHandler = createTaskToolHandler({
+		llm,
+		model: 'google/gemini-2.5-flash-lite',
+		parentDepth: 0,
+		parentTools: AI_TOOL_CATALOG,
+		parentOnToolCall: dispatchTool,
+	});
+
 	try {
 		const result = await runPlannerLoop({
 			llm,
@ -113,7 +154,7 @@ export async function runCompanionChat(
 				systemPrompt,
 				userPrompt: userMessage,
 				priorMessages,
-				tools: AI_TOOL_CATALOG,
+				tools: toolsWithTask,
 				model: 'google/gemini-2.5-flash',
 				maxRounds: MAX_TOOL_ROUNDS,
 				temperature: 0.7,
@ -136,21 +177,14 @@ export async function runCompanionChat(
 				},
 			},
 			onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
-				const startedAt = Date.now();
-				const toolResult = await executeTool(call.name, call.arguments);
-				const latencyMs = Date.now() - startedAt;
-
-				const toolDef = getTool(call.name);
-				emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
-					tool: call.name,
-					module: toolDef?.module ?? 'unknown',
-					success: toolResult.success,
-					latencyMs,
-					errorMessage: toolResult.success ? undefined : toolResult.message,
-				});
-
-				toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
-				return toolResult;
+				// Route `task` calls into the sub-agent handler. Everything
+				// else goes through the regular executor.
+				if (call.name === TASK_TOOL_NAME) {
+					const result = await taskHandler.handle(call);
+					toolCalls.push({ name: call.name, params: call.arguments, result });
+					return result;
+				}
+				return dispatchTool(call);
 			},
 		});