feat(webapp): wire task tool into Companion chat + Mission runner (M3.3)

Closes the M3 sub-agent loop. Both webapp consumers of runPlannerLoop
now expose the `task` tool to their planner LLM and route matching
calls to a session-bound sub-agent handler.

Pattern (identical in both files):

  1. Hoist the regular tool dispatcher into a local `dispatchTool`
     so both the main loop AND the sub-agent executor can share it.
     The parent's guardrail, executor, actor attribution, and
     domain-event emission happen exactly once — sub-agent tool
     calls route through the same function.

  2. Build a per-session taskHandler via createTaskToolHandler()
     with parentDepth=0 (sub-agents themselves refuse to recurse)
     and model=google/gemini-2.5-flash-lite (cheap tier —
     sub-agents are summarisation-heavy, no reason to burn primary
     budget on them).

  3. toolsWithTask = [...regular tools, TASK_TOOL_SCHEMA].

  4. onToolCall branches on `call.name === TASK_TOOL_NAME` →
     taskHandler.handle; else dispatchTool. Both return
     ToolResult, so the loop doesn't care which route was taken.

Companion:
  - parentTools = AI_TOOL_CATALOG (full catalog)
  - Token tracking via taskHandler.cumulativeUsage() available if
    we later want to attribute sub-agent tokens to a companion-
    session counter

Mission runner:
  - parentTools = availableTools (agent-policy-filtered)
  - Sub-agent inherits the same filter — a research sub-agent in a
    mission that already had policy:deny on `list_events` still
    can't see `list_events` (defense-in-depth)
  - runToolCall still gets aiActor → sub-agent tool executions are
    attributed to the same mission/iteration as the parent

mana-ai is deliberately NOT wired: its onToolCall is a no-op recorder
(plans get staged, then executed client-side on sync). Sub-agents there
would add no value, since a sub-agent could not execute tools either —
it could only plan. When the tool-registry fully absorbs AI_TOOL_CATALOG
(Personas-plan M4), mana-ai will get sub-agent support in that same
migration.

No new tests — shared-ai's 107 tests cover the primitive + handler
exhaustively. Existing 31 companion+mission tests remain green;
svelte-check clean across 7427 files.

Completes M3. runPlannerLoop now has Claude-Code's five big patterns:
policy-gate (M1) / reminder-channel (M1) / parallel-reads (M1) /
compactor (M2) / sub-agents (M3).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-23 19:14:36 +02:00
parent 101af462a8
commit 66bfcb3996
2 changed files with 102 additions and 41 deletions

View file

@ -43,9 +43,12 @@ import {
AI_TOOL_CATALOG_BY_NAME,
buildSystemPrompt,
compactHistory,
createTaskToolHandler,
runPlannerLoop,
runPrePlanGuardrails,
runPreExecuteGuardrails,
TASK_TOOL_NAME,
TASK_TOOL_SCHEMA,
type ChatMessage,
type LlmClient,
type ResolvedInput,
@ -266,12 +269,54 @@ async function runMissionInner(
agentMemory: owningAgent?.memory ?? null,
});
// Regular tool dispatcher — shared between the planner loop
// and any sub-agent spawned via the `task` tool, so both
// routes go through the same guardrail + executor + actor
// attribution path.
const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
await checkCancel();
await enterPhase('staging-proposals', call.name);
const execCheck = runPreExecuteGuardrails({
summary: call.name,
toolName: call.name,
params: call.arguments,
rationale: mission!.objective,
});
if (!execCheck.passed) {
return { success: false, message: `Guardrail: ${execCheck.blockReason}` };
}
try {
return await runToolCall(call.name, call.arguments, aiActor);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[MissionRunner] tool ${call.name} threw:`, err);
return { success: false, message: `Tool execution failed: ${msg}` };
}
};
// `task` tool handler: the mission planner can spawn a
// research/plan/general sub-agent, which returns one string
// summary. parentDepth=0 here; the sub-agent itself refuses
// to launch another one. Cheap-tier model since sub-agents
// are summarisation-heavy by construction.
const taskHandler = createTaskToolHandler({
llm: deps.llm,
model: 'google/gemini-2.5-flash-lite',
parentDepth: 0,
parentTools: availableTools,
parentOnToolCall: dispatchTool,
});
const toolsWithTask = [...availableTools, TASK_TOOL_SCHEMA];
const loopResult = await runPlannerLoop({
llm: deps.llm,
input: {
systemPrompt,
userPrompt,
tools: availableTools,
tools: toolsWithTask,
model: deps.model ?? 'google/gemini-2.5-flash',
maxRounds: MAX_PLANNER_ROUNDS,
// Fan-out read tools when the planner requests several in
@ -295,31 +340,13 @@ async function runMissionInner(
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
await checkCancel();
await enterPhase('staging-proposals', call.name);
// Pre-execute guardrail per call. Failures come back as
// tool-messages so the LLM can choose a different path.
const execCheck = runPreExecuteGuardrails({
summary: call.name,
toolName: call.name,
params: call.arguments,
rationale: mission!.objective,
});
if (!execCheck.passed) {
return {
success: false,
message: `Guardrail: ${execCheck.blockReason}`,
};
}
try {
return await runToolCall(call.name, call.arguments, aiActor);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
console.error(`[MissionRunner] tool ${call.name} threw:`, err);
return { success: false, message: `Tool execution failed: ${msg}` };
// Route `task` calls to the sub-agent handler; everything
// else goes through the regular dispatcher (guardrail +
// executor + actor attribution).
if (call.name === TASK_TOOL_NAME) {
return taskHandler.handle(call);
}
return dispatchTool(call);
},
});

View file

@ -16,6 +16,9 @@ import {
AI_TOOL_CATALOG,
AI_TOOL_CATALOG_BY_NAME,
compactHistory,
createTaskToolHandler,
TASK_TOOL_NAME,
TASK_TOOL_SCHEMA,
type ChatMessage,
type ToolCallRequest,
type ToolResult,
@ -106,6 +109,44 @@ export async function runCompanionChat(
const priorMessages = historyToChatMessages(history);
const toolCalls: EngineResult['toolCalls'] = [];
// The parent tool catalog the sub-agent may filter down from.
// TASK_TOOL_SCHEMA itself is NOT in parentTools — a sub-agent can't
// launch a nested sub-agent by construction (recursion guard).
const toolsWithTask = [...AI_TOOL_CATALOG, TASK_TOOL_SCHEMA];
// Local dispatcher the planner's loop invokes and — via the task
// handler's parentOnToolCall — also any sub-agent. Hoisted so the
// task handler can close over it before the loop sets the branch.
const dispatchTool = async (call: ToolCallRequest): Promise<ToolResult> => {
const startedAt = Date.now();
const toolResult = await executeTool(call.name, call.arguments);
const latencyMs = Date.now() - startedAt;
const toolDef = getTool(call.name);
emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
tool: call.name,
module: toolDef?.module ?? 'unknown',
success: toolResult.success,
latencyMs,
errorMessage: toolResult.success ? undefined : toolResult.message,
});
toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
return toolResult;
};
// Sub-agent handler bound to this session. parentDepth = 0 means
// the sub-agent itself will refuse to launch another one (recursion
// guard inside runSubAgent). Model is the cheap tier — sub-agents
// are summarisation-heavy so flash-lite is the right default.
const taskHandler = createTaskToolHandler({
llm,
model: 'google/gemini-2.5-flash-lite',
parentDepth: 0,
parentTools: AI_TOOL_CATALOG,
parentOnToolCall: dispatchTool,
});
try {
const result = await runPlannerLoop({
llm,
@ -113,7 +154,7 @@ export async function runCompanionChat(
systemPrompt,
userPrompt: userMessage,
priorMessages,
tools: AI_TOOL_CATALOG,
tools: toolsWithTask,
model: 'google/gemini-2.5-flash',
maxRounds: MAX_TOOL_ROUNDS,
temperature: 0.7,
@ -136,21 +177,14 @@ export async function runCompanionChat(
},
},
onToolCall: async (call: ToolCallRequest): Promise<ToolResult> => {
const startedAt = Date.now();
const toolResult = await executeTool(call.name, call.arguments);
const latencyMs = Date.now() - startedAt;
const toolDef = getTool(call.name);
emitDomainEvent('CompanionToolCalled', 'companion', 'tools', call.name, {
tool: call.name,
module: toolDef?.module ?? 'unknown',
success: toolResult.success,
latencyMs,
errorMessage: toolResult.success ? undefined : toolResult.message,
});
toolCalls.push({ name: call.name, params: call.arguments, result: toolResult });
return toolResult;
// Route `task` calls into the sub-agent handler. Everything
// else goes through the regular executor.
if (call.name === TASK_TOOL_NAME) {
const result = await taskHandler.handle(call);
toolCalls.push({ name: call.name, params: call.arguments, result });
return result;
}
return dispatchTool(call);
},
});