mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-19 01:01:30 +02:00
Exposes runSubAgent() as a tool the planner LLM can call natively,
matching Claude Code's `Task` tool shape: { subagent_type, description,
prompt } -> single-string summary.
New exports from @mana/shared-ai:
- TASK_TOOL_NAME = 'task'
- TASK_TOOL_SCHEMA — ToolSchema ready to drop into a runPlannerLoop
`tools` array. subagent_type enum = research|plan|general;
description+prompt required; defaultPolicy: 'auto' (control-flow,
not a user-data write).
- createTaskToolHandler(opts) — factory returning:
- handle(call): structured ToolResult with the sub-agent's
summary as message + data {subAgentType, toolsCalled,
rounds, stopReason, usage}
- cumulativeUsage(): rolled-up TokenUsage across all sub-agent
invocations — parent budget accounting reads from here
- invocationCount(): metric-ready counter
Why not in mana-tool-registry: `task` is a loop-internal control-flow
primitive, not a user-data operation. Registry is for habits/notes/etc.
where MCP exposure and space-scoping matter. `task` never touches
mana-sync and never crosses the MCP boundary.
Recursion guard is defense-in-depth: the primitive itself throws
SubAgentRecursionError, and this handler additionally checks
parentDepth >= MAX_SUB_AGENT_DEPTH up front and returns a structured
ToolResult instead, so the LLM sees it as regular tool feedback.
Exceptions from the sub-agent (provider down, network) get wrapped
as `{ success: false, message: 'Sub-agent failed: ...' }`. The parent
loop's round continues.
14 new tests covering schema shape, recursion rejection, argument
validation (4 cases), happy path with tool dispatch, cumulative
usage tracking across multiple invocations, exception wrapping,
and parent-dispatcher routing.
107 shared-ai tests green total (was 93).
M3.3 consumer wiring follows.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
264 lines
8.1 KiB
TypeScript
264 lines
8.1 KiB
TypeScript
import { describe, expect, it, vi } from 'vitest';
|
|
import { TASK_TOOL_NAME, TASK_TOOL_SCHEMA, createTaskToolHandler } from './task-tool';
|
|
import { MAX_SUB_AGENT_DEPTH } from './sub-agent';
|
|
import { MockLlmClient } from './mock-llm';
|
|
import type { ToolCallRequest, ToolResult } from './loop';
|
|
import type { ToolSchema } from '../tools/schemas';
|
|
|
|
/**
 * Tool list passed as `parentTools` to every handler built in this file.
 *
 * Two entries in module 'test': `list_things` (policy 'auto', no parameters)
 * and `create_thing` (policy 'propose', one required string parameter `title`).
 */
const parentTools: ToolSchema[] = [
	{
		name: 'list_things',
		module: 'test',
		description: 'read',
		defaultPolicy: 'auto',
		parameters: [],
	},
	{
		name: 'create_thing',
		module: 'test',
		description: 'write',
		defaultPolicy: 'propose',
		parameters: [
			{
				name: 'title',
				type: 'string',
				description: 't',
				required: true,
			},
		],
	},
];
|
|
|
|
/**
 * Builds a tool-call request addressed to the task tool.
 *
 * @param args - Value placed verbatim into the request's `arguments` field.
 * @returns A request with the fixed id 'tc-1' and name `TASK_TOOL_NAME`.
 */
function makeCall(args: Record<string, unknown>): ToolCallRequest {
	const request: ToolCallRequest = {
		id: 'tc-1',
		name: TASK_TOOL_NAME,
		arguments: args,
	};
	return request;
}
|
|
|
|
// ─── Schema shape ──────────────────────────────────────────────────
|
|
|
|
// Pins the static shape of the exported schema: its name, the subagent_type
// enum, the list of required parameters, and the default policy.
describe('TASK_TOOL_SCHEMA', () => {
	it('is named "task"', () => {
		// Both the schema's own name and the exported constant must be 'task'.
		expect(TASK_TOOL_SCHEMA.name).toBe('task');
		expect(TASK_TOOL_NAME).toBe('task');
	});

	it('carries subagent_type enum with research/plan/general', () => {
		const subagentTypeParam = TASK_TOOL_SCHEMA.parameters.find(
			(param) => param.name === 'subagent_type'
		);

		// toBeDefined() throws first when the parameter is missing, so the
		// optional access below is only reached with a defined value.
		expect(subagentTypeParam).toBeDefined();
		expect(subagentTypeParam?.enum).toEqual(['research', 'plan', 'general']);
	});

	it('requires description + prompt + subagent_type', () => {
		// Collect the names of required parameters, in declaration order.
		const requiredNames = TASK_TOOL_SCHEMA.parameters.flatMap((param) =>
			param.required ? [param.name] : []
		);

		expect(requiredNames).toEqual(['subagent_type', 'description', 'prompt']);
	});

	it('defaultPolicy is auto (control-flow primitive, not a write)', () => {
		const { defaultPolicy } = TASK_TOOL_SCHEMA;
		expect(defaultPolicy).toBe('auto');
	});
});
|
|
|
|
// ─── Recursion rejection ───────────────────────────────────────────
|
|
|
|
// A handler created with parentDepth equal to MAX_SUB_AGENT_DEPTH must resolve
// with a failed ToolResult rather than reject.
describe('createTaskToolHandler — recursion', () => {
	it('refuses when parentDepth is at the cap (structured error, not throw)', async () => {
		const cappedHandler = createTaskToolHandler({
			llm: new MockLlmClient(),
			model: 'x/y',
			parentDepth: MAX_SUB_AGENT_DEPTH,
			parentTools,
			parentOnToolCall: async () => ({ success: true, message: '' }),
		});

		const nestedCall = makeCall({
			subagent_type: 'research',
			description: 'nested',
			prompt: 'do it',
		});
		const outcome = await cappedHandler.handle(nestedCall);

		expect(outcome.success).toBe(false);
		// The refusal message is expected to contain this German fragment.
		expect(outcome.message).toContain('nicht verschachtelt');
	});
});
|
|
|
|
// ─── Input validation ──────────────────────────────────────────────
|
|
|
|
// Each case feeds one malformed argument set to a fresh depth-0 handler and
// expects a failed ToolResult whose message mentions the offending field.
describe('createTaskToolHandler — argument validation', () => {
	/** Creates a new handler at depth 0 with a mock LLM and a no-op parent dispatcher. */
	function buildHandler() {
		return createTaskToolHandler({
			llm: new MockLlmClient(),
			model: 'x/y',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: async () => ({ success: true, message: '' }),
		});
	}

	it('rejects non-object args', async () => {
		// The double cast smuggles `null` past the compiler on purpose.
		const nullArgsCall = {
			id: 't',
			name: 'task',
			arguments: null as unknown as Record<string, unknown>,
		};

		const outcome = await buildHandler().handle(nullArgsCall);

		expect(outcome.success).toBe(false);
		expect(outcome.message).toContain('object');
	});

	it('rejects invalid subagent_type', async () => {
		const badTypeCall = makeCall({ subagent_type: 'evil', description: 'x', prompt: 'y' });

		const outcome = await buildHandler().handle(badTypeCall);

		expect(outcome.success).toBe(false);
		expect(outcome.message).toContain('research|plan|general');
	});

	it('rejects empty description', async () => {
		const emptyDescriptionCall = makeCall({
			subagent_type: 'research',
			description: '',
			prompt: 'y',
		});

		const outcome = await buildHandler().handle(emptyDescriptionCall);

		expect(outcome.success).toBe(false);
		expect(outcome.message).toContain('description');
	});

	it('rejects empty prompt', async () => {
		const emptyPromptCall = makeCall({
			subagent_type: 'research',
			description: 'x',
			prompt: '',
		});

		const outcome = await buildHandler().handle(emptyPromptCall);

		expect(outcome.success).toBe(false);
		expect(outcome.message).toContain('prompt');
	});
});
|
|
|
|
// ─── Happy path ────────────────────────────────────────────────────
|
|
|
|
// End-to-end behaviour of handle(): a successful run, usage accounting over
// several runs, the zero state before any run, and a throwing LLM client.
describe('createTaskToolHandler — happy path', () => {
	it('spawns a sub-agent and returns its summary as ToolResult.message', async () => {
		// Queue a tool-call response followed by a stop response carrying the summary.
		const scriptedLlm = new MockLlmClient()
			.enqueueToolCalls([{ name: 'list_things', args: {} }])
			.enqueueStop('Found 2 items: a, b');

		// Spy dispatcher so the number of routed tool calls can be asserted.
		const dispatchSpy = vi.fn(
			async (_c: ToolCallRequest): Promise<ToolResult> => ({
				success: true,
				data: ['a', 'b'],
				message: '2 items',
			})
		);

		const taskHandler = createTaskToolHandler({
			llm: scriptedLlm,
			model: 'google/gemini-2.5-flash-lite',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: dispatchSpy,
		});

		const outcome = await taskHandler.handle(
			makeCall({
				subagent_type: 'research',
				description: 'scan things',
				prompt: 'list everything and report back',
			})
		);

		expect(outcome.success).toBe(true);
		expect(outcome.message).toBe('Found 2 items: a, b');

		// Shape of the metadata fields this test reads from `data`.
		type TaskResultData = {
			subAgentType: string;
			toolsCalled: number;
			rounds: number;
			stopReason: string;
		};
		const meta = outcome.data as TaskResultData;

		expect(meta.subAgentType).toBe('research');
		expect(meta.toolsCalled).toBe(1);
		expect(meta.rounds).toBeGreaterThanOrEqual(2);
		expect(dispatchSpy).toHaveBeenCalledTimes(1);
	});

	it('tracks cumulative usage across multiple invocations', async () => {
		const scriptedLlm = new MockLlmClient();

		// Two sub-agent runs, each reports usage. The responses are pushed
		// straight onto the mock's `queue` so a `usage` payload can be attached.
		[0, 1].forEach((i) => {
			(scriptedLlm as unknown as { queue: unknown[] }).queue.push({
				content: `summary-${i}`,
				toolCalls: [],
				finishReason: 'stop',
				usage: { promptTokens: 100, completionTokens: 30, totalTokens: 130 },
			});
		});

		const taskHandler = createTaskToolHandler({
			llm: scriptedLlm,
			model: 'google/gemini-2.5-flash-lite',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: async () => ({ success: true, message: '' }),
		});

		const firstCall = makeCall({ subagent_type: 'plan', description: 'a', prompt: 'one' });
		await taskHandler.handle(firstCall);
		const secondCall = makeCall({ subagent_type: 'plan', description: 'b', prompt: 'two' });
		await taskHandler.handle(secondCall);

		expect(taskHandler.invocationCount()).toBe(2);

		// Expected totals are the per-run usage doubled.
		const totals = taskHandler.cumulativeUsage();
		expect(totals.promptTokens).toBe(200);
		expect(totals.completionTokens).toBe(60);
		expect(totals.totalTokens).toBe(260);
	});

	it('counts zero usage if no successful sub-agent ran', async () => {
		// handle() is never called here; only the initial counters are read.
		const idleHandler = createTaskToolHandler({
			llm: new MockLlmClient(),
			model: 'x/y',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: async () => ({ success: true, message: '' }),
		});

		expect(idleHandler.invocationCount()).toBe(0);

		const zeroUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
		expect(idleHandler.cumulativeUsage()).toEqual(zeroUsage);
	});

	it('wraps sub-agent exceptions as structured ToolResult failures', async () => {
		// Minimal stand-in client whose only method always throws.
		const failingLlm = {
			async complete() {
				throw new Error('provider is down');
			},
		};

		const taskHandler = createTaskToolHandler({
			llm: failingLlm,
			model: 'x/y',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: async () => ({ success: true, message: '' }),
		});

		const outcome = await taskHandler.handle(
			makeCall({ subagent_type: 'general', description: 'x', prompt: 'y' })
		);

		// The failure must arrive as a resolved result carrying both the
		// wrapper prefix and the original error text.
		expect(outcome.success).toBe(false);
		expect(outcome.message).toContain('Sub-agent failed');
		expect(outcome.message).toContain('provider is down');
	});
});
|
|
|
|
// ─── Tool-routing through parent dispatcher ────────────────────────
|
|
|
|
// Verifies that a tool call issued by the sub-agent reaches the dispatcher
// supplied as `parentOnToolCall`.
describe('createTaskToolHandler — tool routing', () => {
	it('sub-agent tool calls route through parent dispatcher (policy/audit stays reused)', async () => {
		// Queue a tool-call response followed by a stop response.
		const scriptedLlm = new MockLlmClient()
			.enqueueToolCalls([{ name: 'list_things', args: {} }])
			.enqueueStop('summary');

		// Flag flipped by the dispatcher the first time it is invoked.
		let dispatcherInvoked = false;
		const recordingDispatch = async (_c: ToolCallRequest): Promise<ToolResult> => {
			dispatcherInvoked = true;
			return { success: true, message: 'from parent' };
		};

		const taskHandler = createTaskToolHandler({
			llm: scriptedLlm,
			model: 'x/y',
			parentDepth: 0,
			parentTools,
			parentOnToolCall: recordingDispatch,
		});

		const researchCall = makeCall({
			subagent_type: 'research',
			description: 'd',
			prompt: 'p',
		});
		await taskHandler.handle(researchCall);

		expect(dispatcherInvoked).toBe(true);
	});
});
|