diff --git a/packages/mana-tool-registry/src/modules/index.ts b/packages/mana-tool-registry/src/modules/index.ts index d37f959c6..14c7ee758 100644 --- a/packages/mana-tool-registry/src/modules/index.ts +++ b/packages/mana-tool-registry/src/modules/index.ts @@ -17,6 +17,7 @@ import { registerMoodTools } from './mood.ts'; import { registerNotesTools } from './notes.ts'; import { registerSpacesTools } from './spaces.ts'; import { registerTodoTools } from './todo.ts'; +import { registerWardrobeTools } from './wardrobe.ts'; export function registerAllModules(): void { registerHabitsTools(); @@ -26,6 +27,7 @@ export function registerAllModules(): void { registerNotesTools(); registerSpacesTools(); registerTodoTools(); + registerWardrobeTools(); } export { @@ -36,4 +38,5 @@ export { registerNotesTools, registerSpacesTools, registerTodoTools, + registerWardrobeTools, }; diff --git a/packages/mana-tool-registry/src/modules/wardrobe.ts b/packages/mana-tool-registry/src/modules/wardrobe.ts new file mode 100644 index 000000000..6919751a2 --- /dev/null +++ b/packages/mana-tool-registry/src/modules/wardrobe.ts @@ -0,0 +1,572 @@ +/** + * Wardrobe — tools for agents to browse a user's digital closet, + * compose outfits, and run try-on generations. Four tools: + * + * - wardrobe.listGarments (read) — what do I own, filtered by + * category / tags + * - wardrobe.listOutfits (read) — which combinations exist, + * filtered by occasion / favorite + * - wardrobe.createOutfit (write) — compose a named outfit from + * garment ids + * - wardrobe.tryOn (write) — render the user wearing the + * outfit; wraps the existing + * picture/generate-with-reference + * endpoint with resolved refs + * + * Space scope: garments and outfits live in the active space. meImages + * (the face/body references needed for try-on) likewise space-scoped + * after the v40 migration. 
+ * Everything in this module filters client-
+ * side (after mana-sync pull) on `row.spaceId === ctx.spaceId`, matching
+ * the webapp's scopedForModule behaviour.
+ *
+ * Plan: docs/plans/wardrobe-module.md M5.
+ */
+
+import { z } from 'zod';
+import { decryptRecordFields, encryptRecordFields } from '@mana/shared-crypto';
+import { pullAll, pushInsert } from '../sync-client.ts';
+import { registerTool } from '../registry.ts';
+import type { ToolContext, ToolSpec } from '../types.ts';
+
+// Sync coordinates (app id + table) for each collection this module reads,
+// together with the field list handed to decryptRecordFields /
+// encryptRecordFields for that table.
+const GARMENTS_APP_ID = 'wardrobe';
+const GARMENTS_TABLE = 'wardrobeGarments';
+const GARMENT_ENCRYPTED_FIELDS = [
+	'name',
+	'brand',
+	'color',
+	'size',
+	'material',
+	'tags',
+	'notes',
+] as const;
+
+const OUTFITS_APP_ID = 'wardrobe';
+const OUTFITS_TABLE = 'wardrobeOutfits';
+const OUTFIT_ENCRYPTED_FIELDS = ['name', 'description', 'tags'] as const;
+
+const ME_APP_ID = 'profile';
+const ME_TABLE = 'meImages';
+// NOTE(review): nothing in this module reads ME_ENCRYPTED_FIELDS — tryOn only
+// uses mediaId / primaryFor / spaceId and never decrypts meImages rows.
+const ME_ENCRYPTED_FIELDS = ['label', 'tags'] as const;
+
+// Endpoints are resolved lazily (on every call) so the environment is read at
+// use time rather than at module load.
+const SYNC_URL = () => process.env.MANA_SYNC_URL ?? 'http://localhost:3050';
+const PICTURE_API_URL = () => process.env.MANA_API_URL ?? 'http://localhost:3060';
+const CLIENT_ID = () => process.env.MANA_MCP_CLIENT_ID ?? 'mana-mcp';
+
+/** Builds the per-call sync-client config from the tool context's JWT. */
+function syncCfg(ctx: ToolContext) {
+	return { baseUrl: SYNC_URL(), jwt: ctx.jwt, clientId: CLIENT_ID() };
+}
+
+// ─── Domain shapes (zod) ──────────────────────────────────────────
+
+const garmentCategory = z.enum([
+	'top',
+	'bottom',
+	'dress',
+	'outerwear',
+	'shoes',
+	'bag',
+	'accessory',
+	'glasses',
+	'jewelry',
+	'hat',
+	'other',
+]);
+type GarmentCategory = z.infer<typeof garmentCategory>;
+
+/**
+ * Categories that tryOn treats as face-only: when every garment of an outfit
+ * falls in this set, the render defaults to accessory-only mode.
+ */
+const FACE_ONLY_CATEGORIES: ReadonlySet<GarmentCategory> = new Set<GarmentCategory>([
+	'accessory',
+	'glasses',
+	'jewelry',
+	'hat',
+]);
+
+const outfitOccasion = z.enum([
+	'casual',
+	'work',
+	'formal',
+	'workout',
+	'date',
+	'travel',
+	'event',
+	'sleep',
+	'other',
+]);
+
+/** Decrypted garment as returned to the agent. */
+const garmentSchema = z.object({
+	id: z.string(),
+	name: z.string(),
+	category: garmentCategory,
+	mediaIds: z.array(z.string()),
+	brand: z.string().nullable(),
+	color: z.string().nullable(),
+	size: z.string().nullable(),
+	material: z.string().nullable(),
+	tags: z.array(z.string()),
+	notes: z.string().nullable(),
+});
+
+/** Decrypted outfit as returned to the agent. */
+const outfitSchema = z.object({
+	id: z.string(),
+	name: z.string(),
+	description: z.string().nullable(),
+	garmentIds: z.array(z.string()),
+	occasion: outfitOccasion.nullable(),
+	tags: z.array(z.string()),
+	isFavorite: z.boolean(),
+});
+
+// Raw row shapes — fields beyond what we consume are tolerated.
+// Every property is optional: rows arrive from the sync layer without
+// validation, so handlers check for the fields they need before using them.
+interface RawGarmentRow {
+	id?: string;
+	name?: string;
+	category?: string;
+	mediaIds?: string[];
+	brand?: string | null;
+	color?: string | null;
+	size?: string | null;
+	material?: string | null;
+	tags?: string[] | null;
+	notes?: string | null;
+	isArchived?: boolean;
+	deletedAt?: string | null;
+	spaceId?: string | null;
+}
+
+interface RawOutfitRow {
+	id?: string;
+	name?: string;
+	description?: string | null;
+	garmentIds?: string[];
+	occasion?: string | null;
+	tags?: string[] | null;
+	isFavorite?: boolean;
+	isArchived?: boolean;
+	deletedAt?: string | null;
+	spaceId?: string | null;
+}
+
+interface RawMeImageRow {
+	id?: string;
+	mediaId?: string;
+	primaryFor?: string | null;
+	deletedAt?: string | null;
+	spaceId?: string | null;
+}
+
+// ─── wardrobe.listGarments ────────────────────────────────────────
+
+const listGarmentsInput = z.object({
+	category: garmentCategory.optional(),
+	/** Intersection filter: rows must contain EVERY tag listed. Empty = no filter. */
+	tags: z.array(z.string()).max(10).default([]),
+	limit: z.number().int().positive().max(200).default(50),
+});
+
+const listGarmentsOutput = z.object({
+	garments: z.array(garmentSchema),
+});
+
+/**
+ * Read tool: lists the live (not archived, not soft-deleted) garments of the
+ * active space, optionally narrowed by category and by a tag intersection.
+ */
+export const wardrobeListGarments: ToolSpec<typeof listGarmentsInput, typeof listGarmentsOutput> =
+	{
+		name: 'wardrobe.listGarments',
+		module: 'wardrobe',
+		scope: 'user-space',
+		policyHint: 'read',
+		description:
+			"List the caller's garments in the active space. Filter by `category` (closed enum) and/or `tags` (intersection — every listed tag must be present). Returns at most `limit` rows, newest first. Archived + soft-deleted items are excluded.",
+		input: listGarmentsInput,
+		output: listGarmentsOutput,
+		encryptedFields: { table: GARMENTS_TABLE, fields: [...GARMENT_ENCRYPTED_FIELDS] },
+		async handler(input, ctx) {
+			const key = await ctx.getMasterKey();
+			const res = await pullAll(syncCfg(ctx), GARMENTS_APP_ID, GARMENTS_TABLE);
+			// NOTE(review): the description promises "newest first", but nothing
+			// here sorts — rows keep whatever order pullAll yields. Confirm that
+			// ordering or sort explicitly.
+			// NOTE(review): only `delete` ops are dropped; if pullAll returns a
+			// change feed rather than a collapsed snapshot, earlier changes of a
+			// later-deleted row would still pass — confirm against sync-client.
+			//
+			// `category`, `spaceId` and the lifecycle flags are not part of
+			// GARMENT_ENCRYPTED_FIELDS, so every plaintext filter runs before
+			// decryption and only candidate rows pay the crypto cost.
+			const alive = res.changes
+				.filter((c) => c.op !== 'delete' && c.data)
+				.map((c) => c.data as RawGarmentRow)
+				.filter((row) => !row.deletedAt && !row.isArchived)
+				.filter((row) => row.spaceId === ctx.spaceId)
+				.filter((row) => !input.category || row.category === input.category);
+
+			const decrypted = (await Promise.all(
+				alive.map((row) =>
+					decryptRecordFields(
+						row as unknown as Record<string, unknown>,
+						GARMENT_ENCRYPTED_FIELDS,
+						key
+					)
+				)
+			)) as unknown as RawGarmentRow[];
+
+			const filtered = decrypted
+				// Rows missing id/name, or carrying a category outside the closed
+				// enum, are skipped instead of being cast into the output shape.
+				.filter(
+					(
+						row
+					): row is RawGarmentRow & { id: string; name: string; category: GarmentCategory } =>
+						Boolean(row.id && row.name) && garmentCategory.safeParse(row.category).success
+				)
+				// Tags are encrypted, so this filter has to run after decryption.
+				.filter((row) => {
+					if (input.tags.length === 0) return true;
+					const rowTags = new Set(row.tags ?? []);
+					return input.tags.every((t) => rowTags.has(t));
+				})
+				.slice(0, input.limit);
+
+			const garments = filtered.map((row) => ({
+				id: row.id,
+				name: row.name,
+				category: row.category,
+				mediaIds: row.mediaIds ?? [],
+				brand: row.brand ?? null,
+				color: row.color ?? null,
+				size: row.size ?? null,
+				material: row.material ?? null,
+				tags: row.tags ?? [],
+				notes: row.notes ?? null,
+			}));
+
+			ctx.logger.info('wardrobe.listGarments', {
+				count: garments.length,
+				category: input.category ??
+					'all',
+			});
+
+			return { garments };
+		},
+	};
+
+// ─── wardrobe.listOutfits ─────────────────────────────────────────
+
+const listOutfitsInput = z.object({
+	occasion: outfitOccasion.optional(),
+	favoriteOnly: z.boolean().default(false),
+	limit: z.number().int().positive().max(200).default(50),
+});
+
+const listOutfitsOutput = z.object({
+	outfits: z.array(outfitSchema),
+});
+
+/**
+ * Read tool: lists the live (not archived, not soft-deleted) outfits of the
+ * active space, optionally narrowed by occasion and/or favourite flag.
+ */
+export const wardrobeListOutfits: ToolSpec<typeof listOutfitsInput, typeof listOutfitsOutput> = {
+	name: 'wardrobe.listOutfits',
+	module: 'wardrobe',
+	scope: 'user-space',
+	policyHint: 'read',
+	description:
+		"List the caller's outfits in the active space. Filter by `occasion` and/or `favoriteOnly`. The returned rows include garmentIds — use `wardrobe.listGarments` to resolve them to full rows when you need more than ids.",
+	input: listOutfitsInput,
+	output: listOutfitsOutput,
+	encryptedFields: { table: OUTFITS_TABLE, fields: [...OUTFIT_ENCRYPTED_FIELDS] },
+	async handler(input, ctx) {
+		const key = await ctx.getMasterKey();
+		const res = await pullAll(syncCfg(ctx), OUTFITS_APP_ID, OUTFITS_TABLE);
+		// `occasion` and `isFavorite` are not in OUTFIT_ENCRYPTED_FIELDS, so
+		// both filters run on the raw rows and only matches get decrypted.
+		const alive = res.changes
+			.filter((c) => c.op !== 'delete' && c.data)
+			.map((c) => c.data as RawOutfitRow)
+			.filter((row) => !row.deletedAt && !row.isArchived)
+			.filter((row) => row.spaceId === ctx.spaceId)
+			.filter((row) => !input.occasion || row.occasion === input.occasion)
+			.filter((row) => !input.favoriteOnly || row.isFavorite === true);
+
+		const decrypted = (await Promise.all(
+			alive.map((row) =>
+				decryptRecordFields(
+					row as unknown as Record<string, unknown>,
+					OUTFIT_ENCRYPTED_FIELDS,
+					key
+				)
+			)
+		)) as unknown as RawOutfitRow[];
+
+		const filtered = decrypted
+			.filter((row): row is RawOutfitRow & { id: string; name: string } =>
+				Boolean(row.id && row.name)
+			)
+			.slice(0, input.limit);
+
+		// An occasion string outside the closed enum degrades to null instead
+		// of being cast into the output shape unchecked.
+		const toOccasion = (
+			raw: string | null | undefined
+		): z.infer<typeof outfitOccasion> | null => {
+			const parsed = outfitOccasion.safeParse(raw);
+			return parsed.success ? parsed.data : null;
+		};
+
+		const outfits = filtered.map((row) => ({
+			id: row.id,
+			name: row.name,
+			description: row.description ?? null,
+			garmentIds: row.garmentIds ?? [],
+			occasion: toOccasion(row.occasion),
+			tags: row.tags ??
+				[],
+			isFavorite: row.isFavorite === true,
+		}));
+
+		ctx.logger.info('wardrobe.listOutfits', {
+			count: outfits.length,
+			occasion: input.occasion ?? 'all',
+			favoriteOnly: input.favoriteOnly,
+		});
+
+		return { outfits };
+	},
+};
+
+// ─── wardrobe.createOutfit ────────────────────────────────────────
+
+const createOutfitInput = z.object({
+	name: z.string().min(1).max(200),
+	garmentIds: z.array(z.string()).min(1).max(16),
+	description: z.string().max(2000).nullable().default(null),
+	occasion: outfitOccasion.nullable().default(null),
+	tags: z.array(z.string()).max(20).default([]),
+});
+
+const createOutfitOutput = z.object({
+	outfit: outfitSchema,
+});
+
+/**
+ * Write tool: encrypts a new outfit record and pushes it as an insert into the
+ * active space. The garment ids are stored as given — nothing in this handler
+ * checks that they exist or belong to the same space.
+ */
+export const wardrobeCreateOutfit: ToolSpec<typeof createOutfitInput, typeof createOutfitOutput> =
+	{
+		name: 'wardrobe.createOutfit',
+		module: 'wardrobe',
+		scope: 'user-space',
+		policyHint: 'write',
+		description:
+			"Compose a new outfit in the active space. `garmentIds` must reference garments the caller owns in the same space — the server will persist whatever you pass (there's no cross-space validation here), so call `wardrobe.listGarments` first to confirm the ids.",
+		input: createOutfitInput,
+		output: createOutfitOutput,
+		encryptedFields: { table: OUTFITS_TABLE, fields: [...OUTFIT_ENCRYPTED_FIELDS] },
+		async handler(input, ctx) {
+			const key = await ctx.getMasterKey();
+			// The id is generated client-side and used both inside the record
+			// and as the sync row id.
+			const id = crypto.randomUUID();
+			const plaintext = {
+				id,
+				name: input.name,
+				description: input.description,
+				garmentIds: input.garmentIds,
+				occasion: input.occasion,
+				tags: input.tags,
+				isFavorite: false,
+			};
+
+			// Only the fields listed in OUTFIT_ENCRYPTED_FIELDS are passed for
+			// encryption; the record is sent as returned by encryptRecordFields.
+			const encrypted = await encryptRecordFields(
+				plaintext as unknown as Record<string, unknown>,
+				OUTFIT_ENCRYPTED_FIELDS,
+				key
+			);
+
+			await pushInsert(syncCfg(ctx), OUTFITS_APP_ID, {
+				table: OUTFITS_TABLE,
+				id,
+				spaceId: ctx.spaceId,
+				data: encrypted,
+			});
+
+			ctx.logger.info('wardrobe.createOutfit', {
+				outfitId: id,
+				garmentCount: input.garmentIds.length,
+				occasion: input.occasion ??
+					'none',
+			});
+
+			return {
+				outfit: {
+					id,
+					name: input.name,
+					description: input.description,
+					garmentIds: input.garmentIds,
+					occasion: input.occasion,
+					tags: input.tags,
+					isFavorite: false,
+				},
+			};
+		},
+	};
+
+// ─── wardrobe.tryOn ───────────────────────────────────────────────
+
+const tryOnInput = z.object({
+	outfitId: z.string(),
+	/** Optional override; default is composed from the outfit's name + occasion. */
+	prompt: z.string().max(2000).optional(),
+	/**
+	 * Force accessory-only mode (face-only render, square 1024×1024).
+	 * Auto-detected when every garment in the outfit is in the face-
+	 * only category set — pass true explicitly to override on mixed
+	 * outfits (rare).
+	 */
+	accessoryOnly: z.boolean().optional(),
+	quality: z.enum(['low', 'medium', 'high']).default('medium'),
+});
+
+const tryOnOutput = z.object({
+	imageUrl: z.string(),
+	mediaId: z.string(),
+	prompt: z.string(),
+	model: z.string(),
+	referenceMediaIds: z.array(z.string()),
+	mode: z.literal('edit'),
+});
+
+/**
+ * Write tool: renders the caller wearing an outfit. Resolves the outfit, its
+ * garments' primary photos and the space's primary face/body reference
+ * images, then POSTs them to the picture generate-with-reference endpoint.
+ *
+ * Throws when the outfit, its garments, their photos or the required
+ * reference images cannot be found in the active space, and when the picture
+ * endpoint answers with a non-2xx status or without an image.
+ */
+export const wardrobeTryOn: ToolSpec<typeof tryOnInput, typeof tryOnOutput> = {
+	name: 'wardrobe.tryOn',
+	module: 'wardrobe',
+	// `write` rather than `destructive`: the result is additive (a newly
+	// generated image — per the description below it is NOT written to the
+	// Picture gallery from here) and credits are consumed at the standard
+	// picture-generation tarif. No existing data is overwritten.
+	scope: 'user-space',
+	policyHint: 'write',
+	description:
+		"Render the caller wearing the outfit using OpenAI gpt-image-2. Resolves the active space's primary face-ref (and body-ref when the outfit isn't accessory-only) from meImages, combines them with the outfit's garment photos, and calls the picture-generate-with-reference endpoint. Returns the generated image's URL + mana-media id. Consumes credits at the same tarif as text-to-image (medium = 10). Does NOT persist the result into the Picture gallery from here — that's deferred to avoid double-writes when a user is also on the page; treat this tool as a preview.",
+	input: tryOnInput,
+	output: tryOnOutput,
+	async handler(input, ctx) {
+		// Single source for the model id: sent in the request and used as the
+		// fallback when the response does not echo it.
+		const TRY_ON_MODEL = 'openai/gpt-image-2';
+
+		// 1. Fetch outfit + garments + meImages, decrypt what's needed.
+		const key = await ctx.getMasterKey();
+
+		const outfitsRes = await pullAll(
+			syncCfg(ctx),
+			OUTFITS_APP_ID,
+			OUTFITS_TABLE
+		);
+		const outfit = outfitsRes.changes
+			.filter((c) => c.op !== 'delete' && c.data)
+			.map((c) => c.data as RawOutfitRow)
+			.find(
+				(row) =>
+					row.id === input.outfitId && !row.deletedAt && row.spaceId === ctx.spaceId
+			);
+		if (!outfit) {
+			throw new Error(`Outfit ${input.outfitId} not found in the active space`);
+		}
+
+		// Decrypted only for the outfit name used in the default prompt.
+		const decryptedOutfit = (await decryptRecordFields(
+			outfit as unknown as Record<string, unknown>,
+			OUTFIT_ENCRYPTED_FIELDS,
+			key
+		)) as unknown as RawOutfitRow;
+
+		const garmentIds = decryptedOutfit.garmentIds ?? [];
+		if (garmentIds.length === 0) {
+			throw new Error('Outfit has no garments');
+		}
+
+		const garmentsRes = await pullAll(
+			syncCfg(ctx),
+			GARMENTS_APP_ID,
+			GARMENTS_TABLE
+		);
+		const garmentSet = new Set(garmentIds);
+		const relevantGarments = garmentsRes.changes
+			.filter((c) => c.op !== 'delete' && c.data)
+			.map((c) => c.data as RawGarmentRow)
+			.filter(
+				(row) =>
+					row.id &&
+					garmentSet.has(row.id) &&
+					!row.deletedAt &&
+					row.spaceId === ctx.spaceId
+			);
+		if (relevantGarments.length === 0) {
+			throw new Error(
+				'None of the outfit garments exist in the active space (moved or deleted?)'
+			);
+		}
+
+		// Garment metadata we need (category, mediaIds) is plaintext; no
+		// decrypt round-trip needed for ref composition.
+		// The first media id of each garment is taken as its primary photo.
+		const garmentMediaIds = relevantGarments
+			.map((g) => g.mediaIds?.[0])
+			.filter((id): id is string => Boolean(id));
+		if (garmentMediaIds.length === 0) {
+			throw new Error('None of the outfit garments have a primary photo');
+		}
+
+		const meRes = await pullAll(syncCfg(ctx), ME_APP_ID, ME_TABLE);
+		const liveMeImages = meRes.changes
+			.filter((c) => c.op !== 'delete' && c.data)
+			.map((c) => c.data as RawMeImageRow)
+			.filter((row) => !row.deletedAt && row.spaceId === ctx.spaceId);
+
+		const faceRef = liveMeImages.find((row) => row.primaryFor === 'face-ref');
+		const bodyRef = liveMeImages.find((row) => row.primaryFor === 'body-ref');
+
+		if (!faceRef?.mediaId) {
+			throw new Error(
+				'No primary face-ref meImage in the active space. Upload one via /profile/me-images.'
+			);
+		}
+
+		// 2. Accessory-only detection. An explicit input flag wins; otherwise
+		// the mode is on only when every garment is in a face-only category
+		// (a missing category counts as 'other', i.e. not face-only).
+		const allFaceOnly = relevantGarments.every((g) =>
+			FACE_ONLY_CATEGORIES.has((g.category ?? 'other') as GarmentCategory)
+		);
+		const accessoryOnly = input.accessoryOnly ?? allFaceOnly;
+
+		if (!accessoryOnly && !bodyRef?.mediaId) {
+			throw new Error(
+				'No primary body-ref meImage in the active space. Upload a fullbody photo via /profile/me-images, or pass accessoryOnly=true if the outfit is face-only.'
+			);
+		}
+
+		// 3. Compose reference list respecting the 8-slot server cap.
+		// Face (and body) refs go first; garment photos fill the remaining
+		// slots and any overflow is dropped.
+		const referenceMediaIds: string[] = [faceRef.mediaId];
+		if (!accessoryOnly && bodyRef?.mediaId) referenceMediaIds.push(bodyRef.mediaId);
+		for (const id of garmentMediaIds) {
+			if (referenceMediaIds.length >= 8) break;
+			referenceMediaIds.push(id);
+		}
+
+		// 4. Compose prompt if none given. `||` is deliberate: a prompt that
+		// is empty after trimming falls through to the default.
+		const outfitName = decryptedOutfit.name ?? 'Outfit';
+		const effectivePrompt =
+			input.prompt?.trim() ||
+			(accessoryOnly
+				? `Fotorealistisches Portrait von mir mit ${outfitName}, frontal, studio-Licht, neutraler Hintergrund, Fokus auf dem Accessoire`
+				: `Fotorealistisches Portrait von mir im Outfit ${outfitName}, natürliches Licht, neutraler Hintergrund`);
+
+		const size: '1024x1024' | '1024x1536' = accessoryOnly ? '1024x1024' : '1024x1536';
+
+		// 5. Call the picture endpoint.
+		const res = await fetch(`${PICTURE_API_URL()}/api/v1/picture/generate-with-reference`, {
+			method: 'POST',
+			headers: {
+				'content-type': 'application/json',
+				authorization: `Bearer ${ctx.jwt}`,
+			},
+			body: JSON.stringify({
+				prompt: effectivePrompt,
+				referenceMediaIds,
+				model: TRY_ON_MODEL,
+				quality: input.quality,
+				size,
+				n: 1,
+			}),
+		});
+
+		if (!res.ok) {
+			const text = await res.text().catch(() => '');
+			throw new Error(
+				`picture.generate-with-reference failed: ${res.status} ${res.statusText} — ${text.slice(0, 500)}`
+			);
+		}
+
+		// The response is not schema-validated here, so every field is treated
+		// as optional. Both the `images[]` and the flat `imageUrl`/`mediaId`
+		// shapes are accepted.
+		const data = (await res.json()) as {
+			images?: Array<{ imageUrl: string; mediaId?: string }>;
+			imageUrl?: string;
+			mediaId?: string;
+			prompt?: string;
+			model?: string;
+			referenceMediaIds?: string[];
+		};
+		const first =
+			data.images?.[0] ??
+			(data.imageUrl ? { imageUrl: data.imageUrl, mediaId: data.mediaId } : null);
+		if (!first?.imageUrl || !first.mediaId) {
+			throw new Error('picture endpoint returned no image');
+		}
+
+		ctx.logger.info('wardrobe.tryOn', {
+			outfitId: input.outfitId,
+			accessoryOnly,
+			refs: referenceMediaIds.length,
+		});
+
+		return {
+			imageUrl: first.imageUrl,
+			mediaId: first.mediaId,
+			// Fall back to what was actually sent when the endpoint does not
+			// echo these, so the required output strings are always present.
+			prompt: data.prompt ?? effectivePrompt,
+			model: data.model ?? TRY_ON_MODEL,
+			referenceMediaIds: data.referenceMediaIds ?? referenceMediaIds,
+			mode: 'edit' as const,
+		};
+	},
+};
+
+// ─── Registration barrel ──────────────────────────────────────────
+
+/** Registers all four wardrobe tools with the tool registry. */
+export function registerWardrobeTools(): void {
+	registerTool(wardrobeListGarments);
+	registerTool(wardrobeListOutfits);
+	registerTool(wardrobeCreateOutfit);
+	registerTool(wardrobeTryOn);
+}
diff --git a/packages/mana-tool-registry/src/types.ts b/packages/mana-tool-registry/src/types.ts
index a36b50029..fec4747e7 100644
--- a/packages/mana-tool-registry/src/types.ts
+++ b/packages/mana-tool-registry/src/types.ts
@@ -29,7 +29,9 @@ export type ModuleId =
 	| 'tags'
 	| 'mood'
 	// — M5 (me-images + reference-based image generation) —
-	| 'me';
+	| 'me'
+	// — Wardrobe M5 (garments + outfits + try-on) —
+	| 'wardrobe';
 
 /**
  * `user-space` — operates on the caller's data within a specific Space.
diff --git a/packages/shared-ai/src/index.ts b/packages/shared-ai/src/index.ts
index e62d4aaff..6444f6c3f 100644
--- a/packages/shared-ai/src/index.ts
+++ b/packages/shared-ai/src/index.ts
@@ -90,14 +90,24 @@ export {
 	DEFAULT_COMPACT_KEEP_RECENT,
 	DEFAULT_COMPACT_MODEL,
 	DEFAULT_COMPACT_THRESHOLD,
+	MAX_SUB_AGENT_DEPTH,
 	MockLlmClient,
 	parseCompactSummary,
 	parsePlannerResponse,
 	renderCompactSummary,
 	runPlannerLoop,
+	runSubAgent,
 	shouldCompact,
+	SubAgentRecursionError,
+} from './planner';
+export type {
+	CompactHistoryOptions,
+	CompactHistoryResult,
+	CompactSummary,
+	RunSubAgentInput,
+	SubAgentResult,
+	SubAgentType,
 } from './planner';
-export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './planner';
 
 export {
 	AI_PROPOSABLE_TOOL_NAMES,
diff --git a/packages/shared-ai/src/planner/index.ts b/packages/shared-ai/src/planner/index.ts
index 8b36654c3..fe10a2b89 100644
--- a/packages/shared-ai/src/planner/index.ts
+++ b/packages/shared-ai/src/planner/index.ts
@@ -21,6 +21,8 @@ export {
 	shouldCompact,
 } from './compact';
 export type { CompactHistoryOptions, CompactHistoryResult, CompactSummary } from './compact';
+export {
+	MAX_SUB_AGENT_DEPTH, SubAgentRecursionError, runSubAgent } from './sub-agent';
+export type { RunSubAgentInput, SubAgentResult, SubAgentType } from './sub-agent';
 export { MockLlmClient } from './mock-llm';
 export type { MockLlmTurn } from './mock-llm';
 export type {
diff --git a/packages/shared-ai/src/planner/sub-agent.test.ts b/packages/shared-ai/src/planner/sub-agent.test.ts
new file mode 100644
index 000000000..570390d8a
--- /dev/null
+++ b/packages/shared-ai/src/planner/sub-agent.test.ts
@@ -0,0 +1,288 @@
+import { describe, expect, it, vi } from 'vitest';
+import {
+	MAX_SUB_AGENT_DEPTH,
+	SubAgentRecursionError,
+	runSubAgent,
+	type SubAgentType,
+} from './sub-agent';
+import { MockLlmClient } from './mock-llm';
+import type { ToolCallRequest, ToolResult } from './loop';
+import type { ToolSchema } from '../tools/schemas';
+
+// ─── Fixtures ──────────────────────────────────────────────────────
+
+// Two `auto` (read) tools and two `propose` (write) tools, so the
+// whitelisting tests can tell the archetype filters apart by count.
+const tools: ToolSchema[] = [
+	{
+		name: 'list_things',
+		module: 'test',
+		description: 'read-only listing',
+		defaultPolicy: 'auto',
+		parameters: [],
+	},
+	{
+		name: 'get_thing',
+		module: 'test',
+		description: 'read one',
+		defaultPolicy: 'auto',
+		parameters: [{ name: 'id', type: 'string', description: 'id', required: true }],
+	},
+	{
+		name: 'create_thing',
+		module: 'test',
+		description: 'writes',
+		defaultPolicy: 'propose',
+		parameters: [{ name: 'title', type: 'string', description: 'title', required: true }],
+	},
+	{
+		name: 'delete_thing',
+		module: 'test',
+		description: 'destructive',
+		defaultPolicy: 'propose',
+		parameters: [{ name: 'id', type: 'string', description: 'id', required: true }],
+	},
+];
+
+/** Shared input minus `llm` / `onToolCall`, which each test supplies itself. */
+function baseInput(type: SubAgentType) {
+	return {
+		type,
+		task: 'Find all todo items that mention foo and summarise.',
+		parentTools: tools,
+		parentDepth: 0,
+		model: 'google/gemini-2.5-flash',
+	};
+}
+
+// ─── Recursion guard ───────────────────────────────────────────────
+
+describe('runSubAgent — recursion guard', () => {
+	it('throws SubAgentRecursionError when parentDepth >= MAX_SUB_AGENT_DEPTH', async () => {
+		const llm = new MockLlmClient();
+		await expect(
+			runSubAgent({
+				...baseInput('research'),
+				parentDepth: MAX_SUB_AGENT_DEPTH,
+				llm,
+				onToolCall: async () => ({ success: true, message: '' }),
+			})
+		).rejects.toBeInstanceOf(SubAgentRecursionError);
+	});
+
+	it('proceeds at parentDepth = 0', async () => {
+		const llm = new MockLlmClient().enqueueStop('ok');
+		const res = await runSubAgent({
+			...baseInput('research'),
+			parentDepth: 0,
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.summary).toBe('ok');
+	});
+});
+
+// ─── Tool filtering by type ────────────────────────────────────────
+
+describe('runSubAgent — tool whitelisting', () => {
+	it('research type exposes only auto-policy tools to the LLM', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		const res = await runSubAgent({
+			...baseInput('research'),
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.availableToolCount).toBe(2); // list_things + get_thing
+		// The LLM saw the filtered toolset in its schema
+		const toolNames = llm.calls[0].toolNames;
+		expect(toolNames).toEqual(expect.arrayContaining(['list_things', 'get_thing']));
+		expect(toolNames).not.toContain('create_thing');
+		expect(toolNames).not.toContain('delete_thing');
+	});
+
+	it('general type passes every tool through', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		const res = await runSubAgent({
+			...baseInput('general'),
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.availableToolCount).toBe(tools.length);
+	});
+
+	it('plan type also exposes read-only (same filter as research)', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		const res = await runSubAgent({
+			...baseInput('plan'),
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.availableToolCount).toBe(2);
+	});
+
+	it('custom toolFilter overrides the type default', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		const res = await runSubAgent({
+			...baseInput('general'),
+			toolFilter: (t) => t.name === 'get_thing',
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.availableToolCount).toBe(1);
+	});
+
+	it('belt-and-suspenders: rejects tool calls outside the whitelist', async () => {
+		// LLM (misbehaving) asks for create_thing inside a research agent
+		const llm = new MockLlmClient()
+			.enqueueToolCalls([{ name: 'create_thing', args: { title: 'nope' } }])
+			.enqueueStop('fell back to a summary');
+
+		const dispatcherCalls: string[] = [];
+		const onToolCall = async (call: ToolCallRequest): Promise<ToolResult> => {
+			dispatcherCalls.push(call.name);
+			return { success: true, message: 'should-not-be-called' };
+		};
+
+		const res = await runSubAgent({
+			...baseInput('research'),
+			llm,
+			onToolCall,
+		});
+
+		// The caller's dispatcher was NEVER invoked — the wrapper rejected it.
+		expect(dispatcherCalls).toEqual([]);
+
+		// The LLM received a failure tool-message so it can change course.
+		const secondCall = llm.calls[1].messages;
+		const toolMsg = secondCall[secondCall.length - 1];
+		expect(toolMsg.role).toBe('tool');
+		expect(toolMsg.content).toContain('nicht freigegeben');
+		expect(res.summary).toBe('fell back to a summary');
+	});
+});
+
+// ─── Isolation (context-laundering) ────────────────────────────────
+
+describe('runSubAgent — context isolation', () => {
+	it('starts with a fresh messages array — no parent context leaks in', async () => {
+		const llm = new MockLlmClient().enqueueStop('clean');
+		await runSubAgent({
+			...baseInput('research'),
+			task: 'scan things',
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+
+		// What the LLM saw: [system, user] — no prior-messages leakage
+		const seen = llm.calls[0].messages;
+		expect(seen).toHaveLength(2);
+		expect(seen[0].role).toBe('system');
+		expect(seen[0].content).toContain('Sub-Agent');
+		expect(seen[1].role).toBe('user');
+		expect(seen[1].content).toBe('scan things');
+	});
+
+	it('exposes usage roll-up from the underlying loop', async () => {
+		const llm = new MockLlmClient();
+		// Pushes straight onto the mock's queue because this turn needs a
+		// `usage` payload.
+		(llm as unknown as { queue: unknown[] }).queue.push({
+			content: 'done',
+			toolCalls: [],
+			finishReason: 'stop',
+			usage: { promptTokens: 500, completionTokens: 120, totalTokens: 620 },
+		});
+
+		const res = await runSubAgent({
+			...baseInput('research'),
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		expect(res.usage.promptTokens).toBe(500);
+		expect(res.usage.completionTokens).toBe(120);
+		expect(res.usage.totalTokens).toBe(620);
+	});
+
+	it('falls back to a default summary when the LLM hits maxRounds without stopping', async () => {
+		const llm = new MockLlmClient();
+		for (let i = 0; i < 10; i++) {
+			llm.enqueueToolCalls([{ name: 'list_things', args: {} }]);
+		}
+
+		const res = await runSubAgent({
+			...baseInput('research'),
+			maxRounds: 3,
+			llm,
+			onToolCall: async () => ({ success: true, message: 'ok' }),
+		});
+
+		expect(res.rawResult.stopReason).toBe('max-rounds');
+		expect(res.summary).toContain('3 Runden ohne Summary');
+	});
+});
+
+// ─── System prompt customisation ──────────────────────────────────
+
+describe('runSubAgent — system prompt', () => {
+	it('uses a type-specific default prompt', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		await runSubAgent({
+			...baseInput('research'),
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		const seen = llm.calls[0].messages;
+		expect(seen[0].content).toContain('research');
+	});
+
+	it('honours an explicit systemPrompt override', async () => {
+		const llm = new MockLlmClient().enqueueStop('done');
+		await runSubAgent({
+			...baseInput('general'),
+			systemPrompt: 'CUSTOM SYSTEM: do exactly X.',
+			llm,
+			onToolCall: async () => ({ success: true, message: '' }),
+		});
+		const seen = llm.calls[0].messages;
+		expect(seen[0].content).toBe('CUSTOM SYSTEM: do exactly X.');
+	});
+});
+
+// ─── Model contract ────────────────────────────────────────────────
+
+describe('runSubAgent — model routing', () => {
+	it('throws when no model is supplied', async () => {
+		const llm = new MockLlmClient();
+		await expect(
+			runSubAgent({
+				...baseInput('research'),
+				model: undefined,
+				llm,
+				onToolCall: async () => ({ success: true, message: '' }),
+			})
+		).rejects.toThrow(/no model supplied/);
+	});
+});
+
+// ─── End-to-end: tool executed + summary returned ──────────────────
+
+describe('runSubAgent — end-to-end', () => {
+	it('loops: tool call → result → summary', async () => {
+		const llm = new MockLlmClient()
+			.enqueueToolCalls([{ name: 'list_things', args: {} }])
+			.enqueueStop('Found 3 things: a, b, c');
+
+		const onToolCall = vi.fn(
+			async (_call: ToolCallRequest): Promise<ToolResult> => ({
+				success: true,
+				data: ['a', 'b', 'c'],
+				message: '3 items',
+			})
+		);
+
+		const res = await runSubAgent({
+			...baseInput('research'),
+			llm,
+			onToolCall,
+		});
+
+		expect(onToolCall).toHaveBeenCalledTimes(1);
+		expect(res.summary).toBe('Found 3 things: a, b, c');
+		expect(res.rawResult.executedCalls).toHaveLength(1);
+	});
+});
diff --git a/packages/shared-ai/src/planner/sub-agent.ts b/packages/shared-ai/src/planner/sub-agent.ts
new file mode 100644
index 000000000..4237ac059
--- /dev/null
+++ b/packages/shared-ai/src/planner/sub-agent.ts
@@ -0,0 +1,259 @@
+/**
+ * In-process sub-agent loop — the `I2A` pattern from Claude Code.
+ *
+ * A sub-agent is `runPlannerLoop` run with four invariants flipped:
+ *
+ *   1. FRESH `messages[]` — the parent's history never leaks into the
+ *      sub-agent. The sub-agent only sees its own system prompt + task
+ *      description. This is the "context-laundering" point: hundreds
+ *      of scanned files, retry loops, or noisy tool results stay
+ *      inside the sub-agent and never pollute the parent log.
+ *
+ *   2. RESTRICTED tool-whitelist — the parent's full tool-set is
+ *      filtered down to a subset appropriate for the sub-agent's type
+ *      (e.g. `research` gets read-only tools, `general` gets whatever
+ *      the parent had). The whitelist is enforced at THIS layer, not
+ *      left to the LLM to "please don't use write tools".
+ *
+ *   3. SINGLE RETURN VALUE — the sub-agent loop produces one string
+ *      summary back to the parent (rendered as the parent's `task`
+ *      tool-result). The parent NEVER sees the sub-agent's individual
+ *      tool calls. This is the Claude-Code contract and matches the
+ *      original paper's sub-episode recipe from RL.
+ *
+ *   4. ONE LEVEL DEEP, STRICT — a sub-agent cannot launch another
+ *      sub-agent. `parentDepth` in the input enforces this; the
+ *      consumer-level `task` tool handler is the other guard.
+ *
+ * Token usage from the sub-agent rolls up to the caller (returned as
+ * part of `SubAgentResult.usage`) so budget tracking in mana-ai's
+ * agent snapshots sees the full sub-tree cost, not just the parent loop.
+ */
+
+import type { ToolSchema } from '../tools/schemas';
+import { runPlannerLoop } from './loop';
+import type {
+	LlmClient,
+	PlannerLoopResult,
+	ReminderChannel,
+	TokenUsage,
+	ToolCallRequest,
+	ToolResult,
+} from './loop';
+
+/**
+ * Named sub-agent archetypes. Each type declares a default tool-filter
+ * predicate that the launcher uses to carve the allowed tool-set out of
+ * the parent's full catalog.
+ *
+ *   - `research`: read-only. LLM may list/get/search but not mutate.
+ *                 Default for "go scan these things and tell me what's
+ *                 there" tasks. Matches Claude Code's `Explore` agent.
+ *
+ *   - `general`:  anything the parent could do (minus recursion). For
+ *                 heterogeneous tasks where the sub-agent may need
+ *                 writes. Equivalent to Claude Code's `general-purpose`.
+ *
+ *   - `plan`:     read-only, small round budget. For "think through
+ *                 this before acting" where the summary IS the value.
+ *                 Matches Claude Code's `Plan` mode.
+ *
+ * Consumers can supply a custom `toolFilter` to override these defaults.
+ */
+export type SubAgentType = 'research' | 'general' | 'plan';
+
+// "Read-only" is approximated by `defaultPolicy === 'auto'`: tools whose
+// default policy is anything else (e.g. `propose`) are withheld from the
+// research and plan archetypes.
+const DEFAULT_TOOL_FILTERS: Record<SubAgentType, (t: ToolSchema) => boolean> = {
+	research: (t) => t.defaultPolicy === 'auto',
+	general: () => true,
+	plan: (t) => t.defaultPolicy === 'auto',
+};
+
+// Per-archetype default for `RunSubAgentInput.maxRounds`.
+const DEFAULT_MAX_ROUNDS: Record<SubAgentType, number> = {
+	research: 5,
+	general: 5,
+	plan: 3,
+};
+
+/**
+ * Hard cap on recursion — one level deep, period. Matches Claude Code's
+ * `KN5` launcher behaviour.
+ */
+export const MAX_SUB_AGENT_DEPTH = 1;
+
+export interface RunSubAgentInput {
+	/** LLM transport. Typically the same client as the parent; can be
+	 *  swapped for a cheaper-tier model for research-type sub-agents. */
+	readonly llm: LlmClient;
+	/** Model id — in `provider/model` form. Optional in the type, but
+	 *  there is no implicit fallback: the test suite expects
+	 *  `runSubAgent` to reject with "no model supplied" when it is
+	 *  missing, so callers should always pass the parent's model. */
+	readonly model?: string;
+	/** Archetype — see `SubAgentType` docs. */
+	readonly type: SubAgentType;
+	/** Free-text task description the parent wants the sub-agent to
+	 *  execute. Becomes the sub-agent's `userPrompt`. */
+	readonly task: string;
+	/** Parent's full tool catalog. The launcher applies the type's
+	 *  filter (or the caller's override) to produce the sub-agent's
+	 *  restricted set. */
+	readonly parentTools: readonly ToolSchema[];
+	/** Optional tool-filter override. Takes precedence over the
+	 *  type's default predicate. */
+	readonly toolFilter?: (tool: ToolSchema) => boolean;
+	/** Tool dispatcher. Receives the sub-agent's tool calls — NOT the
+	 *  parent's. The dispatcher MUST validate against the restricted
+	 *  whitelist too (belt-and-suspenders); otherwise a malformed
+	 *  LLM response could invoke a filtered-out tool. */
+	readonly onToolCall: (call: ToolCallRequest) => Promise<ToolResult>;
+	/** Current recursion depth. Parent callers pass 0. A sub-agent
+	 *  spawning ANOTHER sub-agent must pass 1, which this function
+	 *  then rejects. */
+	readonly parentDepth: number;
+	/** Optional per-round reminder channel for the sub-agent. Typically
+	 *  different from the parent's — e.g. a "you are a research
+	 *  sub-agent, don't write" nudge instead of the parent's budget
+	 *  warnings. */
+	readonly reminderChannel?: ReminderChannel;
+	/** Max LLM rounds inside this sub-agent. Defaults to the type's
+	 *  value (research: 5, general: 5, plan: 3). */
+	readonly maxRounds?: number;
+	/** Explicit system prompt. Defaults to a short generic "you are a
+	 *  sub-agent, return a summary" prompt matching the type. */
+	readonly systemPrompt?: string;
+}
+
+export interface SubAgentResult {
+	readonly type: SubAgentType;
+	/** Single-string digest the parent sees as `ToolResult.message`.
+	 *  Falls back to a generic line when the LLM hit the round budget
+	 *  without producing assistant text. */
+	readonly summary: string;
+	/** Raw planner result for debug capture. Consumers typically do NOT
+	 *  forward this to the parent — only the summary crosses the
+	 *  boundary. Kept here so a debug log can record the full
+	 *  sub-episode if the caller wants to. */
+	readonly rawResult: PlannerLoopResult;
+	/** Rolled-up usage so the caller can attribute tokens to the
+	 *  parent's mission/agent budget. */
+	readonly usage: TokenUsage;
+	/** How many restricted tools the sub-agent ultimately got. Useful
+	 *  for debug logs and dashboards; if it drops to 0 the filter was
+	 *  too aggressive and the sub-agent probably couldn't do anything. */
+	readonly availableToolCount: number;
+}
+
+/**
+ * Thrown when a sub-agent tries to spawn another sub-agent. Callers
+ * at the tool-registry `task` handler layer also check this, but the
+ * primitive throws as a defense-in-depth signal the consumer handler
+ * shouldn't swallow silently.
+ */
+export class SubAgentRecursionError extends Error {
+	constructor(depth: number) {
+		super(
+			`Sub-agents are one-level-deep only; caller passed parentDepth=${depth}. ` +
+				`MAX_SUB_AGENT_DEPTH=${MAX_SUB_AGENT_DEPTH}.`
+		);
+		this.name = 'SubAgentRecursionError';
+	}
+}
+
+function defaultSystemPrompt(type: SubAgentType): string {
+	const base =
+		'Du bist ein Sub-Agent. Fuehre genau die Aufgabe aus, die dir der Parent ' +
+		'Agent gibt, und liefere eine knappe Summary am Ende. Keine Seitendiskussion.';
+	if (type === 'research') {
+		return (
+			base +
+			'\n\nArchetyp: research. Du darfst nur Lese-Tools verwenden. Schreibe ' +
+			'nichts. Ergebnis = Summary deiner Funde in maximal 10 Zeilen.'
+		);
+	}
+	if (type === 'plan') {
+		return (
+			base +
+			'\n\nArchetyp: plan. Keine Tool-Calls wenn moeglich. Denke durch die ' +
+			'Aufgabe und formuliere einen strukturierten Plan (3-5 Schritte) als ' +
+			'Summary.'
+		);
+	}
+	return (
+		base +
+		'\n\nArchetyp: general. Nutze Tools wie ein Parent-Agent es tun wuerde, ' +
+		'aber halte die Summary auf das Wesentliche beschraenkt.'
+ ); +} + +/** + * Launch an in-process sub-agent. See module docstring for the four + * invariants this enforces. + * + * The returned `summary` is the single artifact that should cross back + * to the parent (typically as a `task` tool-result message). Everything + * else (`rawResult`, individual tool calls) is kept for the caller's + * own debug log but NEVER rendered into the parent's messages array. + */ +export async function runSubAgent(input: RunSubAgentInput): Promise { + if (input.parentDepth >= MAX_SUB_AGENT_DEPTH) { + throw new SubAgentRecursionError(input.parentDepth); + } + + const filter = input.toolFilter ?? DEFAULT_TOOL_FILTERS[input.type]; + const restrictedTools = input.parentTools.filter(filter); + const maxRounds = input.maxRounds ?? DEFAULT_MAX_ROUNDS[input.type]; + const systemPrompt = input.systemPrompt ?? defaultSystemPrompt(input.type); + const model = input.model ?? ''; + + // The loop requires a model string; surface a clear error rather + // than letting the LLM client fail with a cryptic provider error. + if (!model) { + throw new Error( + `runSubAgent: no model supplied. Pass opts.model explicitly — sub-agents ` + + `default to nothing on purpose so routing to a cheaper tier (Haiku) is ` + + `an explicit decision by the caller.` + ); + } + + const rawResult = await runPlannerLoop({ + llm: input.llm, + input: { + systemPrompt, + userPrompt: input.task, + tools: restrictedTools, + model, + maxRounds, + reminderChannel: input.reminderChannel, + // No compactor for sub-agents: they are short-lived by + // construction (maxRounds ≤ 5). If the caller needs a + // deeper sub-agent, lift that decision up — don't double + // the LLM call count inside a disposable context. + }, + onToolCall: async (call: ToolCallRequest): Promise => { + // Belt-and-suspenders: even though `restrictedTools` was + // passed to the loop, a buggy LLM response could still + // name a tool outside the whitelist. 
Reject it here so the + // caller's dispatcher never runs an unauthorised tool. + const isWhitelisted = restrictedTools.some((t) => t.name === call.name); + if (!isWhitelisted) { + return { + success: false, + message: + `Tool ${call.name} ist fuer diesen Sub-Agent (${input.type}) ` + + `nicht freigegeben. Wechsel die Strategie oder brich ab.`, + }; + } + return input.onToolCall(call); + }, + }); + + const summary = + rawResult.summary ?? + `(Sub-Agent ${input.type} beendet nach ${rawResult.rounds} Runden ohne Summary.)`; + + return { + type: input.type, + summary, + rawResult, + usage: rawResult.usage, + availableToolCount: restrictedTools.length, + }; +}