From fc635f9830e2cf3a90656c94814eadd091742b96 Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 14:43:56 +0200 Subject: [PATCH] feat(tool-registry): me.listReferenceImages + me.generateWithReference (M5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes M5 of docs/plans/me-images-and-reference-generation.md — exposes the meImages feature through the shared tool-registry so MCP clients (Claude Desktop) and the mana-ai mission runner can drive it alongside the built-in webapp UI. Two tools in packages/mana-tool-registry/src/modules/me.ts: - me.listReferenceImages(kind?) — scope: user-space, read. Pulls the user's meImages rows from mana-sync (app='profile'), filters to usage.aiReference=true and soft-live records, decrypts the `label` and `tags` fields with the caller's master key (same pattern as notes.search). Returns mediaIds + kind + primary-slot info so a persona can pick references intelligently. ZK users will see this fail at getMasterKey() — correct, because the label is truly unrecoverable server-side for them. - me.generateWithReference({prompt, referenceMediaIds, quality, size, n}) — scope: user-space, write. Thin proxy over the M3 endpoint POST /api/v1/picture/generate-with-reference in apps/api: forwards the JWT, lets apps/api re-verify ownership, and returns the generated images' mediaIds + URLs. Credits are consumed at the same 3/10/25 tariff as text-to-image, so a persona plan pass should gate this behind explicit budget rather than leaving it on auto-policy. Registered in modules/index.ts + adds 'me' to the ModuleId union in types.ts. No other wiring needed — mana-mcp's createMcpServerForUser iterates the registry and exposes any user-space tool, so both tools become available to Claude Desktop immediately on next deploy. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../mana-tool-registry/src/modules/index.ts | 3 + packages/mana-tool-registry/src/modules/me.ts | 251 ++++++++++++++++++ packages/mana-tool-registry/src/types.ts | 4 +- 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 packages/mana-tool-registry/src/modules/me.ts diff --git a/packages/mana-tool-registry/src/modules/index.ts b/packages/mana-tool-registry/src/modules/index.ts index c0def542b..f5cc8a598 100644 --- a/packages/mana-tool-registry/src/modules/index.ts +++ b/packages/mana-tool-registry/src/modules/index.ts @@ -12,6 +12,7 @@ import { registerHabitsTools } from './habits.ts'; import { registerJournalTools } from './journal.ts'; +import { registerMeTools } from './me.ts'; import { registerNotesTools } from './notes.ts'; import { registerSpacesTools } from './spaces.ts'; import { registerTodoTools } from './todo.ts'; @@ -19,6 +20,7 @@ import { registerTodoTools } from './todo.ts'; export function registerAllModules(): void { registerHabitsTools(); registerJournalTools(); + registerMeTools(); registerNotesTools(); registerSpacesTools(); registerTodoTools(); @@ -27,6 +29,7 @@ export function registerAllModules(): void { export { registerHabitsTools, registerJournalTools, + registerMeTools, registerNotesTools, registerSpacesTools, registerTodoTools, diff --git a/packages/mana-tool-registry/src/modules/me.ts b/packages/mana-tool-registry/src/modules/me.ts new file mode 100644 index 000000000..3d2f1d640 --- /dev/null +++ b/packages/mana-tool-registry/src/modules/me.ts @@ -0,0 +1,251 @@ +/** + * Me — tools that let Agents and MCP clients act on the user's own + * reference pool (meImages, plan docs/plans/me-images-and-reference- + * generation.md). Two tools only: + * + * - me.listReferenceImages — which of the user's face/body shots + * are currently opted-in for AI use. Read-only, pulled through + * mana-sync so RLS applies and the Persona Runner cannot see + * another tenant's pool. 
+ * + * - me.generateWithReference — proxy over the M3 edit endpoint. + * The actual OpenAI /v1/images/edits multipart is handled by + * apps/api; this tool exists so a Persona ("Stil-Coach", + * "Outfit-Buddy") can trigger generations without re-hosting the + * multipart logic or hardcoding OpenAI knowledge in the runner. + * + * meImages are stored under the `profile` sync appId (matches the + * web app's module.config.ts). Only `label` and `tags` are encrypted + * there — kind, primaryFor, usage, mediaId, urls, dimensions stay + * plaintext because the generator UI filters by them. + */ + +import { z } from 'zod'; +import { decryptRecordFields } from '@mana/shared-crypto'; +import { pullAll } from '../sync-client.ts'; +import { registerTool } from '../registry.ts'; +import type { ToolContext, ToolSpec } from '../types.ts'; + +const APP_ID = 'profile'; +const TABLE = 'meImages'; +const ENCRYPTED_FIELDS = ['label', 'tags'] as const; + +const SYNC_URL = () => process.env.MANA_SYNC_URL ?? 'http://localhost:3050'; +const PICTURE_API_URL = () => process.env.MANA_API_URL ?? 'http://localhost:3060'; +const CLIENT_ID = () => process.env.MANA_MCP_CLIENT_ID ?? 'mana-mcp'; + +function syncCfg(ctx: ToolContext) { + return { baseUrl: SYNC_URL(), jwt: ctx.jwt, clientId: CLIENT_ID() }; +} + +// ─── Domain shapes ──────────────────────────────────────────────── + +const meImageKind = z.enum(['face', 'fullbody', 'halfbody', 'hands', 'reference']); +const meImagePrimarySlot = z.enum(['avatar', 'face-ref', 'body-ref']); + +const meImageSchema = z.object({ + id: z.string(), + mediaId: z.string(), + kind: meImageKind, + label: z.string().nullable(), + primaryFor: meImagePrimarySlot.nullable(), + publicUrl: z.string().nullable(), + thumbnailUrl: z.string().nullable(), + width: z.number().nullable(), + height: z.number().nullable(), +}); + +// Raw row shape we expect from mana-sync for meImages. 
Fields beyond +// what we care about are ignored — see the web-app types for the +// full shape (LocalMeImage). +interface RawMeImageRow { + id?: string; + mediaId?: string; + kind?: string; + label?: string | null; + primaryFor?: string | null; + publicUrl?: string | null; + thumbnailUrl?: string | null; + width?: number | null; + height?: number | null; + usage?: { aiReference?: boolean } | null; + deletedAt?: string | null; +} + +// ─── me.listReferenceImages ─────────────────────────────────────── + +const listInput = z.object({ + /** Optional kind filter. Omit to get every opted-in reference. */ + kind: meImageKind.optional(), +}); + +const listOutput = z.object({ + images: z.array(meImageSchema), +}); + +export const meListReferenceImages: ToolSpec = { + name: 'me.listReferenceImages', + module: 'me', + scope: 'user-space', + policyHint: 'read', + description: + "List the user's meImages that are explicitly opted in for AI reference use (usage.aiReference=true). The returned mediaIds are exactly what `me.generateWithReference` will accept. Excludes soft-deleted rows. Optional `kind` filter narrows to 'face', 'fullbody', 'halfbody', 'hands', or 'reference'.", + input: listInput, + output: listOutput, + // label + tags are encrypted in the web-app's crypto registry; this + // tool declaration keeps the audit (pnpm run check:crypto in future + // iterations) aware that we decrypt label on read. + encryptedFields: { table: TABLE, fields: [...ENCRYPTED_FIELDS] }, + async handler(input, ctx) { + // ZK users will hit the error at ctx.getMasterKey() below; that's + // fine — for ZK users the server genuinely cannot decrypt labels + // and the right behaviour is to surface the error to the caller. 
+ const key = await ctx.getMasterKey(); + + const res = await pullAll(syncCfg(ctx), APP_ID, TABLE); + const alive = res.changes + .filter((c) => c.op !== 'delete' && c.data) + .map((c) => c.data as RawMeImageRow) + .filter((row) => !row.deletedAt); + + const optedIn = alive.filter((row) => row.usage?.aiReference === true); + const kindFiltered = input.kind ? optedIn.filter((row) => row.kind === input.kind) : optedIn; + + // Decrypt label + tags on the server side (same pattern as notes.search). + const decrypted = (await Promise.all( + kindFiltered.map((row) => + decryptRecordFields(row as unknown as Record, ENCRYPTED_FIELDS, key) + ) + )) as unknown as RawMeImageRow[]; + + const images = decrypted + .filter((row): row is RawMeImageRow & { id: string; mediaId: string } => + Boolean(row.id && row.mediaId) + ) + .map((row) => ({ + id: row.id, + mediaId: row.mediaId, + kind: (row.kind ?? 'reference') as z.infer, + label: row.label ?? null, + primaryFor: (row.primaryFor ?? null) as z.infer | null, + publicUrl: row.publicUrl ?? null, + thumbnailUrl: row.thumbnailUrl ?? null, + width: row.width ?? null, + height: row.height ?? null, + })); + + ctx.logger.info('me.listReferenceImages', { + count: images.length, + kindFilter: input.kind ?? 'all', + }); + + return { images }; + }, +}; + +// ─── me.generateWithReference ───────────────────────────────────── + +const generateInput = z.object({ + prompt: z.string().min(1).max(4000), + /** + * mana-media ids from `me.listReferenceImages`. apps/api will verify + * ownership again server-side, so mistakes here are caught with 404. + * Capped at 4 to match the M3 endpoint's own limit. 
+ */ + referenceMediaIds: z.array(z.string()).min(1).max(4), + quality: z.enum(['low', 'medium', 'high']).default('medium'), + size: z.enum(['1024x1024', '1536x1024', '1024x1536', 'auto']).default('1024x1024'), + n: z.number().int().min(1).max(4).default(1), +}); + +const generatedImageSchema = z.object({ + mediaId: z.string(), + imageUrl: z.string(), + thumbnailUrl: z.string().optional(), +}); + +const generateOutput = z.object({ + images: z.array(generatedImageSchema), + prompt: z.string(), + model: z.string(), + referenceMediaIds: z.array(z.string()), + mode: z.literal('edit'), +}); + +export const meGenerateWithReference: ToolSpec = { + name: 'me.generateWithReference', + module: 'me', + scope: 'user-space', + // `write` rather than `destructive`: credits are consumed, but the + // result is purely additive (new image rows, no overwrites). A + // future policy pass may still want to require explicit user consent + // since credits have real cost; that lives in the consumer's policy + // config, not in this hint. + policyHint: 'write', + description: + "Run an OpenAI gpt-image-2 edit using the user's opted-in meImages as references. Pass mediaIds obtained from `me.listReferenceImages`. Consumes credits at the same rate as text-to-image generation (3/10/25 per quality, times n). 
Returns the generated images' mana-media ids + URLs; they are also persisted in the Picture module's gallery.", + input: generateInput, + output: generateOutput, + async handler(input, ctx) { + const url = `${PICTURE_API_URL()}/api/v1/picture/generate-with-reference`; + const res = await fetch(url, { + method: 'POST', + headers: { + 'content-type': 'application/json', + authorization: `Bearer ${ctx.jwt}`, + }, + body: JSON.stringify({ + prompt: input.prompt, + referenceMediaIds: input.referenceMediaIds, + model: 'openai/gpt-image-2', + quality: input.quality, + size: input.size, + n: input.n, + }), + }); + + if (!res.ok) { + const text = await res.text().catch(() => ''); + throw new Error( + `mana-api /picture/generate-with-reference failed: ${res.status} ${res.statusText} — ${text.slice(0, 500)}` + ); + } + + const data = (await res.json()) as { + images?: Array<{ mediaId?: string; imageUrl?: string; thumbnailUrl?: string }>; + prompt?: string; + model?: string; + referenceMediaIds?: string[]; + }; + + const images = (data.images ?? []) + .filter((img): img is { mediaId: string; imageUrl: string; thumbnailUrl?: string } => + Boolean(img.mediaId && img.imageUrl) + ) + .map((img) => ({ + mediaId: img.mediaId, + imageUrl: img.imageUrl, + thumbnailUrl: img.thumbnailUrl, + })); + + ctx.logger.info('me.generateWithReference', { + count: images.length, + references: input.referenceMediaIds.length, + }); + + return { + images, + prompt: data.prompt ?? input.prompt, + model: data.model ?? 'openai/gpt-image-2', + referenceMediaIds: data.referenceMediaIds ?? 
input.referenceMediaIds, + mode: 'edit' as const, + }; + }, +}; + +// ─── Registration barrel ────────────────────────────────────────── + +export function registerMeTools(): void { + registerTool(meListReferenceImages); + registerTool(meGenerateWithReference); +} diff --git a/packages/mana-tool-registry/src/types.ts b/packages/mana-tool-registry/src/types.ts index ba7dd319c..13cda09ec 100644 --- a/packages/mana-tool-registry/src/types.ts +++ b/packages/mana-tool-registry/src/types.ts @@ -26,7 +26,9 @@ export type ModuleId = | 'contacts' | 'articles' | 'missions' - | 'tags'; + | 'tags' + // — M5 (me-images + reference-based image generation) — + | 'me'; /** * `user-space` — operates on the caller's data within a specific Space.