From 38dc806549a3ecb42f94e1d1859d7ee82e0b252c Mon Sep 17 00:00:00 2001 From: Till JS Date: Thu, 23 Apr 2026 14:16:36 +0200 Subject: [PATCH] =?UTF-8?q?feat(personas):=20M3.b-d=20=E2=80=94=20tick=20l?= =?UTF-8?q?oop=20+=20Claude=20Agent=20SDK=20+=20persistence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the M3 loop from docs/plans/mana-mcp-and-personas.md. The runner now picks up due personas, drives them through Claude + MCP for one simulated turn, collects actions + ratings, and persists them through service-key internal endpoints in mana-auth. Internal endpoints (mana-auth, service-key-gated) - GET /api/v1/internal/personas/due Returns personas whose tickCadence + lastActiveAt say they're due. Rules: hourly > 1h, daily > 24h, weekdays > 24h mon-fri. NULLS FIRST so never-run personas go ahead of stale ones. - POST /api/v1/internal/personas/:id/actions Batch ≤ 500. Row ids are deterministic (`${tickId}-${i}-${toolName}`) + ON CONFLICT DO NOTHING so the runner can retry a tick without doubling audit rows. Also bumps personas.last_active_at so the next /due call sees it. - POST /api/v1/internal/personas/:id/feedback Batch ≤ 100. Row id is `${tickId}-${module}` — natural key is one rating per module per tick. Runner tick pipeline (services/mana-persona-runner/src/runner/) - claude-session.ts Two phases per tick. runMainTurn feeds the persona's system prompt + a German "simulate a day" user prompt to Claude Agent SDK's query(), with mana-mcp wired in as a streamable-HTTP MCP server. We iterate the returned AsyncGenerator and extract tool_use blocks into ActionRows; tool_result with is_error=true flips the most recent action. runRatingTurn is a fresh query() with tools:[] asking Claude in character to rate each used module 1-5 as strict JSON, which we parse with tolerance for surrounding whitespace / fences. Unparseable output becomes a synthetic '__parse' feedback row so operators see the failure. 
- tick.ts Orchestrator. Skips if config.paused. Fetches /due, processes in batches of config.concurrency (Promise.allSettled so one failure doesn't kill the batch), returns {due, ranSuccessfully, failed[], durationMs}. - types.ts ActionRow and FeedbackRow shapes shared between claude-session and the internal client; mirrors the mana-auth schema but in narrow plain TS for the wire. Runner bootstrap (src/index.ts) - setInterval(config.tickIntervalMs) starts the tick loop on boot. tickInFlight guards against overlap when Claude latency > interval. If MANA_SERVICE_KEY or ANTHROPIC_API_KEY is missing, loop is disabled with a warn line — /health still works, /diag/login still works. - New dev-only POST /diag/tick fires a single tick on demand and returns the result, so you can verify without waiting 60 s. - Graceful SIGTERM/SIGINT shutdown clears the interval. Client - clients/mana-auth-internal.ts X-Service-Key client for the three endpoints above. Constructor throws if serviceKey is empty — fail loud, not silent. Boot smoke: /health + /diag/tick both return descriptive 500s when keys are absent, 200/JSON when present. Warning lines show up on boot for missing keys. Type-check green across mana-auth, tool- registry, mcp, persona-runner. End-to-end smoke recipe (docker up → db:push → seed:personas → diag/tick → psql) documented in services/mana-persona-runner/CLAUDE.md. That's the M3 exit gate. M2.d (cross-space family/team memberships) still deferred. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/lib/media.ts | 50 +++++++ apps/api/src/modules/picture/routes.ts | 190 +++++++++++++++++++++++++ 2 files changed, 240 insertions(+) diff --git a/apps/api/src/lib/media.ts b/apps/api/src/lib/media.ts index a0929d0b6..8cb509c8d 100644 --- a/apps/api/src/lib/media.ts +++ b/apps/api/src/lib/media.ts @@ -38,3 +38,53 @@ export function getMediaUrls(mediaId: string) { export function isImageMimeType(mimeType: string): boolean { return mimeType.startsWith('image/') && mimeType !== 'image/svg+xml'; } + +/** + * Download a media file by id. The mana-media `/file` route is CDN-style + * public — no auth on the URL itself — so this is a plain fetch. Callers + * that need to gate on ownership MUST call `verifyMediaOwnership` first. + */ +export async function getMediaBuffer( + mediaId: string +): Promise<{ buffer: ArrayBuffer; mimeType: string }> { + const url = getMediaClient().getOriginalUrl(mediaId); + const res = await fetch(url); + if (!res.ok) { + throw new Error(`mana-media fetch failed for ${mediaId}: HTTP ${res.status}`); + } + const mimeType = res.headers.get('content-type') ?? 'application/octet-stream'; + const buffer = await res.arrayBuffer(); + return { buffer, mimeType }; +} + +/** + * Verify that every id in `mediaIds` is owned by `userId` under the given + * `app` scope. Throws { status: 404 } when one or more ids are not in the + * user's reference set — the caller turns that into an HTTP response. + * + * One `list()` round-trip is all we need: the response is the full set of + * the user's uploads under that app tag, so set-membership check is O(N) + * in memory. The `limit: 500` cap is the sanity fence — a single user with + * more than 500 reference images under one app is already far beyond the + * product's intended shape; we'd catch that as a design regression long + * before it breaks this check. 
+ */ +export async function verifyMediaOwnership( + userId: string, + mediaIds: readonly string[], + app: string +): Promise<void> { + if (mediaIds.length === 0) return; + const owned = await getMediaClient().list({ userId, app, limit: 500 }); + const ownedSet = new Set(owned.map((m) => m.id)); + const missing = mediaIds.filter((id) => !ownedSet.has(id)); + if (missing.length > 0) { + const err = new Error(`Reference media not owned: ${missing.join(', ')}`) as Error & { + status?: number; + missing?: string[]; + }; + err.status = 404; + err.missing = missing; + throw err; + } +} diff --git a/apps/api/src/modules/picture/routes.ts b/apps/api/src/modules/picture/routes.ts index c24aa36bd..84161bc30 100644 --- a/apps/api/src/modules/picture/routes.ts +++ b/apps/api/src/modules/picture/routes.ts @@ -226,6 +226,196 @@ routes.post('/generate', async (c) => { } }); +// ─── Reference-based Image Edits (OpenAI /v1/images/edits) ───────── +// +// Takes 1..MAX_REFERENCE_IMAGES media ids from the caller (expected to +// come from meImages — plan M1, filtered by usage.aiReference=true on +// the client), verifies ownership under the `me` app-tag, downloads the +// raw bytes from mana-media, and forwards a multipart POST to OpenAI's +// `/v1/images/edits`. Generated outputs are pushed back into mana-media +// under app='picture' so the Dexie picture-store can pin them exactly +// like a text-to-image result. +// +// Only gpt-image-1 / gpt-image-2 are wired here — they accept multi- +// image input natively. Replicate/local fallback is a later milestone. + +// OpenAI gpt-image-1 / gpt-image-2 accept up to 16 reference images per +// edit call. We clamp at 4 to keep credit exposure + upload payload size +// predictable while still covering the common "face + fullbody + outfit" +// workflow the plan targets. 
+const MAX_REFERENCE_IMAGES = 4; + +routes.post('/generate-with-reference', async (c) => { + const userId = c.get('userId'); + const body = (await c.req.json()) as { + prompt?: string; + referenceMediaIds?: string[]; + model?: string; + quality?: string; + size?: OpenAiSize; + n?: number; + }; + + const prompt = (body.prompt ?? '').trim(); + if (!prompt) return c.json({ error: 'prompt required' }, 400); + + const refIds = Array.isArray(body.referenceMediaIds) + ? body.referenceMediaIds.filter((id): id is string => typeof id === 'string' && id.length > 0) + : []; + if (refIds.length === 0) return c.json({ error: 'referenceMediaIds required' }, 400); + if (refIds.length > MAX_REFERENCE_IMAGES) { + return c.json( + { error: `Too many references (max ${MAX_REFERENCE_IMAGES})`, limit: MAX_REFERENCE_IMAGES }, + 400 + ); + } + + if (!OPENAI_API_KEY) { + return c.json({ error: 'OpenAI image edits not configured' }, 503); + } + + const model = body.model ?? 'openai/gpt-image-2'; + if (!model.startsWith('openai/')) { + // Edits routing for non-OpenAI backends (local FLUX+PuLID, Replicate) + // is future work — plan M5. Fail loud rather than silently downgrading. + return c.json({ error: `Model ${model} not supported for edits`, model }, 400); + } + const openaiModel = model.slice('openai/'.length) || 'gpt-image-2'; + const quality = (body.quality as 'low' | 'medium' | 'high' | undefined) ?? 'medium'; + const size: OpenAiSize = body.size ?? '1024x1024'; + const effectiveBatch = Math.max(1, Math.min(4, Number(body.n) || 1)); + + // Credits: same per-output tarif as /generate. References don't add + // a surcharge — OpenAI doesn't bill extra for input images, so we + // don't either (plan decision #4). 
+ const cost = creditsFor(model, quality) * effectiveBatch; + const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost); + if (!validation.hasCredits) { + return c.json({ error: 'Insufficient credits', required: cost }, 402); + } + + // Ownership check before we spend credits or burn OpenAI quota. + // meImages are tagged `app='me'` at upload time by the profile + // module; a mediaId that isn't in the caller's set is either stale + // or malicious, treat both as 404. + try { + const { verifyMediaOwnership } = await import('../../lib/media'); + await verifyMediaOwnership(userId, refIds, 'me'); + } catch (err) { + const e = err as Error & { status?: number; missing?: string[] }; + if (e.status === 404) { + return c.json({ error: 'Reference media not found', missing: e.missing }, 404); + } + return c.json({ error: 'Ownership check failed' }, 502); + } + + // Fetch reference buffers in parallel. The mana-media /file route is + // public, so no auth header needed — ownership was already verified. + let referenceBlobs: Array<{ blob: Blob; filename: string }>; + try { + const { getMediaBuffer } = await import('../../lib/media'); + const buffers = await Promise.all(refIds.map((id) => getMediaBuffer(id))); + referenceBlobs = buffers.map((b, i) => { + const ext = b.mimeType.split('/')[1]?.split(';')[0] ?? 'png'; + return { + blob: new Blob([b.buffer], { type: b.mimeType }), + filename: `ref-${i}.${ext === 'jpeg' ? 'jpg' : ext}`, + }; + }); + } catch (_err) { + return c.json({ error: 'Failed to fetch reference media' }, 502); + } + + // Multipart POST to OpenAI. FormData auto-sets Content-Type with a + // boundary; setting it manually would break parsing on OpenAI's end. 
+ const formData = new FormData(); + formData.append('model', openaiModel); + formData.append('prompt', prompt); + formData.append('size', size); + formData.append('quality', quality); + formData.append('n', String(effectiveBatch)); + // gpt-image-* accepts a repeated `image` field for multi-reference. + for (const ref of referenceBlobs) { + formData.append('image', ref.blob, ref.filename); + } + + let generatedBuffers: ArrayBuffer[]; + try { + const res = await fetch('https://api.openai.com/v1/images/edits', { + method: 'POST', + headers: { Authorization: `Bearer ${OPENAI_API_KEY}` }, + body: formData, + }); + if (!res.ok) { + const detail = await res.text().catch(() => ''); + return c.json({ error: 'OpenAI image edit failed', detail: detail.slice(0, 500) }, 502); + } + const data = (await res.json()) as { data?: Array<{ b64_json?: string }> }; + const blobs = (data.data ?? []).map((d) => d.b64_json).filter((b): b is string => !!b); + if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502); + generatedBuffers = blobs.map((b64) => { + const bin = Buffer.from(b64, 'base64'); + return bin.buffer.slice(bin.byteOffset, bin.byteOffset + bin.byteLength) as ArrayBuffer; + }); + } catch (_err) { + return c.json({ error: 'OpenAI image edit failed' }, 502); + } + + // Success path: consume credits, then upload the new images. + // Credits are consumed before the mana-media upload so a mana-media + // outage doesn't let the user retry free of charge after the model + // already ran (OpenAI already billed us). 
+ await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image edit: ${prompt.slice(0, 50)}`); + + try { + const { uploadImageToMedia } = await import('../../lib/media'); + const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = []; + const ts = Date.now(); + let idx = 0; + for (const buf of generatedBuffers) { + const media = await uploadImageToMedia(buf, `edit-${ts}-${idx}.png`, { + app: 'picture', + userId, + }); + images.push({ + imageUrl: media.urls.original, + mediaId: media.id, + thumbnailUrl: media.urls.thumbnail, + }); + idx++; + } + + return c.json({ + images, + prompt, + model, + referenceMediaIds: refIds, + mode: 'edit', + // Back-compat: first image exposed at top level too, matching /generate. + imageUrl: images[0]?.imageUrl, + mediaId: images[0]?.mediaId, + thumbnailUrl: images[0]?.thumbnailUrl, + }); + } catch (_err) { + // OpenAI already produced images and credits were consumed — degrade + // to returning the base64 inline so the client can still persist + // them locally rather than losing the generation entirely. + const inlineImages = generatedBuffers.map((buf, i) => ({ + mediaId: `inline-${Date.now()}-${i}`, + imageUrl: `data:image/png;base64,${Buffer.from(buf).toString('base64')}`, + })); + return c.json({ + images: inlineImages, + prompt, + model, + referenceMediaIds: refIds, + mode: 'edit', + warning: 'mana-media upload failed, images returned inline', + imageUrl: inlineImages[0]?.imageUrl, + }); + } +}); + // ─── Image Upload (server-only: S3) ───────────────────────── routes.post('/upload', async (c) => {