mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-18 01:49:40 +02:00
M4 of docs/plans/wardrobe-module.md — the loop closes. A user with at
least a face-ref in the active space can click "Anprobieren" on an
outfit detail page; the client composes a reference call against the
existing M3 `/generate-with-reference` endpoint, persists the result
into the Picture gallery with a `wardrobeOutfitId` back-reference,
and pins a `lastTryOn` snapshot on the outfit so its card instantly
shows the AI preview next time.
Server side — picture/routes.ts:
- verifyMediaOwnership now accepts `apps: string | readonly string[]`.
Under the hood it runs one list() per app-tag and unions the owned
set before the missing-id check. Preserves the 500-row per-app
sanity cap. Single-tag callers unchanged — it's an additive widen.
- Picture /generate-with-reference passes `['me', 'wardrobe']` so
face/body portraits (me-images) and garment photos (wardrobe) can
ride in the same referenceMediaIds array. Anything outside those
two tags still 404s — no expansion of the trust surface.
Client side — wardrobe/api/try-on.ts:
- `runOutfitTryOn({ outfit, garments, faceRefMediaId, bodyRefMediaId?, ... })`
composes the ref list (face → body → up to 6 garments, respecting
the 8-slot server cap), picks portrait 1024x1536 by default (or
1024x1024 in accessory-only mode), and POSTs with
`model='openai/gpt-image-2'`, `quality='medium'`, `n=1`. One render
per click; multi-variant is a future Generator-style extension.
- Default prompts are composed in DE from the outfit meta (name +
occasion); callers can override via `prompt`. Accessory-only mode
uses a tighter studio-portrait phrasing since the fullbody ref is
dropped there.
- `isAccessoryOnlyOutfit()` helper — iff every garment is in
FACE_ONLY_CATEGORIES, skip body-ref and render square. Covers the
Brille-Try-On headline use case.
- On success: inserts a `picture.images` row with generationMode=
'reference', referenceImageIds, and wardrobeOutfitId set; then
calls wardrobeOutfitsStore.setLastTryOn() with imageId + imageUrl
so OutfitCard + DetailOutfitView immediately flip to the AI cover.
TryOnButton — wardrobe/components/TryOnButton.svelte:
- Three states: ready (click to render), missing-references (shows
UserCircle + link to /profile/me-images, with the right hint for
accessory-only vs. fullbody), loading (spinner).
- Credit estimate on the button (10c medium quality).
- Hints: accessory-only, too-many-garments (>6, over server cap),
and non-personal-space disclosure — the family-space case gets its
own sentence since "Try-On rendert dich, nicht dein Kind" is
non-obvious.
- Reads face-ref/body-ref via useImageByPrimary (space-scoped after
the v40 meImages migration — brand/club/family spaces need their
own references uploaded).
UI wiring:
- DetailOutfitView replaces the M3 stub button with <TryOnButton/>.
The existing "Try-On Verlauf"-Strip already reads
`useOutfitTryOns(outfit.id)` which filters `picture.images` by
wardrobeOutfitId — it lights up automatically on first render.
Not in M4 (punted to follow-ups):
- Solo-garment try-on on DetailGarmentView ("nur diese Brille auf
mein Gesicht"). Plan called it out as optional; the outfit flow
already covers it when the outfit contains only that one garment.
- Multi-variant rendering (n=2/4). Usable "show me 3 looks" needs a
picker UI on top, not just a param bump.
- Quality + prompt override in the button. A power-user panel can
come later; default medium + auto-prompt keeps M4's click-to-try-on
one-tap.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
451 lines
16 KiB
TypeScript
451 lines
16 KiB
TypeScript
/**
 * Picture module — AI image generation + upload
 * Ported from apps/picture/apps/server
 *
 * CRUD for images/boards/boardItems is handled by mana-sync.
 * This module handles the image-generation backends (Replicate, local
 * mana-image-gen, OpenAI), S3 uploads, and explore.
 */
|
|
|
|
import { Hono } from 'hono';
|
|
import { consumeCredits, validateCredits } from '@mana/shared-hono/credits';
|
|
import type { AuthVariables } from '@mana/shared-hono';
|
|
|
|
// Backend credentials / endpoints, read once at module load. A missing
// variable becomes the empty string, which the route handlers treat as
// "backend not configured" via plain truthiness checks.
const REPLICATE_TOKEN = process.env.REPLICATE_API_TOKEN || '';
const IMAGE_GEN_URL = process.env.MANA_IMAGE_GEN_URL || '';
const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';
|
|
|
|
/**
 * Credit cost of ONE generated image.
 *
 * OpenAI gpt-image-2 is priced by quality. The tiers reflect roughly
 * $0.006 / $0.053 / $0.211 per 1024² image so users bear approximately
 * linear cost (1 credit ≈ $0.008). Flux/local stays at the flat 10-credit
 * legacy rate.
 *
 * @param model   Model id as sent by the client; only the `openai/` prefix matters.
 * @param quality Requested quality; anything other than `low`/`high`
 *                (including `medium`, `auto`, or undefined) is billed as medium.
 * @returns Credits per image: 3, 10 or 25 for OpenAI models, 10 otherwise.
 */
function creditsFor(model: string | undefined, quality: string | undefined): number {
  if (!model?.startsWith('openai/')) return 10;

  switch (quality) {
    case 'low':
      return 3;
    case 'high':
      return 25;
    default:
      return 10; // medium / auto
  }
}
|
|
|
|
/** Size strings this module sends as the `size` field to the OpenAI image endpoints. */
type OpenAiSize = '1024x1024' | '1536x1024' | '1024x1536' | 'auto';

/**
 * Maps a requested pixel size onto one of the fixed OpenAI sizes by
 * orientation only; the exact dimensions are not preserved.
 *
 * @param width  Requested width in pixels (optional).
 * @param height Requested height in pixels (optional).
 * @returns `1536x1024` for landscape, `1024x1536` for portrait, and
 *          `1024x1024` for square input or when either dimension is
 *          missing or zero.
 */
function resolveOpenAiSize(width?: number, height?: number): OpenAiSize {
  if (!width || !height) return '1024x1024';
  if (width > height) return '1536x1024';
  if (height > width) return '1024x1536';
  return '1024x1024';
}
|
|
|
|
// Hono router for the picture endpoints defined below. Handlers read
// `userId` from the context variables typed by AuthVariables.
const routes = new Hono<{ Variables: AuthVariables }>();
|
|
|
|
// ─── AI Image Generation (server-only: Replicate/local/OpenAI) ─────
|
|
|
|
/**
 * POST /generate — text-to-image generation.
 *
 * Body: `{ prompt, model?, width?, height?, negativePrompt?, steps?,
 * guidanceScale?, quality?, n? }`.
 *
 * Backend selection by model prefix: `openai/…` when OPENAI_API_KEY is
 * set, `local/…` when MANA_IMAGE_GEN_URL is set, otherwise Replicate when
 * a token is set, otherwise 503.
 *
 * Credits are validated up front and consumed only after a backend has
 * produced at least one image. Results are stored in mana-media; if that
 * fails, URL-based results are returned raw, while base64-only (OpenAI)
 * results surface a 502.
 *
 * Status codes: 400 bad input, 402 insufficient credits, 502 upstream
 * failure, 503 nothing configured, 500 unexpected error.
 */
routes.post('/generate', async (c) => {
  const userId = c.get('userId');

  // Malformed or non-object JSON is a client error, not an unhandled exception.
  let payload: unknown;
  try {
    payload = await c.req.json();
  } catch {
    return c.json({ error: 'Invalid JSON body' }, 400);
  }
  if (typeof payload !== 'object' || payload === null) {
    return c.json({ error: 'Invalid JSON body' }, 400);
  }
  const body = payload as {
    prompt?: unknown;
    model?: unknown;
    width?: number;
    height?: number;
    negativePrompt?: string;
    steps?: number;
    guidanceScale?: number;
    quality?: string;
    n?: number;
  };
  const { width, height, negativePrompt, steps, guidanceScale, quality, n } = body;

  // `prompt` is sliced and `model` is prefix-tested further down, so both
  // must really be strings before any credits or upstream quota are touched.
  const prompt = body.prompt;
  if (typeof prompt !== 'string' || !prompt) return c.json({ error: 'prompt required' }, 400);
  if (body.model != null && typeof body.model !== 'string') {
    return c.json({ error: 'model must be a string' }, 400);
  }
  const model = typeof body.model === 'string' ? body.model : undefined;

  // Batch count. OpenAI gpt-image-2 supports up to 8; we clamp to 4 to stay
  // well under Tier-1 IPM limits and cap credit exposure on accidental max-n.
  // Non-OpenAI paths ignore this (Replicate/local produce a single image).
  const batchCount = Math.max(1, Math.min(4, Number(n) || 1));
  const effectiveBatch = model?.startsWith('openai/') ? batchCount : 1;
  const cost = creditsFor(model, quality) * effectiveBatch;
  const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost);
  if (!validation.hasCredits) {
    return c.json({ error: 'Insufficient credits', required: cost }, 402);
  }

  try {
    // Replicate/local hand back URLs; OpenAI hands back decoded bytes.
    const imageUrls: string[] = [];
    const imageBuffers: ArrayBuffer[] = [];

    if (model?.startsWith('openai/') && OPENAI_API_KEY) {
      // OpenAI gpt-image-2 — returns base64, not URL, supports n > 1
      const openaiModel = model.slice('openai/'.length) || 'gpt-image-2';
      const res = await fetch('https://api.openai.com/v1/images/generations', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${OPENAI_API_KEY}`,
        },
        body: JSON.stringify({
          model: openaiModel,
          prompt,
          size: resolveOpenAiSize(width, height),
          quality: quality || 'medium',
          n: effectiveBatch,
        }),
      });
      if (!res.ok) {
        const detail = await res.text().catch(() => '');
        return c.json({ error: 'OpenAI image API failed', detail: detail.slice(0, 500) }, 502);
      }
      const data = (await res.json()) as { data?: Array<{ b64_json?: string }> };
      const blobs = (data.data ?? []).map((d) => d.b64_json).filter((b): b is string => !!b);
      if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502);
      for (const b64 of blobs) {
        const binary = Buffer.from(b64, 'base64');
        // Copy out exactly this Buffer's window of the underlying ArrayBuffer.
        imageBuffers.push(
          binary.buffer.slice(
            binary.byteOffset,
            binary.byteOffset + binary.byteLength
          ) as ArrayBuffer
        );
      }
    } else if (model?.startsWith('local/') && IMAGE_GEN_URL) {
      // Local generation via mana-image-gen
      const res = await fetch(`${IMAGE_GEN_URL}/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          prompt,
          negative_prompt: negativePrompt,
          width: width || 1024,
          height: height || 1024,
          steps: steps || 20,
          guidance_scale: guidanceScale || 7.5,
        }),
      });
      if (!res.ok) return c.json({ error: 'Local generation failed' }, 502);
      const data = (await res.json()) as { image_url?: string; url?: string };
      const localUrl = data.image_url || data.url;
      if (localUrl) imageUrls.push(localUrl);
    } else if (REPLICATE_TOKEN) {
      // Cloud generation via Replicate
      type ReplicatePrediction = {
        status?: string;
        output?: string | string[] | null;
        urls?: { get?: string };
      };
      const res = await fetch('https://api.replicate.com/v1/predictions', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${REPLICATE_TOKEN}`,
        },
        body: JSON.stringify({
          model: model || 'black-forest-labs/flux-schnell',
          input: {
            prompt,
            negative_prompt: negativePrompt,
            width: width || 1024,
            height: height || 1024,
            num_inference_steps: steps || 4,
            guidance_scale: guidanceScale || 0,
          },
        }),
      });
      if (!res.ok) return c.json({ error: 'Replicate API failed' }, 502);

      const prediction = (await res.json()) as ReplicatePrediction;

      // Poll for completion: up to 60 attempts, 2 s apart.
      let output = prediction.output;
      const pollUrl = prediction.urls?.get;
      if (!output && pollUrl) {
        for (let attempt = 0; attempt < 60; attempt++) {
          await new Promise((r) => setTimeout(r, 2000));
          const pollRes = await fetch(pollUrl, {
            headers: { Authorization: `Bearer ${REPLICATE_TOKEN}` },
          });
          // A transient non-2xx poll (possibly with a non-JSON body) must not
          // abort the whole generation; just try again on the next tick.
          if (!pollRes.ok) continue;
          const pollData = (await pollRes.json()) as ReplicatePrediction;
          if (pollData.status === 'succeeded') {
            output = pollData.output;
            break;
          }
          // `canceled` is terminal too; polling on would only burn the timeout.
          if (pollData.status === 'failed' || pollData.status === 'canceled') {
            return c.json({ error: 'Generation failed' }, 500);
          }
        }
      }

      const replicateUrl = Array.isArray(output) ? output[0] : output;
      if (replicateUrl) imageUrls.push(replicateUrl);
    } else {
      return c.json({ error: 'No image generation service configured' }, 503);
    }

    const producedCount = imageBuffers.length + imageUrls.length;
    if (producedCount === 0) return c.json({ error: 'Generation produced no image' }, 502);

    await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image: ${prompt.slice(0, 50)}`);

    // Store each generated image in mana-media for dedup, thumbnails & Photos gallery.
    // OpenAI contributed pre-decoded buffers; Replicate/local contributed URLs to fetch.
    try {
      const { uploadImageToMedia } = await import('../../lib/media');
      const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = [];
      const ts = Date.now();

      // Uploads one buffer; the running index in the filename is the number
      // of images stored so far.
      const store = async (buf: ArrayBuffer): Promise<void> => {
        const media = await uploadImageToMedia(buf, `generated-${ts}-${images.length}.png`, {
          app: 'picture',
          userId,
        });
        images.push({
          imageUrl: media.urls.original,
          mediaId: media.id,
          thumbnailUrl: media.urls.thumbnail,
        });
      };

      for (const buf of imageBuffers) {
        await store(buf);
      }
      for (const url of imageUrls) {
        const imgRes = await fetch(url);
        // Never upload an error page as if it were the image; throwing here
        // lands in the raw-URL fallback below.
        if (!imgRes.ok) throw new Error(`Image download failed (${imgRes.status})`);
        await store(await imgRes.arrayBuffer());
      }

      return c.json({
        images,
        prompt,
        model: model || 'flux-schnell',
        // Back-compat: first image exposed at top level too.
        imageUrl: images[0]?.imageUrl,
        mediaId: images[0]?.mediaId,
        thumbnailUrl: images[0]?.thumbnailUrl,
      });
    } catch {
      // Fallback: return raw imageUrls if mana-media is unavailable. OpenAI's
      // base64-only path has no fallback URL — surface an error instead.
      if (imageUrls.length === 0) return c.json({ error: 'Media upload failed' }, 502);
      return c.json({
        images: imageUrls.map((u) => ({ imageUrl: u })),
        imageUrl: imageUrls[0],
        prompt,
        model: model || 'flux-schnell',
      });
    }
  } catch {
    return c.json({ error: 'Generation failed' }, 500);
  }
});
|
|
|
|
// ─── Reference-based Image Edits (OpenAI /v1/images/edits) ─────────
//
// Takes 1..MAX_REFERENCE_IMAGES media ids from the caller (expected to
// come from meImages — plan M1, filtered by usage.aiReference=true on
// the client — or from wardrobe garment photos for the try-on flow),
// verifies ownership under the `me` and `wardrobe` app-tags, downloads
// the raw bytes from mana-media, and forwards a multipart POST to
// OpenAI's `/v1/images/edits`. Generated outputs are pushed back into
// mana-media under app='picture' so the Dexie picture-store can pin them
// exactly like a text-to-image result.
//
// Only gpt-image-1 / gpt-image-2 are wired here — they accept multi-
// image input natively. Replicate/local fallback is a later milestone.
|
|
|
|
// OpenAI gpt-image-1 / gpt-image-2 accept up to 16 reference images per
// edit call. We clamp at 8 to cover the Wardrobe try-on workflow — one
// face-ref + one body-ref + up to six garment photos (top/bottom/shoes/
// outerwear + two accessories) — while keeping credit exposure and
// upload payload size predictable. Pre-wardrobe the cap was 4; bumped
// in docs/plans/wardrobe-module.md M1.
const MAX_REFERENCE_IMAGES = 8;
|
|
|
|
/**
 * POST /generate-with-reference — reference-based image edit via OpenAI.
 *
 * Body: `{ prompt, referenceMediaIds, model?, quality?, size?, n? }`.
 *
 * Order of operations: validate input → validate credits → verify the
 * caller owns every reference (tags `me` / `wardrobe`) → download the
 * references → multipart POST to OpenAI → consume credits → upload the
 * outputs to mana-media. If that final upload fails the images are
 * returned inline as data URLs so the paid generation is not lost.
 *
 * Status codes: 400 bad input, 402 insufficient credits, 404 reference
 * not found, 502 upstream failure, 503 OpenAI not configured.
 */
routes.post('/generate-with-reference', async (c) => {
  const userId = c.get('userId');

  // Malformed or non-object JSON is a client error, not an unhandled exception.
  let payload: unknown;
  try {
    payload = await c.req.json();
  } catch {
    return c.json({ error: 'Invalid JSON body' }, 400);
  }
  if (typeof payload !== 'object' || payload === null) {
    return c.json({ error: 'Invalid JSON body' }, 400);
  }
  const body = payload as {
    prompt?: unknown;
    referenceMediaIds?: unknown;
    model?: unknown;
    quality?: string;
    size?: OpenAiSize;
    n?: number;
  };

  // A non-string prompt counts as missing instead of throwing on `.trim()`.
  const prompt = typeof body.prompt === 'string' ? body.prompt.trim() : '';
  if (!prompt) return c.json({ error: 'prompt required' }, 400);

  // Entries that are not non-empty strings are dropped silently.
  const refIds: string[] = Array.isArray(body.referenceMediaIds)
    ? body.referenceMediaIds.filter((id): id is string => typeof id === 'string' && id.length > 0)
    : [];
  if (refIds.length === 0) return c.json({ error: 'referenceMediaIds required' }, 400);
  if (refIds.length > MAX_REFERENCE_IMAGES) {
    return c.json(
      { error: `Too many references (max ${MAX_REFERENCE_IMAGES})`, limit: MAX_REFERENCE_IMAGES },
      400
    );
  }

  if (!OPENAI_API_KEY) {
    return c.json({ error: 'OpenAI image edits not configured' }, 503);
  }

  if (body.model != null && typeof body.model !== 'string') {
    return c.json({ error: 'model must be a string' }, 400);
  }
  const model = typeof body.model === 'string' ? body.model : 'openai/gpt-image-2';
  if (!model.startsWith('openai/')) {
    // Edits routing for non-OpenAI backends (local FLUX+PuLID, Replicate)
    // is future work — plan M5. Fail loud rather than silently downgrading.
    return c.json({ error: `Model ${model} not supported for edits`, model }, 400);
  }
  const openaiModel = model.slice('openai/'.length) || 'gpt-image-2';
  const quality = body.quality ?? 'medium';
  const size: OpenAiSize = body.size ?? '1024x1024';
  const effectiveBatch = Math.max(1, Math.min(4, Number(body.n) || 1));

  // Credits: same per-output rate as /generate. References don't add
  // a surcharge — OpenAI doesn't bill extra for input images, so we
  // don't either (plan decision #4).
  const cost = creditsFor(model, quality) * effectiveBatch;
  const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost);
  if (!validation.hasCredits) {
    return c.json({ error: 'Insufficient credits', required: cost }, 402);
  }

  // Ownership check before we spend credits or burn OpenAI quota.
  // References span two upload tags: `me` for face/body portraits
  // (profile module) and `wardrobe` for garment photos (wardrobe
  // module, M4 try-on flow). Anything outside those two apps is
  // treated as not-owned regardless of mana-media's own view.
  try {
    const { verifyMediaOwnership } = await import('../../lib/media');
    await verifyMediaOwnership(userId, refIds, ['me', 'wardrobe']);
  } catch (err) {
    const e = err as Error & { status?: number; missing?: string[] };
    if (e.status === 404) {
      return c.json({ error: 'Reference media not found', missing: e.missing }, 404);
    }
    return c.json({ error: 'Ownership check failed' }, 502);
  }

  // Fetch reference buffers in parallel. The mana-media /file route is
  // public, so no auth header needed — ownership was already verified.
  let referenceBlobs: Array<{ blob: Blob; filename: string }>;
  try {
    const { getMediaBuffer } = await import('../../lib/media');
    const buffers = await Promise.all(refIds.map((id) => getMediaBuffer(id)));
    referenceBlobs = buffers.map((b, i) => {
      // Derive the extension from the MIME subtype, ignoring any `;params`.
      const ext = b.mimeType.split('/')[1]?.split(';')[0] ?? 'png';
      return {
        blob: new Blob([b.buffer], { type: b.mimeType }),
        filename: `ref-${i}.${ext === 'jpeg' ? 'jpg' : ext}`,
      };
    });
  } catch {
    return c.json({ error: 'Failed to fetch reference media' }, 502);
  }

  // Multipart POST to OpenAI. FormData auto-sets Content-Type with a
  // boundary; setting it manually would break parsing on OpenAI's end.
  const formData = new FormData();
  formData.append('model', openaiModel);
  formData.append('prompt', prompt);
  formData.append('size', size);
  formData.append('quality', quality);
  formData.append('n', String(effectiveBatch));
  // gpt-image-* accepts a repeated `image` field for multi-reference.
  for (const ref of referenceBlobs) {
    formData.append('image', ref.blob, ref.filename);
  }

  let generatedBuffers: ArrayBuffer[];
  try {
    const res = await fetch('https://api.openai.com/v1/images/edits', {
      method: 'POST',
      headers: { Authorization: `Bearer ${OPENAI_API_KEY}` },
      body: formData,
    });
    if (!res.ok) {
      const detail = await res.text().catch(() => '');
      return c.json({ error: 'OpenAI image edit failed', detail: detail.slice(0, 500) }, 502);
    }
    const data = (await res.json()) as { data?: Array<{ b64_json?: string }> };
    const blobs = (data.data ?? []).map((d) => d.b64_json).filter((b): b is string => !!b);
    if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502);
    generatedBuffers = blobs.map((b64) => {
      const bin = Buffer.from(b64, 'base64');
      // Copy out exactly this Buffer's window of the underlying ArrayBuffer.
      return bin.buffer.slice(bin.byteOffset, bin.byteOffset + bin.byteLength) as ArrayBuffer;
    });
  } catch {
    return c.json({ error: 'OpenAI image edit failed' }, 502);
  }

  // Success path: consume credits, then upload the new images.
  // Credits are consumed before the mana-media upload so a mana-media
  // outage doesn't let the user retry free of charge after the model
  // already ran (OpenAI already billed us).
  await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image edit: ${prompt.slice(0, 50)}`);

  try {
    const { uploadImageToMedia } = await import('../../lib/media');
    const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = [];
    const ts = Date.now();
    for (const [idx, buf] of generatedBuffers.entries()) {
      const media = await uploadImageToMedia(buf, `edit-${ts}-${idx}.png`, {
        app: 'picture',
        userId,
      });
      images.push({
        imageUrl: media.urls.original,
        mediaId: media.id,
        thumbnailUrl: media.urls.thumbnail,
      });
    }

    return c.json({
      images,
      prompt,
      model,
      referenceMediaIds: refIds,
      mode: 'edit',
      // Back-compat: first image exposed at top level too, matching /generate.
      imageUrl: images[0]?.imageUrl,
      mediaId: images[0]?.mediaId,
      thumbnailUrl: images[0]?.thumbnailUrl,
    });
  } catch {
    // OpenAI already produced images and credits were consumed — degrade
    // to returning the base64 inline so the client can still persist
    // them locally rather than losing the generation entirely.
    const inlineImages = generatedBuffers.map((buf, i) => ({
      mediaId: `inline-${Date.now()}-${i}`,
      imageUrl: `data:image/png;base64,${Buffer.from(buf).toString('base64')}`,
    }));
    return c.json({
      images: inlineImages,
      prompt,
      model,
      referenceMediaIds: refIds,
      mode: 'edit',
      warning: 'mana-media upload failed, images returned inline',
      imageUrl: inlineImages[0]?.imageUrl,
    });
  }
});
|
|
|
|
// ─── Image Upload (server-only: S3) ─────────────────────────
|
|
|
|
/**
 * POST /upload — stores a user-supplied image in mana-media under
 * app='picture'.
 *
 * Expects a multipart form with a `file` field of at most 10 MB.
 * Responds 201 with `{ storagePath, publicUrl, mediaId, thumbnailUrl }`,
 * 400 for a missing, non-file or oversized `file` or an unparsable form
 * body, and 500 when the upload itself fails.
 */
routes.post('/upload', async (c) => {
  const userId = c.get('userId');

  // A body that is not valid form data is a client error, not a crash.
  let formData: FormData;
  try {
    formData = await c.req.formData();
  } catch {
    return c.json({ error: 'Invalid form data' }, 400);
  }

  // `get` yields a plain string for text fields; only a real file part
  // has `size` / `arrayBuffer()`, so reject strings alongside missing values.
  const file = formData.get('file');
  if (!file || typeof file === 'string') return c.json({ error: 'No file' }, 400);
  if (file.size > 10 * 1024 * 1024) return c.json({ error: 'Max 10MB' }, 400);

  try {
    const { uploadImageToMedia } = await import('../../lib/media');
    const buffer = await file.arrayBuffer();
    const result = await uploadImageToMedia(buffer, file.name, { app: 'picture', userId });

    return c.json(
      {
        // storagePath and mediaId both carry the mana-media id.
        storagePath: result.id,
        publicUrl: result.urls.original,
        mediaId: result.id,
        thumbnailUrl: result.urls.thumbnail,
      },
      201
    );
  } catch {
    return c.json({ error: 'Upload failed' }, 500);
  }
});
|
|
|
|
export { routes as pictureRoutes };
|