managarten/apps/api/src/modules/picture/routes.ts
Till JS 8a882a3760 feat(wardrobe,picture): Google Nano Banana as a Try-On option
Add Google's Gemini image edit family (Nano Banana) as a user-
selectable model for Wardrobe Try-On next to the existing OpenAI
path. Three concrete choices are now exposed in the Solo and
Outfit Try-On buttons:

  - openai/gpt-image-2          (default, falls back to gpt-image-1
                                 server-side when the org isn't
                                 verified)
  - google/gemini-3-pro-image-preview   (Nano Banana Pro — premium
                                 identity / character consistency)
  - google/gemini-3.1-flash-image-preview (Nano Banana 2 — newest,
                                 fast, cheapest)

All three accept multi-image refs (face + body + garment) through
the same /api/v1/picture/generate-with-reference endpoint; the only
differences are the provider-specific request/response shape and
the model-id routing.
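
A request sketch for that endpoint (field names match the route
handler below; the ids and prompt values are illustrative):

  await fetch('/api/v1/picture/generate-with-reference', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      prompt: 'Dress the person in the attached garment',
      referenceMediaIds: [faceId, bodyId, garmentId],
      model: 'google/gemini-3-pro-image-preview',
      quality: 'medium', // Gemini ignores quality; tier comes from model id
      size: '1024x1536',
      n: 1,
    }),
  });
  // → { images: [{ imageUrl, mediaId, thumbnailUrl }], model, mode: 'edit', … }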

Server (apps/api/src/modules/picture/routes.ts):
- Guard now accepts `openai/*` and `google/*` prefixes and rejects
  everything else as "not supported for edits". Each provider's key
  is validated separately so missing GEMINI_API_KEY doesn't break
  OpenAI calls and vice versa.
- New `callGeminiEdits(modelName)` helper mirrors the shape of
  callOpenAiEdits: encodes the normalized PNG refs as base64
  inline_data parts, POSTs to
  generativelanguage.googleapis.com/v1beta/models/{model}:generateContent
  with responseModalities=["TEXT","IMAGE"] and imageConfig
  (aspectRatio + imageSize), pulls the generated image out of
  candidates[].content.parts[].inlineData.
- Our internal size strings map cleanly: 1024x1024 → 1:1 / 1K,
  1024x1536 → 2:3 / 1K, 1536x1024 → 3:2 / 1K. Gemini 1K is enough
  for the thumbnail sizes Wardrobe renders; going higher bloats
  payload without visible gain.
- creditsFor() gains a google/ branch proportional to upstream
  pricing (pro ≈ 18, 3.1-flash ≈ 6, 2.5-flash ≈ 5).
- Response `model` reports `${provider}/${modelUsed}` so the picture
  row's model metadata is accurate across providers.

Client (apps/mana/apps/web/src/lib/modules/wardrobe):
- api/try-on.ts: export `TryOnModel` union + `DEFAULT_TRY_ON_MODEL`.
  RunGarmentTryOnParams / RunOutfitTryOnParams gain an optional
  `model` field, threaded through `callGenerateWithReference`
  (see the sketch after this list).
- components/TryOnModelPicker.svelte: new segmented control, three
  options with label + one-line hint. Grid-auto-fits so it reflows
  on the narrow workbench card.
- components/GarmentTryOnButton.svelte + TryOnButton.svelte: both
  mount the picker above the Sparkle CTA. `estimatedCredits` on the
  button label updates live when the user switches model so the
  cost signal matches what the server will actually charge.
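
A sketch of the try-on.ts surface (the client file isn't part of
this view, so everything beyond `TryOnModel`, `DEFAULT_TRY_ON_MODEL`
and the optional `model` field is an assumed shape):

  export type TryOnModel =
    | 'openai/gpt-image-2'
    | 'google/gemini-3-pro-image-preview'
    | 'google/gemini-3.1-flash-image-preview';

  export const DEFAULT_TRY_ON_MODEL: TryOnModel = 'openai/gpt-image-2';

  // Hypothetical params shape; field names other than `model` assumed.
  export interface RunGarmentTryOnParams {
    garmentMediaId: string;
    model?: TryOnModel; // forwarded to callGenerateWithReference
  }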

Env (scripts/generate-env.mjs): GEMINI_API_KEY and GOOGLE_API_KEY
now propagate from the root `.env.development` into `apps/api/.env`
so mana-api can pick them up at boot. The route reads GEMINI_API_KEY
with GOOGLE_API_KEY as fallback, matching how mana-llm ships today.
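
A minimal sketch of that propagation, assuming generate-env.mjs
copies a whitelist of keys (the script itself isn't shown here, so
the helper names are hypothetical):

  // scripts/generate-env.mjs (hypothetical shape)
  const FORWARDED_KEYS = ['GEMINI_API_KEY', 'GOOGLE_API_KEY' /* , … */];
  const rootEnv = parseEnvFile('.env.development'); // assumed helper
  const lines = FORWARDED_KEYS.filter((k) => rootEnv[k]).map(
    (k) => `${k}=${rootEnv[k]}`
  );
  appendToEnvFile('apps/api/.env', lines); // assumed helper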

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 16:04:21 +02:00


/**
 * Picture module — AI image generation + upload
 * Ported from apps/picture/apps/server
 *
 * CRUD for images/boards/boardItems handled by mana-sync.
 * This module handles Replicate API, S3 uploads, and explore.
 */
import { Hono } from 'hono';
import { consumeCredits, validateCredits } from '@mana/shared-hono/credits';
import type { AuthVariables } from '@mana/shared-hono';
const REPLICATE_TOKEN = process.env.REPLICATE_API_TOKEN || '';
const IMAGE_GEN_URL = process.env.MANA_IMAGE_GEN_URL || '';
const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';
// Gemini uses API-key auth against generativelanguage.googleapis.com; the
// same AIza... key works for the Nano Banana (gemini-*-image) family.
const GEMINI_API_KEY = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY || '';
// Credit cost by model × quality. Rough proportionality to upstream
// pricing at 1 credit ≈ $0.008. OpenAI gpt-image-* billed $0.006 / $0.053
// / $0.211 per 1024² image for low/medium/high. Google Nano Banana:
// 2.5-flash-image $0.039, 3.1-flash-image-preview $0.045, 3-pro-image-
// preview $0.134. Gemini doesn't expose quality tiers — quality input is
// ignored and the tier is chosen by model id. Flux/local legacy stays
// at a flat 10.
function creditsFor(model: string | undefined, quality: string | undefined): number {
  if (model?.startsWith('openai/')) {
    if (quality === 'low') return 3;
    if (quality === 'high') return 25;
    return 10; // medium / auto
  }
  if (model?.startsWith('google/')) {
    const id = model.slice('google/'.length);
    if (id === 'gemini-3-pro-image-preview') return 18;
    if (id === 'gemini-3.1-flash-image-preview') return 6;
    if (id === 'gemini-2.5-flash-image') return 5;
    return 10;
  }
  return 10;
}
type OpenAiSize = '1024x1024' | '1536x1024' | '1024x1536' | 'auto';
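// Snap an arbitrary width/height request onto the three fixed sizes the
// gpt-image-* generations endpoint accepts, preserving orientation;
// missing dimensions default to square.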
function resolveOpenAiSize(width?: number, height?: number): OpenAiSize {
  if (!width || !height) return '1024x1024';
  const landscape = width > height;
  const portrait = height > width;
  if (landscape) return '1536x1024';
  if (portrait) return '1024x1536';
  return '1024x1024';
}
const routes = new Hono<{ Variables: AuthVariables }>();
// ─── AI Image Generation (server-only: Replicate/local/OpenAI) ─────
routes.post('/generate', async (c) => {
  const userId = c.get('userId');
  const { prompt, model, width, height, negativePrompt, steps, guidanceScale, quality, n } =
    await c.req.json();
  if (!prompt) return c.json({ error: 'prompt required' }, 400);
  // Batch count. OpenAI gpt-image-2 supports up to 8; we clamp to 4 to stay
  // well under Tier-1 IPM limits and cap credit exposure on accidental max-n.
  // Non-OpenAI paths ignore this (Replicate/local produce a single image).
  const batchCount = Math.max(1, Math.min(4, Number(n) || 1));
  const effectiveBatch = model?.startsWith('openai/') ? batchCount : 1;
  const cost = creditsFor(model, quality) * effectiveBatch;
  const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost);
  if (!validation.hasCredits) {
    return c.json({ error: 'Insufficient credits', required: cost }, 402);
  }
  try {
    const imageUrls: string[] = [];
    const imageBuffers: ArrayBuffer[] = [];
    if (model?.startsWith('openai/') && OPENAI_API_KEY) {
      // OpenAI gpt-image-2 — returns base64, not URL, supports n > 1
      const openaiModel = model.slice('openai/'.length) || 'gpt-image-2';
      const res = await fetch('https://api.openai.com/v1/images/generations', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${OPENAI_API_KEY}`,
        },
        body: JSON.stringify({
          model: openaiModel,
          prompt,
          size: resolveOpenAiSize(width, height),
          quality: quality || 'medium',
          n: effectiveBatch,
        }),
      });
      if (!res.ok) {
        const detail = await res.text().catch(() => '');
        return c.json({ error: 'OpenAI image API failed', detail: detail.slice(0, 500) }, 502);
      }
      const data = (await res.json()) as { data?: Array<{ b64_json?: string }> };
      const blobs = (data.data ?? []).map((d) => d.b64_json).filter((b): b is string => !!b);
      if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502);
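      // Buffer.from() may return a view into Node's shared buffer pool;
      // slice out the exact byte range so each ArrayBuffer holds only
      // this image's bytes.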
      for (const b64 of blobs) {
        const binary = Buffer.from(b64, 'base64');
        imageBuffers.push(
          binary.buffer.slice(
            binary.byteOffset,
            binary.byteOffset + binary.byteLength
          ) as ArrayBuffer
        );
      }
    } else if (model?.startsWith('local/') && IMAGE_GEN_URL) {
      // Local generation via mana-image-gen
      const res = await fetch(`${IMAGE_GEN_URL}/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          prompt,
          negative_prompt: negativePrompt,
          width: width || 1024,
          height: height || 1024,
          steps: steps || 20,
          guidance_scale: guidanceScale || 7.5,
        }),
      });
      if (!res.ok) return c.json({ error: 'Local generation failed' }, 502);
      const data = await res.json();
      const localUrl = data.image_url || data.url;
      if (localUrl) imageUrls.push(localUrl);
    } else if (REPLICATE_TOKEN) {
      // Cloud generation via Replicate
      const res = await fetch('https://api.replicate.com/v1/predictions', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${REPLICATE_TOKEN}`,
        },
        body: JSON.stringify({
          model: model || 'black-forest-labs/flux-schnell',
          input: {
            prompt,
            negative_prompt: negativePrompt,
            width: width || 1024,
            height: height || 1024,
            num_inference_steps: steps || 4,
            guidance_scale: guidanceScale || 0,
          },
        }),
      });
      if (!res.ok) return c.json({ error: 'Replicate API failed' }, 502);
      const prediction = await res.json();
      // Poll for completion
      let output = prediction.output;
      if (!output && prediction.urls?.get) {
        for (let i = 0; i < 60; i++) {
          await new Promise((r) => setTimeout(r, 2000));
          const pollRes = await fetch(prediction.urls.get, {
            headers: { Authorization: `Bearer ${REPLICATE_TOKEN}` },
          });
          const pollData = await pollRes.json();
          if (pollData.status === 'succeeded') {
            output = pollData.output;
            break;
          }
          if (pollData.status === 'failed') {
            return c.json({ error: 'Generation failed' }, 500);
          }
        }
      }
      const replicateUrl = Array.isArray(output) ? output[0] : output;
      if (replicateUrl) imageUrls.push(replicateUrl);
    } else {
      return c.json({ error: 'No image generation service configured' }, 503);
    }
    const producedCount = imageBuffers.length + imageUrls.length;
    if (producedCount === 0) return c.json({ error: 'Generation produced no image' }, 502);
    await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image: ${prompt.slice(0, 50)}`);
    // Store each generated image in mana-media for dedup, thumbnails & Photos gallery.
    // OpenAI contributed pre-decoded buffers; Replicate/local contributed URLs to fetch.
    try {
      const { uploadImageToMedia } = await import('../../lib/media');
      const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = [];
      const ts = Date.now();
      let idx = 0;
      for (const buf of imageBuffers) {
        const media = await uploadImageToMedia(buf, `generated-${ts}-${idx}.png`, {
          app: 'picture',
          userId,
        });
        images.push({
          imageUrl: media.urls.original,
          mediaId: media.id,
          thumbnailUrl: media.urls.thumbnail,
        });
        idx++;
      }
      for (const url of imageUrls) {
        const imgRes = await fetch(url);
        const imgBuffer = await imgRes.arrayBuffer();
        const media = await uploadImageToMedia(imgBuffer, `generated-${ts}-${idx}.png`, {
          app: 'picture',
          userId,
        });
        images.push({
          imageUrl: media.urls.original,
          mediaId: media.id,
          thumbnailUrl: media.urls.thumbnail,
        });
        idx++;
      }
      return c.json({
        images,
        prompt,
        model: model || 'flux-schnell',
        // Back-compat: first image exposed at top level too.
        imageUrl: images[0]?.imageUrl,
        mediaId: images[0]?.mediaId,
        thumbnailUrl: images[0]?.thumbnailUrl,
      });
    } catch {
      // Fallback: return raw imageUrls if mana-media is unavailable. OpenAI's
      // base64-only path has no fallback URL — surface an error instead.
      if (imageUrls.length === 0) return c.json({ error: 'Media upload failed' }, 502);
      return c.json({
        images: imageUrls.map((u) => ({ imageUrl: u })),
        imageUrl: imageUrls[0],
        prompt,
        model: model || 'flux-schnell',
      });
    }
  } catch (_err) {
    return c.json({ error: 'Generation failed' }, 500);
  }
});
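// Example /generate call (sketch; values illustrative):
//   POST /api/v1/picture/generate
//   { "prompt": "a red fox, watercolor", "model": "openai/gpt-image-2",
//     "quality": "low", "n": 2 }
//   → cost = 3 × 2 = 6 credits; response carries images[] plus the
//     back-compat top-level imageUrl/mediaId/thumbnailUrl fields.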
// ─── Reference-based Image Edits (OpenAI edits + Gemini) ───────────
//
// Takes 1..MAX_REFERENCE_IMAGES media ids from the caller (expected to
// come from meImages — plan M1, filtered by usage.aiReference=true on
// the client), verifies ownership under the `me` app-tag, downloads the
// raw bytes from mana-media, and forwards a multipart POST to OpenAI's
// `/v1/images/edits`. Generated outputs are pushed back into mana-media
// under app='picture' so the Dexie picture-store can pin them exactly
// like a text-to-image result.
//
// OpenAI's gpt-image family and Google's Gemini image models are wired
// here — both accept multi-image input natively. Replicate/local
// fallback is a later milestone.
// OpenAI gpt-image-1 / gpt-image-2 accept up to 16 reference images per
// edit call. We clamp at 8 to cover the Wardrobe try-on workflow — one
// face-ref + one body-ref + up to six garment photos (top/bottom/shoes/
// outerwear + two accessories) — while keeping credit exposure and
// upload payload size predictable. Pre-wardrobe the cap was 4; bumped
// in docs/plans/wardrobe-module.md M1.
const MAX_REFERENCE_IMAGES = 8;
routes.post('/generate-with-reference', async (c) => {
  const userId = c.get('userId');
  const body = (await c.req.json()) as {
    prompt?: string;
    referenceMediaIds?: string[];
    model?: string;
    quality?: string;
    size?: OpenAiSize;
    n?: number;
  };
  const prompt = (body.prompt ?? '').trim();
  if (!prompt) return c.json({ error: 'prompt required' }, 400);
  const refIds = Array.isArray(body.referenceMediaIds)
    ? body.referenceMediaIds.filter((id): id is string => typeof id === 'string' && id.length > 0)
    : [];
  if (refIds.length === 0) return c.json({ error: 'referenceMediaIds required' }, 400);
  if (refIds.length > MAX_REFERENCE_IMAGES) {
    return c.json(
      { error: `Too many references (max ${MAX_REFERENCE_IMAGES})`, limit: MAX_REFERENCE_IMAGES },
      400
    );
  }
  const model = body.model ?? 'openai/gpt-image-2';
  // Two edit providers wired today: OpenAI's gpt-image-1/2 (openai/)
  // and Google's Nano Banana family (google/) — Gemini 2.5 Flash Image,
  // 3.1 Flash Image Preview, 3 Pro Image Preview. Everything else
  // (Replicate, local FLUX+PuLID) is not supported for multi-ref edits.
  const isOpenAi = model.startsWith('openai/');
  const isGoogle = model.startsWith('google/');
  if (!isOpenAi && !isGoogle) {
    return c.json({ error: `Model ${model} not supported for edits`, model }, 400);
  }
  if (isOpenAi && !OPENAI_API_KEY) {
    return c.json({ error: 'OpenAI image edits not configured' }, 503);
  }
  if (isGoogle && !GEMINI_API_KEY) {
    return c.json({ error: 'Google Gemini image edits not configured' }, 503);
  }
  const openaiModel = isOpenAi ? model.slice('openai/'.length) || 'gpt-image-2' : '';
  const googleModel = isGoogle ? model.slice('google/'.length) || 'gemini-3-pro-image-preview' : '';
  const quality = (body.quality as 'low' | 'medium' | 'high' | undefined) ?? 'medium';
  const size: OpenAiSize = body.size ?? '1024x1024';
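  // Clamp batch size to 1..4, mirroring the /generate cap on credit exposure.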
  const effectiveBatch = Math.max(1, Math.min(4, Number(body.n) || 1));
  // Credits: same per-output tariff as /generate. References don't add
  // a surcharge — OpenAI doesn't bill extra for input images, so we
  // don't either (plan decision #4).
  const cost = creditsFor(model, quality) * effectiveBatch;
  const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost);
  if (!validation.hasCredits) {
    return c.json({ error: 'Insufficient credits', required: cost }, 402);
  }
  // Ownership check before we spend credits or burn OpenAI quota.
  // References span three upload tags today:
  //   - `me`       — face/body portraits from the profile module
  //   - `wardrobe` — garment photos (M4 try-on flow)
  //   - `comic`    — comic-specific anchor / backdrop uploads
  //                  (slot reserved for M6+; no writer lands in
  //                  this app today, M1 character refs come from
  //                  me + wardrobe only).
  // Anything outside these apps is treated as not-owned regardless of
  // mana-media's own view.
  try {
    const { verifyMediaOwnership } = await import('../../lib/media');
    await verifyMediaOwnership(userId, refIds, ['me', 'wardrobe', 'comic']);
  } catch (err) {
    const e = err as Error & { status?: number; missing?: string[] };
    if (e.status === 404) {
      return c.json({ error: 'Reference media not found', missing: e.missing }, 404);
    }
    console.error('[picture/generate-with-reference] ownership check failed', {
      userId,
      refIds,
      error: e.message,
    });
    return c.json({ error: 'Ownership check failed', detail: e.message }, 502);
  }
  // Fetch reference buffers in parallel, normalized to clean RGB PNG via
  // mana-media's transform endpoint. gpt-image-1 is picky about color
  // modes and rejects HEIC / CMYK / palette-PNG / APNG with
  // `Invalid image file or mode for image N` — routing through sharp
  // server-side normalizes every upload before it hits OpenAI, and caps
  // the longest side at 1024px to stay well under the 4 MB/image limit.
  // No aspect-ratio distortion (fit=inside).
  let referenceBlobs: Array<{ blob: Blob; filename: string }>;
  try {
    const { getMediaBufferAsPng } = await import('../../lib/media');
    const buffers = await Promise.all(refIds.map((id) => getMediaBufferAsPng(id, 1024)));
    referenceBlobs = buffers.map((b, i) => ({
      blob: new Blob([b.buffer], { type: b.mimeType }),
      filename: `ref-${i}.png`,
    }));
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error('[picture/generate-with-reference] failed to fetch reference media', {
      refIds,
      error: message,
    });
    return c.json({ error: 'Failed to fetch reference media', detail: message }, 502);
  }
  // Multipart POST to OpenAI. FormData auto-sets Content-Type with a
  // boundary; setting it manually would break parsing on OpenAI's end.
  // gpt-image-* requires the array-syntax `image[]` for multi-reference
  // calls — a repeated plain `image` field triggers OpenAI's
  // `duplicate_parameter` error even though the old DALL·E edits
  // endpoint tolerated it. Keep `image[]` for the single-ref case too:
  // OpenAI accepts the array form with any cardinality ≥ 1, so there's
  // no need to branch here.
  function buildFormData(modelName: string): FormData {
    const fd = new FormData();
    fd.append('model', modelName);
    fd.append('prompt', prompt);
    fd.append('size', size);
    fd.append('quality', quality);
    fd.append('n', String(effectiveBatch));
    for (const ref of referenceBlobs) {
      fd.append('image[]', ref.blob, ref.filename);
    }
    return fd;
  }
  async function callOpenAiEdits(
    modelName: string
  ): Promise<
    | { ok: true; data: { data?: Array<{ b64_json?: string }> } }
    | { ok: false; status: number; body: string }
  > {
    const res = await fetch('https://api.openai.com/v1/images/edits', {
      method: 'POST',
      headers: { Authorization: `Bearer ${OPENAI_API_KEY}` },
      body: buildFormData(modelName),
    });
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      return { ok: false, status: res.status, body };
    }
    return { ok: true, data: (await res.json()) as { data?: Array<{ b64_json?: string }> } };
  }
  // "Verify your organization to use gpt-image-2" is a known OpenAI
  // rejection that stays blocked until the user completes their org
  // verification (a manual step on platform.openai.com, sometimes with
  // a 15-min propagation delay). Falling back to gpt-image-1 keeps the
  // Try-On flow usable in the meantime — same edits endpoint, same
  // `image[]` multi-reference semantics, same quality/size values.
  // Only kicks in when the client requested gpt-image-2 (or left the
  // default): an explicit `openai/gpt-image-1` request stays on 1.
  function needsGptImage1Fallback(body: string, attemptedModel: string): boolean {
    if (attemptedModel !== 'gpt-image-2') return false;
    return /verified to use the model/i.test(body);
  }
  // Map our internal size ("1024x1024" | "1024x1536" | "1536x1024")
  // onto Gemini's separate `aspectRatio` + `imageSize`. 1K covers every
  // Try-On output — going higher bloats payload without identifiable
  // quality gain at the thumbnail sizes Wardrobe actually renders.
  function sizeToGemini(s: OpenAiSize): { aspectRatio: string; imageSize: string } {
    if (s === '1024x1536') return { aspectRatio: '2:3', imageSize: '1K' };
    if (s === '1536x1024') return { aspectRatio: '3:2', imageSize: '1K' };
    return { aspectRatio: '1:1', imageSize: '1K' };
  }
  /** Call the Gemini API's generateContent endpoint with multi-image
   * inline_data refs + a text prompt, asking for IMAGE back. Returns
   * the raw base64 PNG(s) Gemini produced, or a structured failure. */
  async function callGeminiEdits(
    modelName: string
  ): Promise<{ ok: true; images: ArrayBuffer[] } | { ok: false; status: number; body: string }> {
    const geminiSize = sizeToGemini(size);
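    // Request bodies use snake_case (inline_data / mime_type); Gemini's
    // JSON responses come back camelCase (inlineData), parsed below.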
    const parts: Array<{ text: string } | { inline_data: { mime_type: string; data: string } }> = [
      { text: prompt },
    ];
    for (const ref of referenceBlobs) {
      const ab = await ref.blob.arrayBuffer();
      const b64 = Buffer.from(new Uint8Array(ab)).toString('base64');
      parts.push({ inline_data: { mime_type: 'image/png', data: b64 } });
    }
    const url = `https://generativelanguage.googleapis.com/v1beta/models/${encodeURIComponent(modelName)}:generateContent?key=${encodeURIComponent(GEMINI_API_KEY)}`;
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        contents: [{ parts }],
        generationConfig: {
          // IMAGE alone is rejected; TEXT is required alongside.
          responseModalities: ['TEXT', 'IMAGE'],
          imageConfig: {
            aspectRatio: geminiSize.aspectRatio,
            imageSize: geminiSize.imageSize,
          },
        },
      }),
    });
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      return { ok: false, status: res.status, body };
    }
    const data = (await res.json()) as {
      candidates?: Array<{
        content?: {
          parts?: Array<{ inlineData?: { data?: string; mimeType?: string } }>;
        };
      }>;
    };
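    // Collect every image part across all candidates; the TEXT parts that
    // responseModalities forces into the reply carry no image and are skipped.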
    const out: ArrayBuffer[] = [];
    for (const cand of data.candidates ?? []) {
      for (const p of cand.content?.parts ?? []) {
        const b64 = p.inlineData?.data;
        if (!b64) continue;
        const bin = Buffer.from(b64, 'base64');
        out.push(bin.buffer.slice(bin.byteOffset, bin.byteOffset + bin.byteLength) as ArrayBuffer);
      }
    }
    if (out.length === 0) {
      return { ok: false, status: 502, body: 'Gemini returned no image parts' };
    }
    return { ok: true, images: out };
  }
  let generatedBuffers: ArrayBuffer[];
  let modelUsed = isOpenAi ? openaiModel : googleModel;
  try {
    if (isOpenAi) {
      let result = await callOpenAiEdits(openaiModel);
      if (!result.ok && needsGptImage1Fallback(result.body, openaiModel)) {
        console.warn(
          '[picture/generate-with-reference] gpt-image-2 unavailable (org not verified), falling back to gpt-image-1'
        );
        modelUsed = 'gpt-image-1';
        result = await callOpenAiEdits('gpt-image-1');
      }
      if (!result.ok) {
        console.error('[picture/generate-with-reference] OpenAI returned non-ok', {
          status: result.status,
          body: result.body.slice(0, 1000),
          refCount: referenceBlobs.length,
          prompt: prompt.slice(0, 120),
          model: modelUsed,
          size,
          quality,
        });
        return c.json(
          { error: 'OpenAI image edit failed', detail: result.body.slice(0, 500) },
          502
        );
      }
      const blobs = (result.data.data ?? []).map((d) => d.b64_json).filter((b): b is string => !!b);
      if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502);
      generatedBuffers = blobs.map((b64) => {
        const bin = Buffer.from(b64, 'base64');
        return bin.buffer.slice(bin.byteOffset, bin.byteOffset + bin.byteLength) as ArrayBuffer;
      });
    } else {
      // Google / Gemini Nano Banana family.
      const result = await callGeminiEdits(googleModel);
      if (!result.ok) {
        console.error('[picture/generate-with-reference] Gemini returned non-ok', {
          status: result.status,
          body: result.body.slice(0, 1000),
          refCount: referenceBlobs.length,
          prompt: prompt.slice(0, 120),
          model: modelUsed,
          size,
        });
        return c.json(
          { error: 'Gemini image edit failed', detail: result.body.slice(0, 500) },
          502
        );
      }
      generatedBuffers = result.images;
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error('[picture/generate-with-reference] provider fetch threw', {
      provider: isOpenAi ? 'openai' : 'google',
      error: message,
    });
    return c.json(
      { error: `${isOpenAi ? 'OpenAI' : 'Gemini'} image edit failed`, detail: message },
      502
    );
  }
  // Success path: consume credits, then upload the new images.
  // Credits are consumed before the mana-media upload so a mana-media
  // outage doesn't let the user retry free of charge after the model
  // already ran (the provider already billed us).
  await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image edit: ${prompt.slice(0, 50)}`);
  try {
    const { uploadImageToMedia } = await import('../../lib/media');
    const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = [];
    const ts = Date.now();
    let idx = 0;
    for (const buf of generatedBuffers) {
      const media = await uploadImageToMedia(buf, `edit-${ts}-${idx}.png`, {
        app: 'picture',
        userId,
      });
      images.push({
        imageUrl: media.urls.original,
        mediaId: media.id,
        thumbnailUrl: media.urls.thumbnail,
      });
      idx++;
    }
    // Report the model that actually produced the image, not the one
    // the client asked for — matters when the gpt-image-2 fallback
    // kicked in (we want the picture row's `model` metadata to match
    // the real source for future re-generation / audit).
    const providerPrefix = isOpenAi ? 'openai' : 'google';
    return c.json({
      images,
      prompt,
      model: `${providerPrefix}/${modelUsed}`,
      referenceMediaIds: refIds,
      mode: 'edit',
      // Back-compat: first image exposed at top level too, matching /generate.
      imageUrl: images[0]?.imageUrl,
      mediaId: images[0]?.mediaId,
      thumbnailUrl: images[0]?.thumbnailUrl,
    });
  } catch (_err) {
    // The provider already produced images and credits were consumed —
    // degrade to returning the base64 inline so the client can still
    // persist them locally rather than losing the generation entirely.
    const inlineImages = generatedBuffers.map((buf, i) => ({
      mediaId: `inline-${Date.now()}-${i}`,
      imageUrl: `data:image/png;base64,${Buffer.from(buf).toString('base64')}`,
    }));
    return c.json({
      images: inlineImages,
      prompt,
      model,
      referenceMediaIds: refIds,
      mode: 'edit',
      warning: 'mana-media upload failed, images returned inline',
      imageUrl: inlineImages[0]?.imageUrl,
    });
  }
});
// ─── Image Upload (server-only: S3) ─────────────────────────
routes.post('/upload', async (c) => {
  const userId = c.get('userId');
  const formData = await c.req.formData();
  const file = formData.get('file') as File | null;
  if (!file) return c.json({ error: 'No file' }, 400);
  if (file.size > 10 * 1024 * 1024) return c.json({ error: 'Max 10MB' }, 400);
  try {
    const { uploadImageToMedia } = await import('../../lib/media');
    const buffer = await file.arrayBuffer();
    const result = await uploadImageToMedia(buffer, file.name, { app: 'picture', userId });
    return c.json(
      {
        storagePath: result.id,
        publicUrl: result.urls.original,
        mediaId: result.id,
        thumbnailUrl: result.urls.thumbnail,
      },
      201
    );
  } catch (_err) {
    return c.json({ error: 'Upload failed' }, 500);
  }
});
export { routes as pictureRoutes };