diff --git a/apps/api/src/lib/media.ts b/apps/api/src/lib/media.ts
index 90ec41260..f1d99f112 100644
--- a/apps/api/src/lib/media.ts
+++ b/apps/api/src/lib/media.ts
@@ -57,6 +57,53 @@ export async function getMediaBuffer(
   return { buffer, mimeType };
 }
 
+/**
+ * Download a media file normalized to plain RGB PNG, max `longestSide`
+ * pixels on its longer edge (default 1024). Uses mana-media's `/transform`
+ * endpoint, which pipes the original through `sharp` server-side — that
+ * handles HEIC from iPhones, palette-mode PNGs, CMYK JPEGs, weird color
+ * profiles, and other formats OpenAI's gpt-image-1 rejects with
+ * `invalid_image_file` or `Invalid image file or mode`.
+ *
+ * `fit=inside` preserves aspect ratio (no distortion on portrait/landscape
+ * refs) and only caps the longer side, which keeps payloads comfortably
+ * under OpenAI's 4 MB/image limit without losing reference fidelity.
+ *
+ * @param mediaId - Id passed to the media client's `getOriginalUrl`.
+ * @param longestSide - Pixel cap sent as both `w` and `h`; must be a
+ *   positive integer.
+ * @returns The response body and the fixed `image/png` mime type.
+ * @throws RangeError when `longestSide` is not a positive integer.
+ * @throws Error when the original URL does not end in `/file`, or when
+ *   the transform request answers with a non-2xx status.
+ */
+export async function getMediaBufferAsPng(
+  mediaId: string,
+  longestSide = 1024
+): Promise<{ buffer: ArrayBuffer; mimeType: 'image/png' }> {
+  // Reject NaN, fractions, zero and negatives up front so a bad bound
+  // fails here with a clear message rather than as a remote HTTP error.
+  if (!Number.isInteger(longestSide) || longestSide <= 0) {
+    throw new RangeError(`longestSide must be a positive integer, got ${longestSide}`);
+  }
+  const originalUrl = getMediaClient().getOriginalUrl(mediaId);
+  const base = originalUrl.replace(/\/file$/, '/transform');
+  // A no-op rewrite means we would fetch something other than the
+  // transform endpoint while still labelling the bytes as image/png.
+  if (base === originalUrl) {
+    throw new Error(
+      `mana-media transform URL could not be derived for ${mediaId}: expected a /file URL`
+    );
+  }
+  const url = `${base}?format=png&w=${longestSide}&h=${longestSide}&fit=inside`;
+  const res = await fetch(url);
+  if (!res.ok) {
+    throw new Error(`mana-media transform failed for ${mediaId}: HTTP ${res.status}`);
+  }
+  const buffer = await res.arrayBuffer();
+  return { buffer, mimeType: 'image/png' };
+}
+
 /**
  * Verify that every id in `mediaIds` is owned by `userId` under one of
  * the given app scopes.
Throws `{ status: 404, missing }` when any id diff --git a/apps/api/src/modules/picture/routes.ts b/apps/api/src/modules/picture/routes.ts index 156702963..e1757ac82 100644 --- a/apps/api/src/modules/picture/routes.ts +++ b/apps/api/src/modules/picture/routes.ts @@ -317,19 +317,21 @@ routes.post('/generate-with-reference', async (c) => { return c.json({ error: 'Ownership check failed', detail: e.message }, 502); } - // Fetch reference buffers in parallel. The mana-media /file route is - // public, so no auth header needed — ownership was already verified. + // Fetch reference buffers in parallel, normalized to clean RGB PNG via + // mana-media's transform endpoint. gpt-image-1 is picky about color + // modes and rejects HEIC / CMYK / palette-PNG / APNG with + // `Invalid image file or mode for image N` — routing through sharp + // server-side normalizes every upload before it hits OpenAI, and caps + // the longest side at 1024px to stay well under the 4 MB/image limit. + // No aspect-ratio distortion (fit=inside). let referenceBlobs: Array<{ blob: Blob; filename: string }>; try { - const { getMediaBuffer } = await import('../../lib/media'); - const buffers = await Promise.all(refIds.map((id) => getMediaBuffer(id))); - referenceBlobs = buffers.map((b, i) => { - const ext = b.mimeType.split('/')[1]?.split(';')[0] ?? 'png'; - return { - blob: new Blob([b.buffer], { type: b.mimeType }), - filename: `ref-${i}.${ext === 'jpeg' ? 'jpg' : ext}`, - }; - }); + const { getMediaBufferAsPng } = await import('../../lib/media'); + const buffers = await Promise.all(refIds.map((id) => getMediaBufferAsPng(id, 1024))); + referenceBlobs = buffers.map((b, i) => ({ + blob: new Blob([b.buffer], { type: b.mimeType }), + filename: `ref-${i}.png`, + })); } catch (err) { const message = err instanceof Error ? err.message : String(err); console.error('[picture/generate-with-reference] failed to fetch reference media', {