From 3a68a63728316baa74d0a3fc38299269291cd550 Mon Sep 17 00:00:00 2001
From: Till JS
Date: Thu, 23 Apr 2026 00:37:15 +0200
Subject: [PATCH] feat(picture,api): GPT-Image-2 image generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a third provider path to /api/v1/picture/generate that calls OpenAI
gpt-image-2 when model starts with "openai/". Supports n=1..4 batch
generation with character continuity, base64 response decoded
server-side and uploaded to mana-media for dedup + thumbnails. Credit
cost scales by quality (low=3, medium=10, high=25) × n.

Env plumbing:
- scripts/generate-env.mjs: new apps/api/.env stanza propagates
  OPENAI_API_KEY + REPLICATE_API_TOKEN from .env.secrets
- .env.macmini.example: documents OPENAI_API_KEY for prod

Frontend /picture/generate: model + quality + aspect-ratio + batch-count
selectors, real fetch with auth, persists each image via
imagesStore.insert (encrypted + synced). Wrapped in ModuleShell
variant=fill with back-arrow to /picture and a live credit badge in the
header actions slot.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .env.macmini.example                    |   7 +
 apps/api/src/modules/picture/routes.ts  | 141 +++++-
 .../(app)/picture/generate/+page.svelte | 417 +++++++++++++-----
 scripts/generate-env.mjs                |  25 ++
 4 files changed, 459 insertions(+), 131 deletions(-)

diff --git a/.env.macmini.example b/.env.macmini.example
index 6634c71a5..c35c8c9cc 100644
--- a/.env.macmini.example
+++ b/.env.macmini.example
@@ -76,6 +76,13 @@ SUPABASE_SERVICE_ROLE_KEY=
 AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
 AZURE_OPENAI_API_KEY=your-api-key-here
 
+# ============================================
+# OpenAI (direct, non-Azure)
+# ============================================
+# Consumed by mana-research (deep research) and mana-api picture module
+# for gpt-image-2 image generation. Distinct from AZURE_OPENAI_* above.
+OPENAI_API_KEY= + # ============================================ # Monitoring (Grafana) # ============================================ diff --git a/apps/api/src/modules/picture/routes.ts b/apps/api/src/modules/picture/routes.ts index c7e8648e3..c24aa36bd 100644 --- a/apps/api/src/modules/picture/routes.ts +++ b/apps/api/src/modules/picture/routes.ts @@ -12,27 +12,90 @@ import type { AuthVariables } from '@mana/shared-hono'; const REPLICATE_TOKEN = process.env.REPLICATE_API_TOKEN || ''; const IMAGE_GEN_URL = process.env.MANA_IMAGE_GEN_URL || ''; +const OPENAI_API_KEY = process.env.OPENAI_API_KEY || ''; + +// Credit cost for OpenAI gpt-image-2 by quality. Reflects ~$0.006 / $0.053 / $0.211 +// per 1024² image so users bear roughly linear cost (1 credit ≈ $0.008). +// Flux/local stays at the flat 10-credit legacy rate. +function creditsFor(model: string | undefined, quality: string | undefined): number { + if (model?.startsWith('openai/')) { + if (quality === 'low') return 3; + if (quality === 'high') return 25; + return 10; // medium / auto + } + return 10; +} + +type OpenAiSize = '1024x1024' | '1536x1024' | '1024x1536' | 'auto'; +function resolveOpenAiSize(width?: number, height?: number): OpenAiSize { + if (!width || !height) return '1024x1024'; + const landscape = width > height; + const portrait = height > width; + if (landscape) return '1536x1024'; + if (portrait) return '1024x1536'; + return '1024x1024'; +} const routes = new Hono<{ Variables: AuthVariables }>(); -// ─── AI Image Generation (server-only: Replicate/local) ───── +// ─── AI Image Generation (server-only: Replicate/local/OpenAI) ───── routes.post('/generate', async (c) => { const userId = c.get('userId'); - const { prompt, model, width, height, negativePrompt, steps, guidanceScale } = await c.req.json(); + const { prompt, model, width, height, negativePrompt, steps, guidanceScale, quality, n } = + await c.req.json(); if (!prompt) return c.json({ error: 'prompt required' }, 400); - const cost = 10; 
+ // Batch count. OpenAI gpt-image-2 supports up to 8; we clamp to 4 to stay + // well under Tier-1 IPM limits and cap credit exposure on accidental max-n. + // Non-OpenAI paths ignore this (Replicate/local produce a single image). + const batchCount = Math.max(1, Math.min(4, Number(n) || 1)); + const effectiveBatch = model?.startsWith('openai/') ? batchCount : 1; + const cost = creditsFor(model, quality) * effectiveBatch; const validation = await validateCredits(userId, 'AI_IMAGE_GENERATION', cost); if (!validation.hasCredits) { return c.json({ error: 'Insufficient credits', required: cost }, 402); } try { - let imageUrl: string; + const imageUrls: string[] = []; + const imageBuffers: ArrayBuffer[] = []; - if (model?.startsWith('local/') && IMAGE_GEN_URL) { + if (model?.startsWith('openai/') && OPENAI_API_KEY) { + // OpenAI gpt-image-2 — returns base64, not URL, supports n > 1 + const openaiModel = model.slice('openai/'.length) || 'gpt-image-2'; + const res = await fetch('https://api.openai.com/v1/images/generations', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${OPENAI_API_KEY}`, + }, + body: JSON.stringify({ + model: openaiModel, + prompt, + size: resolveOpenAiSize(width, height), + quality: quality || 'medium', + n: effectiveBatch, + }), + }); + if (!res.ok) { + const detail = await res.text().catch(() => ''); + return c.json({ error: 'OpenAI image API failed', detail: detail.slice(0, 500) }, 502); + } + const data = (await res.json()) as { data?: Array<{ b64_json?: string }> }; + const blobs = (data.data ?? 
[]).map((d) => d.b64_json).filter((b): b is string => !!b); + if (blobs.length === 0) return c.json({ error: 'OpenAI returned no image data' }, 502); + for (const b64 of blobs) { + const binary = Buffer.from(b64, 'base64'); + imageBuffers.push( + binary.buffer.slice( + binary.byteOffset, + binary.byteOffset + binary.byteLength + ) as ArrayBuffer + ); + } + } else if (model?.startsWith('local/') && IMAGE_GEN_URL) { // Local generation via mana-image-gen const res = await fetch(`${IMAGE_GEN_URL}/generate`, { method: 'POST', @@ -48,7 +111,8 @@ routes.post('/generate', async (c) => { }); if (!res.ok) return c.json({ error: 'Local generation failed' }, 502); const data = await res.json(); - imageUrl = data.image_url || data.url; + const localUrl = data.image_url || data.url; + if (localUrl) imageUrls.push(localUrl); } else if (REPLICATE_TOKEN) { // Cloud generation via Replicate const res = await fetch('https://api.replicate.com/v1/predictions', { @@ -92,33 +156,70 @@ routes.post('/generate', async (c) => { } } - imageUrl = Array.isArray(output) ? output[0] : output; + const replicateUrl = Array.isArray(output) ? output[0] : output; + if (replicateUrl) imageUrls.push(replicateUrl); } else { return c.json({ error: 'No image generation service configured' }, 503); } + const producedCount = imageBuffers.length + imageUrls.length; + if (producedCount === 0) return c.json({ error: 'Generation produced no image' }, 502); + await consumeCredits(userId, 'AI_IMAGE_GENERATION', cost, `Image: ${prompt.slice(0, 50)}`); - // Store generated image in mana-media for dedup, thumbnails & Photos gallery + // Store each generated image in mana-media for dedup, thumbnails & Photos gallery. + // OpenAI contributed pre-decoded buffers; Replicate/local contributed URLs to fetch. 
try { const { uploadImageToMedia } = await import('../../lib/media'); - const imgRes = await fetch(imageUrl); - const imgBuffer = await imgRes.arrayBuffer(); - const media = await uploadImageToMedia(imgBuffer, `generated-${Date.now()}.png`, { - app: 'picture', - userId, - }); + const images: Array<{ imageUrl: string; mediaId: string; thumbnailUrl?: string }> = []; + const ts = Date.now(); + let idx = 0; + for (const buf of imageBuffers) { + const media = await uploadImageToMedia(buf, `generated-${ts}-${idx}.png`, { + app: 'picture', + userId, + }); + images.push({ + imageUrl: media.urls.original, + mediaId: media.id, + thumbnailUrl: media.urls.thumbnail, + }); + idx++; + } + for (const url of imageUrls) { + const imgRes = await fetch(url); + const imgBuffer = await imgRes.arrayBuffer(); + const media = await uploadImageToMedia(imgBuffer, `generated-${ts}-${idx}.png`, { + app: 'picture', + userId, + }); + images.push({ + imageUrl: media.urls.original, + mediaId: media.id, + thumbnailUrl: media.urls.thumbnail, + }); + idx++; + } return c.json({ - imageUrl: media.urls.original, - mediaId: media.id, - thumbnailUrl: media.urls.thumbnail, + images, + prompt, + model: model || 'flux-schnell', + // Back-compat: first image exposed at top level too. + imageUrl: images[0]?.imageUrl, + mediaId: images[0]?.mediaId, + thumbnailUrl: images[0]?.thumbnailUrl, + }); + } catch { + // Fallback: return raw imageUrls if mana-media is unavailable. OpenAI's + // base64-only path has no fallback URL — surface an error instead. 
+ if (imageUrls.length === 0) return c.json({ error: 'Media upload failed' }, 502); + return c.json({ + images: imageUrls.map((u) => ({ imageUrl: u })), + imageUrl: imageUrls[0], prompt, model: model || 'flux-schnell', }); - } catch { - // Fallback: return raw imageUrl if mana-media is unavailable - return c.json({ imageUrl, prompt, model: model || 'flux-schnell' }); } } catch (_err) { return c.json({ error: 'Generation failed' }, 500); diff --git a/apps/mana/apps/web/src/routes/(app)/picture/generate/+page.svelte b/apps/mana/apps/web/src/routes/(app)/picture/generate/+page.svelte index 26aa62c5a..9af360f1a 100644 --- a/apps/mana/apps/web/src/routes/(app)/picture/generate/+page.svelte +++ b/apps/mana/apps/web/src/routes/(app)/picture/generate/+page.svelte @@ -1,22 +1,133 @@