From 6432ef7e6b50063cec9416d0153f76c7904cd958 Mon Sep 17 00:00:00 2001 From: Till JS Date: Fri, 24 Apr 2026 16:06:03 +0200 Subject: [PATCH] =?UTF-8?q?feat(comic):=20M4=20=E2=80=94=20AI-Storyboard?= =?UTF-8?q?=20aus=20Cross-Modul-Text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User wählt einen bestehenden Text (Tagebuch-Eintrag, Notiz oder Bibliotheks-Review), das Modell schlägt eine geordnete Panel-Sequenz vor (prompt + optional caption + dialogue pro Panel), der User prüft/editiert und feuert Batch-Gen mit sourceInput- Tagging — damit wird `useStoriesByInput` später cross-referenzieren können ("Welche Comics sind aus diesem Journal-Eintrag entstanden?"). Backend: - POST /api/v1/comic/storyboard (Hono route) nimmt style + sourceText + panelCount (+ optional storyContext / sourceModule) und ruft llmJson() mit einem response_format=json_object-Prompt an mana-llm. System-Prompt instruiert das Modell auf eine exakte {panels: [{prompt, caption?, dialogue?}]}-Shape, Rules wie "keine Style-Instruktionen" (kommen aus dem Story-Prefix downstream) und "kein Panel-Nummerieren". - Defense-in-depth Coerce auf der Response: Panel ohne prompt wird gefiltert, Strings werden gecappt (caption/dialogue 200, prompt 800), Zahl der Panels auf panelCount geclampt. - Model via COMIC_STORYBOARD_MODEL env var überschreibbar; Default ollama/gemma3:4b wie writing (lokal + billig). - Beide Erfolgs- und Fehler-Pfade mit logger.info / logger.error + userId + sourceModule für Observability. - Route registriert in apps/api/src/index.ts als /api/v1/comic. Client: - api/storyboard.ts: suggestPanels({style, sourceText, panelCount, storyContext?, sourceModule?}) — thin fetch-Wrapper + Error-Messaging für 402 / 502 / no-panels-Responses. - ReferenceInputPicker: Tabs über Journal / Notizen / Bibliothek (die drei inhalts-dichtesten Quellen), pro Tab Live-Query + Suche + Entry-Liste. 
Click emittiert {module, entryId, label, sourceText} — label ist der Display-Name für den "Quelle: …"-Chip, sourceText ist bereits decrypted (Queries liefern plaintext zurück). Bibliotheks-Einträge ohne Review sind disabled (kein Text = nichts zu rendern). - StoryboardSuggester: 4-Schritt-Flow (pick-source → generating-plan → review-plan → rendering). Schritt 3 ist der eigentliche Editor: jede vom Modell vorgeschlagene Zeile ist editierbar (Prompt, Caption, Dialog) mit Trash-Button; Quality + Format-Toggle teilen sich den M3-Batch-Style. "Generieren" ruft parallel runPanelGenerate() via Promise.allSettled mit sourceInput={module, entryId} im panelMeta, alle Panels gehen durch den identischen M2-HTTP-Pfad. - DetailView bekommt einen dritten Editor-Modus "ai" neben "single" und "batch" — eine Sparkle-Button-CTA öffnet den Suggester. Kein Writing-Draft / Calendar-Event-Input in dieser Runde — Drafts brauchen Version-Chain-Resolve, Events sind meist zu dünn an Prosa. Follow-up wenn gewünscht (rein additiv: Tab + Hook). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/index.ts | 2 + apps/api/src/modules/comic/routes.ts | 216 ++++++++ .../src/lib/modules/comic/api/storyboard.ts | 73 +++ .../components/ReferenceInputPicker.svelte | 236 +++++++++ .../components/StoryboardSuggester.svelte | 475 ++++++++++++++++++ .../lib/modules/comic/views/DetailView.svelte | 14 +- 6 files changed, 1015 insertions(+), 1 deletion(-) create mode 100644 apps/api/src/modules/comic/routes.ts create mode 100644 apps/mana/apps/web/src/lib/modules/comic/api/storyboard.ts create mode 100644 apps/mana/apps/web/src/lib/modules/comic/components/ReferenceInputPicker.svelte create mode 100644 apps/mana/apps/web/src/lib/modules/comic/components/StoryboardSuggester.svelte diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 638a10624..325621509 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -43,6 +43,7 @@ import { newsResearchRoutes } from './modules/news-research/routes'; import { articlesRoutes } from './modules/articles/routes'; import { tracesRoutes } from './modules/traces/routes'; import { writingRoutes } from './modules/writing/routes'; +import { comicRoutes } from './modules/comic/routes'; import { presiRoutes } from './modules/presi/routes'; import { researchRoutes } from './modules/research/routes'; import { whoRoutes } from './modules/who/routes'; @@ -134,6 +135,7 @@ app.route('/api/v1/research', researchRoutes); app.route('/api/v1/website', websiteRoutes); app.route('/api/v1/who', whoRoutes); app.route('/api/v1/writing', writingRoutes); +app.route('/api/v1/comic', comicRoutes); // ─── Server Info ──────────────────────────────────────────── console.log(`mana-api starting on port ${PORT}...`); diff --git a/apps/api/src/modules/comic/routes.ts b/apps/api/src/modules/comic/routes.ts new file mode 100644 index 000000000..e83253a61 --- /dev/null +++ b/apps/api/src/modules/comic/routes.ts @@ -0,0 +1,216 @@ +/** + * Comic module — server endpoints. 
+ * + * Current scope (M4): + * - POST /storyboard — one-shot panel-sequence suggestion from a text + * input (journal entry, note, library review, writing draft, + * calendar event description). The client decrypts the source + * locally, sends the plaintext + style, and we round-trip to + * mana-llm with a JSON-schema system prompt, returning + * `{ panels: Array<{ prompt, caption?, dialogue? }> }`. Panel + * rendering itself still happens through /picture/generate-with- + * reference — this endpoint is pure text → plan. + * + * Future (M5+): + * - Upload endpoint for comic-specific anchor / backdrop images if + * M6 character-cast scope happens; the 'comic' upload slot is + * already allowed by verifyMediaOwnership (set in M1). + * + * Why not reuse /api/v1/writing/generations? + * That endpoint is a free-text prose endpoint (no JSON parsing) and + * is wired for one-shot writing drafts. Comic storyboarding wants a + * structured Panel[] envelope the client can iterate over cheaply — + * different prompt shape, different parser, different observability + * tag. Keeping them apart avoids prompt-contamination between the + * two use-cases and keeps each module's logs grep-able. 
+ */ + +import { Hono } from 'hono'; +import { llmJson, LlmError } from '../../lib/llm'; +import { logger, type AuthVariables } from '@mana/shared-hono'; + +const STORYBOARD_MODEL = process.env.COMIC_STORYBOARD_MODEL || 'ollama/gemma3:4b'; + +type ComicStyle = 'comic' | 'manga' | 'cartoon' | 'graphic-novel' | 'webtoon'; + +const STYLE_HINTS: Record = { + comic: 'US comic book, bold linework, cell-shading, dramatic framing', + manga: 'Japanese manga, black-and-white with screen tones, dynamic perspective', + cartoon: 'soft pastel cartoon, rounded shapes, Saturday-morning animation', + 'graphic-novel': 'graphic novel, painterly watercolor, muted atmospheric palette', + webtoon: 'webtoon, vertical framing, bright saturated colors, soft cel-shading', +}; + +const VALID_STYLES = Object.keys(STYLE_HINTS) as readonly ComicStyle[]; +const MAX_SOURCE_TEXT_CHARS = 8_000; +const MIN_PANEL_COUNT = 2; +const MAX_PANEL_COUNT = 8; + +interface StoryboardRequest { + style: ComicStyle; + sourceText: string; + /** Optional — if omitted we ask for 4 panels (plan default). */ + panelCount?: number; + /** Optional story-level briefing the author wrote at create-time. + * Gets prepended to the source-text so Claude knows the tonal + * register ("make it funny" / "stay serious"). */ + storyContext?: string | null; + /** Where this text came from — logged only, not sent to the LLM. + * Useful for observability ("which module drives most storyboards"). */ + sourceModule?: string; +} + +interface StoryboardPanel { + prompt: string; + caption?: string; + dialogue?: string; +} + +interface StoryboardResponse { + panels: StoryboardPanel[]; + model: string; + durationMs: number; +} + +function isValidStyle(v: unknown): v is ComicStyle { + return typeof v === 'string' && (VALID_STYLES as readonly string[]).includes(v); +} + +function buildSystemPrompt(style: ComicStyle): string { + const hint = STYLE_HINTS[style]; + return [ + `You are a comic-story editor. 
Given a short piece of text (journal entry, note, review, or event description), break it into a sequence of visual comic panels.`, + `Style: ${hint}.`, + `Return ONLY a JSON object with this exact shape:`, + `{"panels": [{"prompt": string, "caption"?: string, "dialogue"?: string}, ...]}`, + `Rules:`, + `- "prompt" is the visual scene description (what the artist draws). One or two short English sentences. Focus on composition, action, mood, setting. Do NOT describe style — the style prefix is added downstream.`, + `- "caption" (optional) is a short narration line rendered at the top or bottom of the panel, max 80 chars. Use sparingly — only when scene-setting or transitions need it.`, + `- "dialogue" (optional) is what the protagonist says inside a speech bubble, max 80 chars. Use when the scene has a spoken moment.`, + `- Do not number panels. Do not add meta commentary. Do not explain your choices.`, + `- The protagonist of every panel is the same person (the story's author).`, + ].join('\n'); +} + +function buildUserPrompt( + sourceText: string, + panelCount: number, + storyContext: string | null | undefined +): string { + const trimmed = sourceText.trim().slice(0, MAX_SOURCE_TEXT_CHARS); + const contextBlock = storyContext?.trim() + ? `Story briefing from the author:\n${storyContext.trim()}\n\n---\n\n` + : ''; + return [ + contextBlock, + `Source text:\n${trimmed}\n\n---\n\n`, + `Generate exactly ${panelCount} panels that tell this as a comic. 
Output the JSON object described in the system message.`, + ].join(''); +} + +const routes = new Hono<{ Variables: AuthVariables }>(); + +routes.post('/storyboard', async (c) => { + const userId = c.get('userId'); + const body = (await c.req.json()) as Partial; + + if (!isValidStyle(body.style)) { + return c.json({ error: `Invalid style, expected one of: ${VALID_STYLES.join(', ')}` }, 400); + } + if (!body.sourceText || typeof body.sourceText !== 'string') { + return c.json({ error: 'sourceText required' }, 400); + } + if (body.sourceText.trim().length === 0) { + return c.json({ error: 'sourceText must not be blank' }, 400); + } + + const panelCount = Math.max( + MIN_PANEL_COUNT, + Math.min(MAX_PANEL_COUNT, Number(body.panelCount) || 4) + ); + + const startedAt = Date.now(); + try { + const parsed = await llmJson<{ panels?: unknown }>({ + model: STORYBOARD_MODEL, + system: buildSystemPrompt(body.style), + user: buildUserPrompt(body.sourceText, panelCount, body.storyContext), + temperature: 0.7, + maxTokens: 2000, + }); + + const rawPanels = Array.isArray(parsed?.panels) ? parsed.panels : []; + // Defense-in-depth: coerce + strip unknown shapes, clamp to + // requested count. If the model returns more panels than asked + // for we keep the first N; less is fine (fewer credits later). + const panels: StoryboardPanel[] = rawPanels + .map((raw): StoryboardPanel | null => { + if (!raw || typeof raw !== 'object') return null; + const entry = raw as Record; + const prompt = typeof entry.prompt === 'string' ? entry.prompt.trim() : ''; + if (!prompt) return null; + const caption = + typeof entry.caption === 'string' && entry.caption.trim().length > 0 + ? entry.caption.trim().slice(0, 200) + : undefined; + const dialogue = + typeof entry.dialogue === 'string' && entry.dialogue.trim().length > 0 + ? 
entry.dialogue.trim().slice(0, 200) + : undefined; + return { prompt: prompt.slice(0, 800), caption, dialogue }; + }) + .filter((p): p is StoryboardPanel => p !== null) + .slice(0, panelCount); + + const durationMs = Date.now() - startedAt; + + if (panels.length === 0) { + logger.warn('comic.storyboard_empty', { + userId, + style: body.style, + sourceModule: body.sourceModule, + model: STORYBOARD_MODEL, + durationMs, + }); + return c.json( + { + error: 'Model returned no usable panels', + detail: 'Try again, shorten the input, or pick a different style', + durationMs, + }, + 502 + ); + } + + logger.info('comic.storyboard_ok', { + userId, + style: body.style, + sourceModule: body.sourceModule, + panelCount: panels.length, + model: STORYBOARD_MODEL, + durationMs, + }); + + const response: StoryboardResponse = { + panels, + model: STORYBOARD_MODEL, + durationMs, + }; + return c.json(response); + } catch (err) { + const durationMs = Date.now() - startedAt; + const message = err instanceof Error ? err.message : String(err); + logger.error('comic.storyboard_failed', { + userId, + style: body.style, + sourceModule: body.sourceModule, + model: STORYBOARD_MODEL, + error: message, + status: err instanceof LlmError ? err.status : undefined, + durationMs, + }); + return c.json({ error: 'Storyboard generation failed', detail: message, durationMs }, 500); + } +}); + +export { routes as comicRoutes }; diff --git a/apps/mana/apps/web/src/lib/modules/comic/api/storyboard.ts b/apps/mana/apps/web/src/lib/modules/comic/api/storyboard.ts new file mode 100644 index 000000000..ffe48426c --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/comic/api/storyboard.ts @@ -0,0 +1,73 @@ +/** + * Storyboard client. 
Calls `/api/v1/comic/storyboard` with the + * decrypted source text (journal entry, note, library review, + * writing draft, calendar event description) and the chosen style, + * receives an ordered `Panel[]` suggestion that the user reviews + + * edits before firing the batch-gen flow (M3). + * + * Cross-module decrypt stays client-side — the browser loads the + * source module's row, passes it through its own decryptor, and + * hands us plaintext. No Key-Grants / server-side decrypts involved + * (matches the plan §6 decision: M4 is interactive client-side). + * + * Plan: docs/plans/comic-module.md M4. + */ + +import { getManaApiUrl } from '$lib/api/config'; +import { authStore } from '$lib/stores/auth.svelte'; +import type { ComicStyle } from '../types'; + +export type StoryboardSourceModule = 'journal' | 'notes' | 'library' | 'writing' | 'calendar'; + +export interface StoryboardPanel { + prompt: string; + caption?: string; + dialogue?: string; +} + +export interface SuggestPanelsParams { + style: ComicStyle; + sourceText: string; + panelCount: number; + /** Story-level briefing the author typed when creating the story. + * Gets prepended server-side so Claude knows the tonal register. */ + storyContext?: string | null; + /** Logged for observability only — not sent to the LLM. */ + sourceModule?: StoryboardSourceModule; +} + +export interface SuggestPanelsResult { + panels: StoryboardPanel[]; + model: string; + durationMs: number; +} + +export async function suggestPanels(params: SuggestPanelsParams): Promise { + const token = await authStore.getValidToken(); + const res = await fetch(`${getManaApiUrl()}/api/v1/comic/storyboard`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + ...(token ? 
{ Authorization: `Bearer ${token}` } : {}), + }, + body: JSON.stringify({ + style: params.style, + sourceText: params.sourceText, + panelCount: params.panelCount, + storyContext: params.storyContext, + sourceModule: params.sourceModule, + }), + }); + + if (!res.ok) { + const body = (await res.json().catch(() => ({}))) as { error?: string; detail?: string }; + const label = body.error ?? `Storyboard fehlgeschlagen (${res.status})`; + throw new Error(body.detail ? `${label}: ${body.detail}` : label); + } + + const data = (await res.json()) as SuggestPanelsResult; + if (!Array.isArray(data.panels) || data.panels.length === 0) { + throw new Error('Keine Panels vom Modell zurück — versuche es mit anderem Text oder Stil.'); + } + return data; +} diff --git a/apps/mana/apps/web/src/lib/modules/comic/components/ReferenceInputPicker.svelte b/apps/mana/apps/web/src/lib/modules/comic/components/ReferenceInputPicker.svelte new file mode 100644 index 000000000..7f6d51aba --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/comic/components/ReferenceInputPicker.svelte @@ -0,0 +1,236 @@ + + + +
+
+

Quelle wählen

+

+ Aus welchem Text soll die KI eine Panel-Folge bauen? Alles bleibt lokal — erst der + entschlüsselte Klartext wird an das Modell gesendet, nur für diesen einen Call. +

+
+ + + +
+ + +
+ +
+ {#if activeTab === 'journal'} + {#if journalFiltered.length === 0} +

+ {journal.length === 0 + ? 'Noch keine Tagebuch-Einträge in diesem Space.' + : 'Keine Einträge passen zur Suche.'} +

+ {:else} + {#each journalFiltered as entry (entry.id)} + + {/each} + {/if} + {:else if activeTab === 'notes'} + {#if notesFiltered.length === 0} +

+ {notes.length === 0 + ? 'Noch keine Notizen in diesem Space.' + : 'Keine Notizen passen zur Suche.'} +

+ {:else} + {#each notesFiltered as note (note.id)} + + {/each} + {/if} + {:else if activeTab === 'library'} + {#if libraryFiltered.length === 0} +

+ {library.length === 0 + ? 'Noch keine Bibliotheks-Einträge in diesem Space.' + : 'Keine Einträge passen zur Suche.'} +

+ {:else} + {#each libraryFiltered as entry (entry.id)} + {@const hasReview = entry.review && entry.review.trim().length > 0} + + {/each} + {/if} + {/if} +
+
diff --git a/apps/mana/apps/web/src/lib/modules/comic/components/StoryboardSuggester.svelte b/apps/mana/apps/web/src/lib/modules/comic/components/StoryboardSuggester.svelte new file mode 100644 index 000000000..28bacbc97 --- /dev/null +++ b/apps/mana/apps/web/src/lib/modules/comic/components/StoryboardSuggester.svelte @@ -0,0 +1,475 @@ + + + +
+
+
+

+ + Mit KI aus Text generieren +

+

+ {#if step === 'pick-source'} + Schritt 1 · Quelle auswählen + {:else if step === 'generating-plan'} + Schritt 2 · Panels werden vorgeschlagen… + {:else if step === 'review-plan'} + Schritt 3 · Vorschläge prüfen und generieren + {:else} + Schritt 4 · Panels werden gerendert… + {/if} +

+
+ +
+ + {#if step === 'pick-source'} +
+
+ + + + ({MIN_STORYBOARD_PANEL_COUNT}–{MAX_STORYBOARD_PANEL_COUNT}) + +
+ + {#if planError} + + {/if} + + +
+ {:else if step === 'generating-plan'} +
+ +

+ Das Modell denkt über deine {requestedCount} Panels nach… +

+
+ {:else if step === 'review-plan' || step === 'rendering'} +
+ {#if selection} +
+
+

+ Quelle: {selection.label} +

+

{selection.module}

+
+ {#if !renderBusy} + + {/if} +
+ {/if} + + {#if warn && !renderBusy} +

+ Hinweis: Ab ~{PANEL_COUNT_WARN_THRESHOLD} Panels wird Character-Konsistenz spürbar schwerer. +

+ {/if} + + {#if roomLeft < rows.length} + + {/if} + +
+ {#each rows as row, index (row.id)} + {@const status = rowStatus[row.id]} + {@const overRoom = index >= roomLeft} +
+
+
+ + {panelCount + index + 1} + + Panel {index + 1} + {#if status?.status === 'pending'} + + + Wird generiert… + + {:else if status?.status === 'ok'} + + + Fertig + + {:else if status?.status === 'error'} + + + Fehlgeschlagen + + {/if} +
+
+ {#if status?.status === 'error'} + + {/if} + {#if rows.length > 1 && !renderBusy} + + {/if} +
+
+ + + +
+ + +
+ + {#if status?.status === 'error' && status.error} + + {/if} +
+ {/each} +
+ + {#if !renderBusy} + + {/if} + +
+
+ Qualität: + {#each QUALITIES as q (q)} + + {/each} +
+
+ Format: + + +
+
+ +
+ +
+
+ {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/modules/comic/views/DetailView.svelte b/apps/mana/apps/web/src/lib/modules/comic/views/DetailView.svelte index ffc6597d1..d68b8b8de 100644 --- a/apps/mana/apps/web/src/lib/modules/comic/views/DetailView.svelte +++ b/apps/mana/apps/web/src/lib/modules/comic/views/DetailView.svelte @@ -19,6 +19,7 @@ import PanelStrip from '../components/PanelStrip.svelte'; import PanelEditor from '../components/PanelEditor.svelte'; import BatchPanelEditor from '../components/BatchPanelEditor.svelte'; + import StoryboardSuggester from '../components/StoryboardSuggester.svelte'; import { encryptRecord } from '$lib/data/crypto'; import type { ComicPanelMeta, LocalComicStory } from '../types'; @@ -32,7 +33,7 @@ const story$ = useStory(id); const story = $derived(story$.value); - type EditorMode = 'off' | 'single' | 'batch'; + type EditorMode = 'off' | 'single' | 'batch' | 'ai'; let editorMode = $state('off'); async function handleToggleFavorite() { @@ -189,6 +190,15 @@ Batch + {/if} @@ -211,6 +221,8 @@ /> {:else if editorMode === 'batch' && !story.isArchived} (editorMode = 'off')} /> + {:else if editorMode === 'ai' && !story.isArchived} + (editorMode = 'off')} /> {/if}