feat(cards): Deck-Generierung aus Bildern und PDFs via Vision-LLM

Neuer Endpoint POST /api/v1/decks/from-image akzeptiert bis zu 5 Bilder
(PNG/JPG/WebP, max 10 MiB je) oder PDFs (max 30 MiB je) als multipart/form-data.
Alle Dateien werden in einem einzigen mana-llm Vision-Call verarbeitet
(mana/vision → llava → Gemini 2.5-flash → GPT-4o Fallback-Chain).

PDFs werden von Gemini nativ verstanden (Layout, Tabellen, Bilder im Dokument)
ohne Zwischenschritt über Text-Extraktion oder Rendering. Der google.py-Provider
reicht den MIME-Type aus dem data:-URI direkt an types.Part.from_bytes() weiter.

- llm-client: chatVisionJson() mit images[]-Array (mehrere Bilder/Dokumente)
- decks-generate: GeneratedDeckSchema + insertGeneratedDeck() exportiert
- decks-from-image: neuer Route-Handler, MIME-Filter für image/* + application/pdf
- index: neue Route gemountet
- client.ts: apiForm() für multipart-Uploads ohne JSON.stringify
- decks.ts: generateDeckFromImage(files, opts)
- NewDeckCard + /decks/new: Dropzone mit Multi-File, Thumbnail-Strip, PDF-Icon

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-05-10 15:21:35 +02:00
parent 170a2825a4
commit 1212b62613
8 changed files with 667 additions and 94 deletions

View file

@ -56,6 +56,32 @@ async function doFetch(path: string, opts: RequestOptions): Promise<Response> {
});
}
/**
 * POSTs a multipart form (e.g. image → deck) and returns the parsed JSON response.
 *
 * No Content-Type header is set here: for a FormData body the browser adds it
 * itself, including the multipart boundary.
 *
 * @param path - Request path, appended to API_BASE.
 * @param form - Form data sent as the request body.
 * @returns The JSON-decoded response body, typed as T (not validated at runtime).
 * @throws ApiError for any non-2xx response; carries the status and the error
 *   body (parsed JSON when possible, otherwise the raw text).
 */
export async function apiForm<T>(path: string, form: FormData): Promise<T> {
  await devUser.ensureFreshToken();
  const headers: Record<string, string> = {};
  if (devUser.token) {
    headers['Authorization'] = `Bearer ${devUser.token}`;
  } else if (devUser.stubId) {
    headers['X-User-Id'] = devUser.stubId;
  }
  const res = await fetch(`${API_BASE}${path}`, { method: 'POST', headers, body: form });
  if (!res.ok) {
    // A response body can be consumed only once. Calling res.json() and then
    // falling back to res.text() fails with "body already used", so read the
    // text once and attempt to parse it ourselves.
    const raw = await res.text();
    let body: unknown = raw;
    try {
      body = JSON.parse(raw);
    } catch {
      // Not JSON (plain text, HTML error page, empty body): keep the raw text.
    }
    throw new ApiError(res.status, body);
  }
  return (await res.json()) as T;
}
export async function api<T>(path: string, opts: RequestOptions = {}): Promise<T> {
// Proaktive Frische-Prüfung: wenn Token <60s gültig ist, refreshen
// wir, bevor der Request rausgeht. Coalesced über tryRefresh().

View file

@ -1,5 +1,5 @@
import type { Deck, DeckCreate, DeckUpdate } from '@cards/domain';
import { api } from './client.ts';
import { api, apiForm } from './client.ts';
export function listDecks(opts: { forkedFromMarketplace?: boolean } = {}) {
const qs = opts.forkedFromMarketplace ? '?forked_from_marketplace=true' : '';
@ -28,3 +28,15 @@ export function generateDeck(input: { prompt: string; language?: 'de' | 'en'; co
body: input,
});
}
/**
 * Uploads one or more files as multipart form data to the deck-from-image
 * endpoint and resolves with the created deck and the number of cards created.
 *
 * @param files - A single file or a list of files; each is appended under the
 *   form field name `file`.
 * @param opts - Optional `language` and `count`; each is only sent when set.
 */
export function generateDeckFromImage(
  files: File | File[],
  opts: { language?: 'de' | 'en'; count?: number },
) {
  const payload = new FormData();
  const uploads = Array.isArray(files) ? files : [files];
  uploads.forEach((upload) => {
    payload.append('file', upload);
  });
  if (opts.language) {
    payload.append('language', opts.language);
  }
  // A count of 0 is still sent; only null/undefined are skipped.
  if (opts.count !== null && opts.count !== undefined) {
    payload.append('count', String(opts.count));
  }
  return apiForm<{ deck: Deck; cards_created: number }>('/api/v1/decks/from-image', payload);
}

View file

@ -1,7 +1,7 @@
<script lang="ts">
import { goto } from '$app/navigation';
import { type DeckCategoryId, DECK_CATEGORY_IDS, DECK_CATEGORY_LABELS } from '@cards/domain';
import { createDeck, generateDeck } from '$lib/api/decks.ts';
import { createDeck, generateDeck, generateDeckFromImage } from '$lib/api/decks.ts';
import { devUser } from '$lib/auth/dev-stub.svelte.ts';
import { toasts } from '$lib/stores/toasts.svelte.ts';
import { i18n, t } from '$lib/i18n/index.svelte.ts';
@ -21,6 +21,47 @@
// True while the prompt-based AI generation is running; shows the "Generiere…" hint
// and disables the action buttons.
let generating = $state(false);
// Error text of the prompt-based AI generation, rendered as an alert when set.
let aiError = $state<string | null>(null);
// Upper bound on the number of files kept for one image-based generation.
const MAX_IMAGE_FILES = 5;
// Files (images or PDFs) currently selected for image-based generation.
let imageFiles = $state<File[]>([]);
// Object URLs for the selected files, index-aligned with imageFiles.
let imagePreviews = $state<string[]>([]);
// True while the image-based generation request is in flight.
let imageGenerating = $state(false);
// Error text of the image-based generation, rendered as an alert when set.
let imageError = $state<string | null>(null);
// Reference to the hidden file input (set via bind:this) so the dropzone can open it.
let fileInput = $state<HTMLInputElement | null>(null);
/**
 * Adds newly picked or dropped files to the selection.
 *
 * Files that fail isAccepted are ignored, and only as many files as still fit
 * under MAX_IMAGE_FILES are taken. Each added file gets an object URL in
 * imagePreviews at the same index as the file in imageFiles. Any previous
 * image error is cleared.
 *
 * The former "revoke overflow URLs" loop was removed: files beyond the limit
 * never received an object URL, and the existing selection is never shortened
 * here, so that loop could not execute.
 */
function addImageFiles(incoming: File[]) {
  const room = Math.max(0, MAX_IMAGE_FILES - imageFiles.length);
  const accepted = incoming.filter(isAccepted).slice(0, room);
  imageFiles = [...imageFiles, ...accepted];
  // Existing preview URLs are kept as they are; only new files get a URL.
  imagePreviews = [...imagePreviews, ...accepted.map((f) => URL.createObjectURL(f))];
  imageError = null;
}
/**
 * Drops the file at index `i` from the selection and releases the object URL
 * of its preview. Both arrays are filtered with the same index so they stay aligned.
 */
function removeImageFile(i: number) {
  const isOther = (_: unknown, idx: number) => idx !== i;
  URL.revokeObjectURL(imagePreviews[i]);
  imageFiles = imageFiles.filter(isOther);
  imagePreviews = imagePreviews.filter(isOther);
}
/**
 * Change handler of the hidden file input: forwards the chosen files to
 * addImageFiles.
 *
 * The input is cleared afterwards. Without that, picking the same file again
 * (for example after removing it from the selection) would not fire another
 * `change` event, because the input's value would be unchanged.
 */
function onFileChange(e: Event) {
  const input = e.target as HTMLInputElement;
  // Copy the File objects out first; resetting the value empties input.files.
  const files = Array.from(input.files ?? []);
  input.value = '';
  if (files.length) addImageFiles(files);
}
/**
 * Drop handler of the dropzone: suppresses the browser's default drop handling
 * and forwards any dropped files to addImageFiles.
 */
function onImageDrop(e: DragEvent) {
  e.preventDefault();
  const list = e.dataTransfer?.files;
  const dropped = list ? Array.from(list) : [];
  if (dropped.length === 0) return;
  addImageFiles(dropped);
}
/**
 * Tells whether a file may be added to the selection, judged by its MIME type:
 * any `image/*` type or exactly `application/pdf`.
 */
function isAccepted(f: File) {
  const mime = f.type;
  if (mime === 'application/pdf') {
    return true;
  }
  return mime.startsWith('image/');
}
function close() {
open = false;
catOpen = false;
@ -32,6 +73,25 @@
saving = false;
generating = false;
aiError = null;
for (const url of imagePreviews) URL.revokeObjectURL(url);
imageFiles = [];
imagePreviews = [];
imageGenerating = false;
imageError = null;
}
/**
 * Generates a deck from the selected files and navigates to it on success.
 *
 * Does nothing when no files are selected, no user id is present, or a request
 * is already running. On failure the error text is stored in imageError and
 * the busy flag is cleared; on success the flag is left set because the page
 * navigates to the new deck.
 */
async function onFromImage() {
  if (imageFiles.length === 0 || !devUser.id || imageGenerating) return;
  imageError = null;
  imageGenerating = true;
  try {
    const result = await generateDeckFromImage(imageFiles, { count, language });
    toasts.success(`🖼 "${result.deck.name}" mit ${result.cards_created} Karten erstellt`);
    goto(`/decks/${result.deck.id}`);
  } catch (err) {
    // A rejection is not guaranteed to be an Error; an unchecked cast would
    // yield `undefined` here and the failure would never be shown.
    imageError = err instanceof Error ? err.message : String(err);
    imageGenerating = false;
  }
}
function pickCategory(id: DeckCategoryId) {
@ -142,20 +202,75 @@
</select>
</label>
<!-- Dropzone for vision-based generation: images or PDFs, up to MAX_IMAGE_FILES files.
     Click or drop adds files; each thumbnail has its own remove button. -->
<div class="field">
  <div
    class="img-drop"
    role="button"
    tabindex="0"
    onclick={() => fileInput?.click()}
    onkeydown={(e) => {
      // Ignore keys bubbling up from children (the "×" buttons, the file input):
      // otherwise Enter on a remove button would also open the file picker.
      if (e.target !== e.currentTarget) return;
      // An element with role="button" is expected to activate on Enter and Space.
      if (e.key === 'Enter' || e.key === ' ') {
        e.preventDefault();
        fileInput?.click();
      }
    }}
    ondrop={onImageDrop}
    ondragover={(e) => e.preventDefault()}
  >
    {#if imagePreviews.length > 0}
      <div class="img-strip">
        {#each imageFiles as file, i}
          <div class="img-thumb-wrap">
            {#if file.type === 'application/pdf'}
              <div class="img-thumb img-pdf-thumb"><span>📄</span></div>
            {:else}
              <img src={imagePreviews[i]} alt="Bild {i + 1}" class="img-thumb" />
            {/if}
            <button
              type="button"
              class="img-remove"
              onclick={(e) => { e.stopPropagation(); removeImageFile(i); }}
              aria-label="Datei entfernen"
            >×</button>
          </div>
        {/each}
        {#if imagePreviews.length < MAX_IMAGE_FILES}
          <div class="img-add-hint">+ weitere</div>
        {/if}
      </div>
    {:else}
      <span class="img-placeholder">🖼 Bilder oder PDFs für KI (bis zu 5)</span>
    {/if}
    <input
      bind:this={fileInput}
      type="file"
      accept="image/*,application/pdf"
      multiple
      class="sr-only"
      onchange={onFileChange}
    />
  </div>
</div>
{#if aiError}
<p class="ai-error" role="alert">{aiError}</p>
{/if}
{#if imageError}
<p class="ai-error" role="alert">{imageError}</p>
{/if}
{#if generating}
<p class="ai-hint" aria-live="polite">Generiere… ca. 1060 s</p>
{/if}
{#if imageGenerating}
<p class="ai-hint" aria-live="polite">Analysiere Bild… ca. 1560 s</p>
{/if}
<div class="actions">
<button type="submit" disabled={saving || generating || !name.trim()} class="btn-primary">
<button type="submit" disabled={saving || generating || imageGenerating || !name.trim()} class="btn-primary">
{saving ? t('deck_new.creating') : t('deck_new.create')}
</button>
<button type="button" disabled={generating || saving || !name.trim()} onclick={onAi} class="btn-ai">
<button type="button" disabled={generating || saving || imageGenerating || !name.trim()} onclick={onAi} class="btn-ai">
{generating ? '✨ Generiere…' : '✨ Mit KI generieren'}
</button>
<button type="button" disabled={imageFiles.length === 0 || imageGenerating || saving || generating} onclick={onFromImage} class="btn-ai">
{imageGenerating ? '🖼 Analysiere…' : '🖼 Aus Bild'}
</button>
<button type="button" onclick={close} class="btn-cancel">
{t('deck_new.cancel')}
</button>
@ -395,4 +510,81 @@
.btn-cancel:hover {
background: hsl(var(--color-surface-hover));
}
/* Dropzone container: dashed outline, content centered, clickable. */
.img-drop {
display: flex;
align-items: center;
justify-content: center;
min-height: 2.75rem;
border: 1.5px dashed hsl(var(--color-border));
border-radius: 0.3125rem;
cursor: pointer;
overflow: hidden;
transition: border-color 0.12s;
}
/* Hover feedback: tint the outline with the primary color. */
.img-drop:hover {
border-color: hsl(var(--color-primary) / 0.5);
}
/* Hint text shown while no file is selected. */
.img-placeholder {
font-size: 0.6875rem;
color: hsl(var(--color-muted-foreground));
padding: 0.375rem 0.5rem;
text-align: center;
}
/* Row of thumbnails; wraps onto further lines when it runs out of width. */
.img-strip {
display: flex;
align-items: center;
gap: 0.25rem;
padding: 0.25rem;
flex-wrap: wrap;
}
/* Positioning context for the remove button placed on each thumbnail. */
.img-thumb-wrap {
position: relative;
flex-shrink: 0;
}
/* Square thumbnail; images are cropped to fill it. */
.img-thumb {
width: 2.5rem;
height: 2.5rem;
object-fit: cover;
border-radius: 0.25rem;
display: block;
}
/* Thumbnail variant for PDFs: bordered tile with a centered icon. */
.img-pdf-thumb {
background: hsl(var(--color-surface));
border: 1px solid hsl(var(--color-border));
display: flex;
align-items: center;
justify-content: center;
font-size: 1.125rem;
}
/* Round remove button overlapping the thumbnail's top-right corner. */
.img-remove {
position: absolute;
top: -0.3rem;
right: -0.3rem;
width: 1rem;
height: 1rem;
font-size: 0.625rem;
line-height: 1;
border-radius: 50%;
border: none;
background: hsl(var(--color-error));
color: #fff;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
padding: 0;
}
/* "+ weitere" hint shown while more files may still be added. */
.img-add-hint {
font-size: 0.625rem;
color: hsl(var(--color-muted-foreground));
padding: 0 0.25rem;
}
</style>

View file

@ -2,7 +2,7 @@
import { goto } from '$app/navigation';
import { onMount } from 'svelte';
import { type DeckCategoryId, DECK_CATEGORY_IDS, DECK_CATEGORY_LABELS } from '@cards/domain';
import { createDeck, generateDeck } from '$lib/api/decks.ts';
import { createDeck, generateDeck, generateDeckFromImage } from '$lib/api/decks.ts';
import { devUser } from '$lib/auth/dev-stub.svelte.ts';
import { toasts } from '$lib/stores/toasts.svelte.ts';
import { i18n, t } from '$lib/i18n/index.svelte.ts';
@ -18,6 +18,56 @@
// Busy flag of the prompt-based AI generation; also disables the image button while true.
let generating = $state(false);
// Presumably the error text of the prompt-based AI generation — its rendering is outside this excerpt.
let aiError = $state<string | null>(null);
// Upper bound on the number of files kept for one image-based generation.
const MAX_IMAGE_FILES = 5;
// Files (images or PDFs) currently selected for image-based generation.
let imageFiles = $state<File[]>([]);
// Object URLs for the selected files, index-aligned with imageFiles.
let imagePreviews = $state<string[]>([]);
// True while the image-based generation request is in flight.
let imageGenerating = $state(false);
// Error text of the image-based generation, rendered as an alert when set.
let imageError = $state<string | null>(null);
// Reference to the hidden file input (set via bind:this) so the dropzone can open it.
let fileInput = $state<HTMLInputElement | null>(null);
/**
 * Adds newly picked or dropped files to the selection.
 *
 * Only files whose MIME type is `image/*` or `application/pdf` are considered,
 * and only as many as still fit under MAX_IMAGE_FILES are taken. Each added
 * file gets an object URL in imagePreviews at the same index as the file in
 * imageFiles. Any previous image error is cleared.
 *
 * The former revoke loop was removed: files beyond the limit never received an
 * object URL, and the existing selection is never shortened here, so that loop
 * could not execute.
 */
function addImageFiles(incoming: File[]) {
  const room = Math.max(0, MAX_IMAGE_FILES - imageFiles.length);
  const accepted = incoming
    .filter((f) => f.type.startsWith('image/') || f.type === 'application/pdf')
    .slice(0, room);
  imageFiles = [...imageFiles, ...accepted];
  // Existing preview URLs are kept as they are; only new files get a URL.
  imagePreviews = [...imagePreviews, ...accepted.map((f) => URL.createObjectURL(f))];
  imageError = null;
}
/**
 * Drops the file at index `i` from the selection and releases the object URL
 * of its preview. Both arrays are filtered with the same index so they stay aligned.
 */
function removeImageFile(i: number) {
  const isOther = (_: unknown, idx: number) => idx !== i;
  URL.revokeObjectURL(imagePreviews[i]);
  imageFiles = imageFiles.filter(isOther);
  imagePreviews = imagePreviews.filter(isOther);
}
/**
 * Change handler of the hidden file input: forwards the chosen files to
 * addImageFiles.
 *
 * The input is cleared afterwards. Without that, picking the same file again
 * (for example after removing it from the selection) would not fire another
 * `change` event, because the input's value would be unchanged.
 */
function onFileChange(e: Event) {
  const input = e.target as HTMLInputElement;
  // Copy the File objects out first; resetting the value empties input.files.
  const files = Array.from(input.files ?? []);
  input.value = '';
  if (files.length) addImageFiles(files);
}
/**
 * Drop handler of the dropzone: suppresses the browser's default drop handling
 * and forwards any dropped files to addImageFiles.
 */
function onImageDrop(e: DragEvent) {
  e.preventDefault();
  const list = e.dataTransfer?.files;
  const dropped = list ? Array.from(list) : [];
  if (dropped.length === 0) return;
  addImageFiles(dropped);
}
/**
 * Generates a deck from the selected files and navigates to it on success.
 *
 * Does nothing when no files are selected or a request is already running.
 * On failure the error text is stored in imageError and the busy flag is
 * cleared; on success the flag is left set because the page navigates to the
 * new deck.
 */
async function onFromImage() {
  if (imageFiles.length === 0 || imageGenerating) return;
  imageError = null;
  imageGenerating = true;
  try {
    const result = await generateDeckFromImage(imageFiles, { count, language });
    toasts.success(`🖼 "${result.deck.name}" mit ${result.cards_created} Karten erstellt`);
    goto(`/decks/${result.deck.id}`);
  } catch (err) {
    // A rejection is not guaranteed to be an Error; an unchecked cast would
    // yield `undefined` here and the failure would never be shown.
    imageError = err instanceof Error ? err.message : String(err);
    imageGenerating = false;
  }
}
// After mount: send visitors without a user id back to the start page.
onMount(() => {
  const hasUser = Boolean(devUser.id);
  if (hasUser) return;
  goto('/');
});
@ -179,6 +229,86 @@
>{t('deck_new.cancel')}</a>
</div>
</form>
<div class="mt-8 border-t border-[hsl(var(--color-border))] pt-6">
<p class="text-sm font-medium mb-3">Oder: Deck aus Bild generieren</p>
<p class="text-xs text-[hsl(var(--color-muted-foreground))] mb-3">
Foto von Notizen, Screenshot, Lehrbuchseite — Vision-Modell erstellt daraus Lernkarten.
Sprache und Kartenanzahl aus den Einstellungen oben.
</p>
<!-- Dropzone for image/PDF upload: click or drop adds files; each thumbnail has its own remove button. -->
<div
  class="relative flex flex-col items-center justify-center rounded-lg border-2 border-dashed border-[hsl(var(--color-border))] p-4 text-center cursor-pointer transition-colors hover:border-[hsl(var(--color-primary)/0.5)]"
  role="button"
  tabindex="0"
  onclick={() => fileInput?.click()}
  onkeydown={(e) => {
    // Ignore keys bubbling up from children (the "×" buttons, the file input):
    // otherwise Enter on a remove button would also open the file picker.
    if (e.target !== e.currentTarget) return;
    // An element with role="button" is expected to activate on Enter and Space.
    if (e.key === 'Enter' || e.key === ' ') {
      e.preventDefault();
      fileInput?.click();
    }
  }}
  ondrop={onImageDrop}
  ondragover={(e) => e.preventDefault()}
>
  {#if imagePreviews.length > 0}
    <div class="flex flex-wrap gap-2 justify-center mb-2">
      {#each imageFiles as file, i}
        <div class="relative">
          {#if file.type === 'application/pdf'}
            <div class="h-20 w-20 rounded bg-[hsl(var(--color-surface))] border border-[hsl(var(--color-border))] flex flex-col items-center justify-center gap-1">
              <span class="text-lg">📄</span>
              <span class="text-[0.5rem] text-[hsl(var(--color-muted-foreground))] text-center leading-tight px-1 truncate w-full text-center">{file.name}</span>
            </div>
          {:else}
            <img src={imagePreviews[i]} alt="Bild {i + 1}" class="h-20 w-20 rounded object-cover" />
          {/if}
          <button
            type="button"
            class="absolute -top-1.5 -right-1.5 w-5 h-5 rounded-full bg-[hsl(var(--color-error))] text-white text-xs flex items-center justify-center leading-none"
            onclick={(e) => { e.stopPropagation(); removeImageFile(i); }}
            aria-label="Datei entfernen"
          >×</button>
        </div>
      {/each}
    </div>
    <span class="text-xs text-[hsl(var(--color-muted-foreground))]">
      {imagePreviews.length}/{MAX_IMAGE_FILES} Bilder
      {#if imagePreviews.length < MAX_IMAGE_FILES}· klicken für mehr{/if}
    </span>
  {:else}
    <span class="text-sm text-[hsl(var(--color-muted-foreground))]">🖼 Bilder oder PDFs hierher ziehen oder klicken</span>
    <span class="text-xs text-[hsl(var(--color-muted-foreground))] mt-1">Bis zu 5 Dateien · PNG · JPG · WebP · PDF · Bilder max 10 MB · PDFs max 30 MB</span>
  {/if}
  <input
    bind:this={fileInput}
    type="file"
    accept="image/*,application/pdf"
    multiple
    class="sr-only"
    onchange={onFileChange}
  />
</div>
{#if imageError}
<div
class="mt-2 rounded border border-[hsl(var(--color-error))]/40 bg-[hsl(var(--color-error))]/10 p-3 text-sm text-[hsl(var(--color-error))]"
role="alert"
>
{imageError}
</div>
{/if}
{#if imageGenerating}
<p class="mt-2 text-xs text-[hsl(var(--color-muted-foreground))]" aria-live="polite">
Vision-Modell analysiert Bild… typischerweise 1560 Sekunden.
</p>
{/if}
<button
type="button"
disabled={imageFiles.length === 0 || imageGenerating || saving || generating}
onclick={onFromImage}
class="mt-3 rounded border border-[hsl(var(--color-primary))] px-4 py-2 text-sm text-[hsl(var(--color-primary))] disabled:opacity-50 hover:bg-[hsl(var(--color-primary)/0.08)]"
>
{imageGenerating ? '🖼 Analysiere…' : `🖼 Aus ${imagePreviews.length > 1 ? `${imagePreviews.length} Bildern` : 'Bild'} generieren`}
</button>
</div>
</div>
<style>