Phase 8c: Anki-Import via portiertem Parser

Strategie-B-Ausnahme: parse.ts (Anki-Format-Parser via JSZip + sql.js)
und AnkiImport.svelte (UI-Stages) sind aus mana-monorepo portiert,
mit Source-Comment-Header dokumentiert. Anki-Format ist standalone
Parser-Logik, kein Architektur-Schmuggel.

Neuer server-authoritative import.ts schreibt direkt gegen die
cards-api ($lib/api/decks + cards) — keine Stores, keine Dexie.
Anki "::"-Hierarchie wird zu " / "-Strings flach. Fallback-Deck
"Anki-Import" für Karten ohne explizites Deck. Cloze-Karten kommen
first-class durch (Sub-Index pro Cluster, Sprint 8a/8b).

Phase-8-MVP-Scope: Bilder + Audio werden gedroppt (Option A) — der
sanitizeAnkiHtml entfernt <img> und [sound:…] ersatzlos. Späterer
Media-Pfad (lokaler Cards-Upload oder mana-media nach Phase 2) ist
additiv.

Neue Route /import + Top-Nav-Link. Hermetic Vitest (5 Cases): baut
zur Laufzeit ein Mini-.apkg via sql.js + JSZip und prüft den
Parser-Output (basic, basic-reverse, cloze, sanitize, dedupe auf
Note-Ebene). svelte-check 0 errors, prod-Build sauber.

sql-wasm.wasm liegt in static/ (660kB) — fix für sql.js 1.14.1, vom
Browser einmal geladen.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-05-08 17:43:12 +02:00
parent 0b609c46fd
commit 2ca09fe0c3
9 changed files with 916 additions and 3 deletions

View file

@ -0,0 +1,120 @@
/**
* Server-authoritative Anki-Import.
*
* Schreibt gegen die cards-api HTTP-Endpoints — keine Dexie, keine
* lokalen Stores. Anki-Decks werden 1:1 in cards-Decks gemappt
* (Anki-`::` zu ` / ` flacht die Hierarchie aus, wie im Original).
* Karten werden mit sanitisiertem Markdown angelegt.
*
* Phase-8-MVP: Bilder + Audio werden gedroppt (siehe parse.ts
* `sanitizeAnkiHtml`). Ein späterer Media-Pfad ist additiv.
*
* No de-dupe: Re-Import derselben .apkg legt doppelte Decks an.
*/
import { createDeck } from '$lib/api/decks.ts';
import { createCard } from '$lib/api/cards.ts';
import { sanitizeAnkiHtml, type ParsedAnki } from './parse.ts';
/** Summary returned by `importParsedAnki` once the whole run has finished. */
export interface ImportResult {
// Decks successfully created, including the "Anki-Import" fallback deck when it was needed.
decksCreated: number;
// Cards successfully created.
cardsCreated: number;
// Failed deck creations plus cards that could not be created or had no target deck.
failed: number;
// Human-readable failure messages collected during the run.
failures: string[];
}
/** Progress snapshot handed to the optional `onProgress` callback of `importParsedAnki`. */
export interface ImportProgress {
// Phase the importer is currently in; 'done' is reported once at the very end.
stage: 'decks' | 'cards' | 'done';
// Zero-based index of the item about to be processed; equals `total` for 'done'.
current: number;
// Number of items in the current phase (the card count for 'done').
total: number;
}
/**
 * Creates the decks and cards of a parsed Anki package through the cards API.
 *
 * Decks are created first, one request per Anki deck, with the `::`
 * hierarchy separator rewritten to ` / `. Cards follow, one request each;
 * every field is passed through `sanitizeAnkiHtml` before it is sent. A card
 * whose Anki deck id has no created counterpart goes into a lazily created
 * "Anki-Import" deck.
 *
 * Errors thrown by `createDeck` / `createCard` are caught and recorded in the
 * returned summary instead of aborting the run.
 *
 * @param parsed - Parser output holding the decks and cards to import.
 * @param opts - Optional `onProgress` callback, invoked before each deck and
 *   each card and once more with stage `'done'` at the end.
 * @returns Counters and failure messages for the whole run.
 */
export async function importParsedAnki(
	parsed: ParsedAnki,
	opts: { onProgress?: (p: ImportProgress) => void } = {}
): Promise<ImportResult> {
	const summary: ImportResult = { decksCreated: 0, cardsCreated: 0, failed: 0, failures: [] };

	// Phase 1: decks. Remember which Anki deck id ended up as which created deck.
	const createdDeckIds = new Map<string, string>();
	for (const [position, source] of parsed.decks.entries()) {
		opts.onProgress?.({ stage: 'decks', current: position, total: parsed.decks.length });
		const flatName = source.name.replace(/::/g, ' / ');
		try {
			const deck = await createDeck({ name: flatName });
			createdDeckIds.set(source.ankiId, deck.id);
			summary.decksCreated += 1;
		} catch (cause) {
			summary.failed += 1;
			summary.failures.push(`deck "${flatName}": ${errMessage(cause)}`);
		}
	}

	// Catch-all deck for cards whose Anki deck was not created above.
	// Created on first use; a failed attempt is tried again on the next request.
	let fallbackId: string | null = null;
	async function resolveFallbackDeck(): Promise<string | null> {
		if (fallbackId) return fallbackId;
		try {
			const deck = await createDeck({ name: 'Anki-Import' });
			fallbackId = deck.id;
			summary.decksCreated += 1;
			return fallbackId;
		} catch (cause) {
			summary.failures.push(`fallback deck: ${errMessage(cause)}`);
			return null;
		}
	}

	// Phase 2: cards. Fields are sanitized, which drops media references.
	for (const [position, card] of parsed.cards.entries()) {
		opts.onProgress?.({ stage: 'cards', current: position, total: parsed.cards.length });

		const deckId = createdDeckIds.get(card.ankiDeckId) || (await resolveFallbackDeck());
		if (!deckId) {
			summary.failed += 1;
			continue;
		}

		const cleanFields: Record<string, string> = Object.fromEntries(
			Object.entries(card.fields).map(([key, raw]) => [key, sanitizeAnkiHtml(raw)])
		);

		try {
			await createCard({ deck_id: deckId, type: card.type, fields: cleanFields });
			summary.cardsCreated += 1;
		} catch (cause) {
			summary.failed += 1;
			summary.failures.push(`card "${preview(cleanFields)}": ${errMessage(cause)}`);
		}
	}

	opts.onProgress?.({ stage: 'done', current: parsed.cards.length, total: parsed.cards.length });
	return summary;
}
/**
 * Turns an arbitrary thrown value into a display string: the `message` of an
 * `Error`, otherwise the value's string conversion.
 */
function errMessage(e: unknown): string {
	return e instanceof Error ? e.message : String(e);
}
/**
 * Builds a short one-line label from the first field of a card, used in
 * failure messages. Values longer than 40 characters are cut and suffixed
 * with an ellipsis; whitespace runs are then collapsed to single spaces.
 */
function preview(fields: Record<string, string>): string {
	const limit = 40;
	const head = Object.values(fields)[0] ?? '';
	let label = head;
	if (head.length > limit) {
		label = head.slice(0, limit) + '…';
	}
	return label.replace(/\s+/g, ' ');
}

View file

@ -0,0 +1,241 @@
/**
* Parse an Anki .apkg / .colpkg file in the browser.
*
* .apkg = ZIP archive containing a SQLite collection (`collection.anki2`
* or `collection.anki21`) plus media files. We open the SQLite blob with
* sql.js (WASM-backed in-browser SQLite) and walk Anki's three core
* tables: `col` (collection meta with JSON-encoded models + decks),
* `notes` (the user-typed content), and `cards` (one row per learnable
* unit — basic = 1, basic-reverse = 2, cloze = N).
*
* MVP scope (Cards Phase 8): basic + basic-reverse + cloze. Media is
* collected but not uploaded — image/audio refs are stripped from the
* sanitized text. Review history is skipped — FSRS state will be
* regenerated on first sight.
*
* --------------------------------------------------------------------
* STRATEGIE-B-AUSNAHME: Diese Datei ist ein bewusst portierter Lift aus
* mana-monorepo/apps/cards/apps/web/src/lib/anki/parse.ts (commit
* ~Mai 2026). Anki-Format-Logik ist standalone Parser-Code ohne
* Architektur-Übernahme — die Kopie spart 2-3 Tage Re-Implementierung
* bei null Strategy-Risiko. CardType-Import auf @cards/domain
* umgestellt, Doc-Kommentar an Phase-8-Scope angepasst.
* --------------------------------------------------------------------
*/
import JSZip, { type JSZipObject } from 'jszip';
import initSqlJs, { type Database } from 'sql.js';
import type { CardType } from '@cards/domain';
/** One deck as read from the `decks` JSON of the Anki collection. */
export interface ParsedDeck {
ankiId: string; // Anki's numeric deck id, stringified
name: string; // "Studies::Spanish" — Anki uses :: as separator
}
/** One importable card; the parser emits a single entry per Anki note. */
export interface ParsedCard {
// Stringified `did` of the first `cards` row encountered for the note.
ankiDeckId: string;
// Card type chosen by `mapNoteToCard`.
type: CardType;
// Raw (not yet sanitized) field contents: `front`/`back`, or `text` plus optional `extra` for cloze.
fields: Record<string, string>;
}
/** Complete result of parsing an .apkg / .colpkg file. */
export interface ParsedAnki {
// Decks from the collection, excluding the deck with id 1 (Anki's "Default").
decks: ParsedDeck[];
// One entry per successfully mapped note.
cards: ParsedCard[];
// Number of notes skipped (missing note row, unknown model, or unmappable model type).
skipped: number;
// Human-readable notices collected while parsing.
warnings: string[];
/**
* Mapping from the original media filename (as referenced in card
* fields, e.g. `paris.jpg` or `audio_001.mp3`) to its ZIP entry. Anki
* stores files numerically (`0`, `1`, …) and the JSON manifest
* (`media`) maps numbers → original names; we flip that here so the
* importer can look up by the name it sees in the field text.
*/
mediaByFilename: Map<string, JSZipObject>;
}
/** Subset of an Anki note-type ("model") object as parsed from `col.models`. */
interface AnkiModel {
id: number;
name: string;
type: number; // 0 = standard, 1 = cloze
flds: { name: string }[]; // field definitions; only the name is typed here
tmpls: { name: string }[]; // card templates; exactly two means basic-reverse in `mapNoteToCard`
}
/** Subset of a deck object as parsed from `col.decks`. */
interface AnkiDeckJson {
// Numeric deck id; id 1 is filtered out by `extract`.
id: number;
// Deck name as stored in the collection JSON.
name: string;
}
// Shared sql.js initialisation. The *promise* is cached rather than the
// resolved module, so callers that arrive while the WASM is still loading
// join the in-flight initialisation instead of starting a second one.
let sqlReady: Promise<Awaited<ReturnType<typeof initSqlJs>>> | null = null;

/**
 * Returns the lazily initialised sql.js module.
 *
 * The WASM binary is resolved relative to the site root (`/<file>`). When
 * initialisation fails, the cached promise is dropped so a later call can
 * retry; the failure itself is propagated to the caller.
 */
function getSql(): Promise<Awaited<ReturnType<typeof initSqlJs>>> {
	if (!sqlReady) {
		sqlReady = initSqlJs({ locateFile: (file) => `/${file}` }).catch((cause: unknown) => {
			sqlReady = null;
			throw cause;
		});
	}
	return sqlReady;
}
/**
 * Parses an Anki package: unzips it, opens the contained SQLite collection
 * with sql.js and extracts decks, cards and the media manifest.
 *
 * @param file - The .apkg / .colpkg file as `File` or `Blob`.
 * @returns Decks, cards, skip count, warnings and the media lookup map.
 * @throws Error when the archive contains neither `collection.anki21` nor
 *   `collection.anki2`, or when the collection cannot be read.
 */
export async function parseApkg(file: File | Blob): Promise<ParsedAnki> {
	const zip = await JSZip.loadAsync(await file.arrayBuffer());
	const collectionEntry = zip.file('collection.anki21') ?? zip.file('collection.anki2');
	if (!collectionEntry) {
		throw new Error(
			'Keine Anki-Collection-Datei in der .apkg gefunden (erwartet: collection.anki21 oder collection.anki2).'
		);
	}
	// Finish every ZIP read before the database is opened: nothing may reject
	// between `new sql.Database(...)` and the `try`, otherwise the database
	// would never be closed.
	const sqliteBytes = await collectionEntry.async('uint8array');
	const mediaByFilename = await extractMediaManifest(zip);
	const sql = await getSql();
	const db: Database = new sql.Database(sqliteBytes);
	try {
		return { ...extract(db), mediaByFilename };
	} finally {
		db.close();
	}
}
/**
 * Builds the lookup from original media filename to ZIP entry.
 *
 * The ZIP entry named `media` is read as JSON mapping the numeric entry name
 * to the original filename. The result is best-effort: a missing manifest,
 * unparsable JSON, or a manifest that is not a JSON object yields an empty
 * map, and entries whose value is not a string or whose file is absent from
 * the ZIP are left out.
 *
 * @param zip - The opened package archive.
 * @returns Map keyed by original filename.
 */
async function extractMediaManifest(zip: JSZip): Promise<Map<string, JSZipObject>> {
	const out = new Map<string, JSZipObject>();
	const manifestEntry = zip.file('media');
	if (!manifestEntry) return out;
	let manifest: unknown;
	try {
		manifest = JSON.parse(await manifestEntry.async('string'));
	} catch {
		return out;
	}
	// JSON.parse may legally return null, a string, a number, ... — only a
	// real object can be a filename table.
	if (typeof manifest !== 'object' || manifest === null) return out;
	for (const [numericKey, originalName] of Object.entries(manifest)) {
		if (typeof originalName !== 'string') continue;
		const entry = zip.file(numericKey);
		if (entry) out.set(originalName, entry);
	}
	return out;
}
// Everything `parseApkg` returns except the media map, which is read from
// the ZIP rather than from SQLite and is merged in by the caller.
type ExtractResult = Omit<ParsedAnki, 'mediaByFilename'>;

/**
 * Reads decks and notes out of an opened Anki collection database.
 *
 * Queries `col` (JSON-encoded models and decks), `notes` and `cards`. Each
 * note is emitted at most once, using the deck of the first `cards` row that
 * references it. Notes without a row in `notes`, with an unknown model, or
 * with a model type `mapNoteToCard` cannot map are counted as skipped.
 *
 * @param db - Opened sql.js database of the collection.
 * @returns Decks, cards, skip count and warnings.
 * @throws Error when the `col` table yields no row.
 */
function extract(db: Database): ExtractResult {
	const [colResult] = db.exec('SELECT models, decks FROM col LIMIT 1');
	if (!colResult || colResult.values.length === 0) {
		throw new Error('Anki-Collection ist leer.');
	}
	const [modelsJson, decksJson] = colResult.values[0] as [string, string];
	const models: Record<string, AnkiModel> = JSON.parse(modelsJson);
	const decksMap: Record<string, AnkiDeckJson> = JSON.parse(decksJson);

	// Deck id 1 is Anki's "Default" deck and is not reported as a deck.
	const decks: ParsedDeck[] = [];
	for (const deck of Object.values(decksMap)) {
		if (deck.id === 1) continue;
		decks.push({ ankiId: String(deck.id), name: deck.name });
	}

	// Load all notes up front so the card loop never goes back to SQLite.
	type NoteRow = { id: string; mid: string; flds: string };
	const notesById = new Map<string, NoteRow>();
	const [noteResult] = db.exec('SELECT id, mid, flds FROM notes');
	for (const row of noteResult?.values ?? []) {
		const [id, mid, flds] = row as [number, number, string];
		const key = String(id);
		notesById.set(key, { id: key, mid: String(mid), flds });
	}

	const warnings: string[] = [];
	const cards: ParsedCard[] = [];
	let skipped = 0;

	const [cardResult] = db.exec('SELECT nid, did, ord FROM cards');
	if (!cardResult) {
		return { decks, cards: [], skipped: 0, warnings: ['Keine Karten gefunden.'] };
	}

	// Anki keeps one row per generated card (two for basic-reverse, one per
	// cloze deletion). Only the first row of each note is used; the target
	// model derives the individual cards from `type` + `fields`.
	const handledNotes = new Set<string>();
	for (const row of cardResult.values) {
		const [nid, did] = row as [number, number, number];
		const noteKey = String(nid);
		if (handledNotes.has(noteKey)) continue;
		handledNotes.add(noteKey);

		const note = notesById.get(noteKey);
		if (!note) {
			skipped += 1;
			continue;
		}

		const model = models[note.mid];
		if (!model) {
			skipped += 1;
			warnings.push(`Note ${nid}: unknown model ${note.mid}`);
			continue;
		}

		// Anki separates the fields of a note with the 0x1f unit separator.
		const mapped = mapNoteToCard(model, note.flds.split('\x1f'));
		if (!mapped) {
			skipped += 1;
			continue;
		}
		cards.push({ ankiDeckId: String(did), type: mapped.type, fields: mapped.fields });
	}

	if (skipped > 0) {
		warnings.unshift(`${skipped} Karten übersprungen (unbekannter Typ).`);
	}
	return { decks, cards, skipped, warnings };
}
/**
 * Maps an Anki note (its model plus positional field values) to a card.
 *
 * - Model type 1 (cloze): first field becomes `text`, a non-empty second
 *   field becomes `extra`.
 * - Model type 0 (standard): first two fields become `front` / `back`;
 *   exactly two templates yield `basic-reverse`, any other count `basic`
 *   (additional templates of custom note types are not represented).
 * - Any other model type: `null`.
 *
 * @param model - The note's Anki model.
 * @param fields - Field values in model order.
 * @returns The mapped type and fields, or `null` when the model type is unsupported.
 */
function mapNoteToCard(
	model: AnkiModel,
	fields: string[]
): { type: CardType; fields: Record<string, string> } | null {
	switch (model.type) {
		case 1: {
			const text = fields[0] ?? '';
			const extra = fields[1];
			if (extra) {
				return { type: 'cloze', fields: { text, extra } };
			}
			return { type: 'cloze', fields: { text } };
		}
		case 0: {
			const front = fields[0] ?? '';
			const back = fields[1] ?? '';
			const isReverse = model.tmpls.length === 2;
			return { type: isReverse ? 'basic-reverse' : 'basic', fields: { front, back } };
		}
		default:
			return null;
	}
}
/**
 * Converts Anki field HTML to plain text with light Markdown.
 *
 * Steps, in order:
 * 1. `<img>` tags and `[sound:…]` references are removed without replacement
 *    (media is not imported in this phase; a later media path could map
 *    filenames to URLs here instead).
 * 2. `<br>`, `<p>` and `<div>` become line breaks, `<b>`/`<strong>` become
 *    `**`, `<i>`/`<em>` become `*` — with or without attributes, so that
 *    opening and closing tags always produce a matching pair.
 * 3. All remaining tags are dropped.
 * 4. The entities `&nbsp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&amp;` are
 *    decoded. `&amp;` is decoded last so that escaped text such as
 *    `&amp;lt;` ends up as the literal `&lt;` rather than `<`.
 * 5. Runs of three or more line breaks collapse to two; the result is trimmed.
 *
 * @param html - Raw field content from an Anki note.
 * @returns The sanitized text.
 */
export function sanitizeAnkiHtml(html: string): string {
	return (
		html
			// Images and audio references are dropped entirely.
			.replace(/<img\b[^>]*>/gi, '')
			.replace(/\[sound:[^\]]+\]/g, '')
			// The lookahead pins the tag name, so `<b …>` matches but `<body>` does not.
			.replace(/<br(?=[\s/>])[^>]*>/gi, '\n')
			.replace(/<\/?(?:b|strong)(?=[\s/>])[^>]*>/gi, '**')
			.replace(/<\/?(?:i|em)(?=[\s/>])[^>]*>/gi, '*')
			.replace(/<\/?p(?=[\s/>])[^>]*>/gi, '\n')
			.replace(/<\/?div(?=[\s/>])[^>]*>/gi, '\n')
			.replace(/<[^>]+>/gi, '')
			.replace(/&nbsp;/g, ' ')
			.replace(/&lt;/g, '<')
			.replace(/&gt;/g, '>')
			.replace(/&quot;/g, '"')
			.replace(/&#39;/g, "'")
			// Must stay last: decoding `&` earlier would double-decode `&amp;lt;`.
			.replace(/&amp;/g, '&')
			.replace(/\n{3,}/g, '\n\n')
			.trim()
	);
}

View file

@ -0,0 +1,209 @@
<!--
Anki-Import-UI: Datei wählen → Preview → Importieren → Done.
STRATEGIE-B-AUSNAHME: portiert aus
mana-monorepo/apps/cards/apps/web/src/lib/components/AnkiImport.svelte.
Original-Layout angepasst auf das oklch-Theme im neuen Repo
(var(--color-*)) und auf den server-authoritative Import-Pfad
ohne Media-Upload-Stage. Cloze-Skipped-Anzeige ergänzt — der neue
Importer reicht Cloze direkt durch.
-->
<script lang="ts">
import { parseApkg, type ParsedAnki } from '$lib/anki/parse.ts';
import { importParsedAnki, type ImportResult, type ImportProgress } from '$lib/anki/import.ts';
// Reference to the hidden <input type="file">, set through `bind:this`.
let fileInput = $state<HTMLInputElement | null>(null);
// Current step of the import flow; the markup renders one branch per value.
let stage = $state<'idle' | 'parsing' | 'preview' | 'importing' | 'done' | 'error'>('idle');
// Parser output of the selected file; null until parsing has succeeded.
let parsed = $state<ParsedAnki | null>(null);
// Summary of the finished import; null until the import has completed.
let result = $state<ImportResult | null>(null);
// Message shown in the 'error' stage.
let error = $state<string | null>(null);
// Name of the selected file, shown while parsing and in the preview.
let fileName = $state<string>('');
// Latest progress snapshot reported by the importer.
let progress = $state<ImportProgress>({ stage: 'decks', current: 0, total: 0 });
// Number of parsed cards per card type, shown in the preview. All counts
// are zero while nothing has been parsed.
const typeBreakdown = $derived.by(() => {
	const tally = { basic: 0, basicReverse: 0, cloze: 0 };
	if (!parsed) return tally;
	for (const { type } of parsed.cards) {
		switch (type) {
			case 'basic':
				tally.basic += 1;
				break;
			case 'basic-reverse':
				tally.basicReverse += 1;
				break;
			case 'cloze':
				tally.cloze += 1;
				break;
		}
	}
	return tally;
});
// Parses the chosen file and moves on to the preview; a parser error
// switches to the error stage with the error's message.
async function handleFile(file: File) {
	error = null;
	fileName = file.name;
	stage = 'parsing';
	try {
		parsed = await parseApkg(file);
	} catch (cause: unknown) {
		if (cause instanceof Error) {
			error = cause.message;
		} else {
			error = 'Datei konnte nicht gelesen werden.';
		}
		stage = 'error';
		return;
	}
	stage = 'preview';
}
// Change handler of the file input: hands the first selected file to
// `handleFile`, then clears the input's value.
function onPick(e: Event) {
	const picker = e.currentTarget as HTMLInputElement;
	const chosen = picker.files?.[0];
	if (chosen) {
		handleFile(chosen);
	}
	picker.value = '';
}
// Drop handler of the drop zone: suppresses the browser's default drop
// behavior and hands the first dropped file to `handleFile`.
function onDrop(e: DragEvent) {
	e.preventDefault();
	const dropped = e.dataTransfer?.files?.[0];
	if (!dropped) return;
	handleFile(dropped);
}
// Runs the import for the previewed file, mirroring importer progress into
// `progress`. Ends in the 'done' stage, or in 'error' if the importer throws.
async function confirmImport() {
	if (!parsed) return;
	stage = 'importing';
	progress = { stage: 'decks', current: 0, total: parsed.decks.length };
	const onProgress = (update: ImportProgress) => {
		progress = update;
	};
	try {
		result = await importParsedAnki(parsed, { onProgress });
		stage = 'done';
	} catch (cause: unknown) {
		if (cause instanceof Error) {
			error = cause.message;
		} else {
			error = 'Import fehlgeschlagen.';
		}
		stage = 'error';
	}
}
// Returns the component to its initial state: clears the parsed file, the
// import result, the error and the file name, then shows the drop zone.
function reset() {
	fileName = '';
	error = null;
	result = null;
	parsed = null;
	stage = 'idle';
}
</script>
<div class="rounded-xl border border-[var(--color-border)] bg-[var(--color-card)] p-4">
<div class="mb-2 text-sm font-medium">Aus Anki importieren</div>
{#if stage === 'idle'}
<!-- svelte-ignore a11y_no_static_element_interactions -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div
class="cursor-pointer rounded-lg border-2 border-dashed border-[var(--color-border)] px-4 py-6 text-center text-sm text-[var(--color-muted)] transition-colors hover:border-[var(--color-primary)] hover:text-[var(--color-fg)]"
ondragover={(e) => e.preventDefault()}
ondrop={onDrop}
onclick={() => fileInput?.click()}
>
<div class="mb-1">📦 .apkg-Datei hier ablegen oder klicken</div>
<div class="text-xs">
Basic, Basic + Reverse, Cloze · Bilder + Audio werden in dieser Phase nicht übernommen.
</div>
</div>
<input
bind:this={fileInput}
type="file"
accept=".apkg,.colpkg"
class="hidden"
onchange={onPick}
/>
{:else if stage === 'parsing'}
<div class="py-6 text-center text-sm text-[var(--color-muted)]">Lese {fileName}</div>
{:else if stage === 'preview' && parsed}
<div class="space-y-2 text-sm">
<div>
<span class="text-[var(--color-muted)]">Gefunden in</span>
<code class="rounded bg-[var(--color-border)]/40 px-1 text-xs">{fileName}</code>:
</div>
<ul class="ml-4 list-disc">
<li>{parsed.decks.length} {parsed.decks.length === 1 ? 'Deck' : 'Decks'}</li>
<li>
{parsed.cards.length} {parsed.cards.length === 1 ? 'Karte' : 'Karten'}
{#if parsed.cards.length > 0}
<span class="text-[var(--color-muted)]">
({typeBreakdown.basic} basic, {typeBreakdown.basicReverse} basic-reverse,
{typeBreakdown.cloze} cloze)
</span>
{/if}
</li>
{#if parsed.mediaByFilename.size > 0}
<li class="text-[var(--color-muted)]">
{parsed.mediaByFilename.size} Medien (werden in dieser Phase NICHT übernommen)
</li>
{/if}
{#if parsed.skipped > 0}
<li>{parsed.skipped} übersprungen (unbekannter Typ)</li>
{/if}
</ul>
{#if parsed.warnings.length > 0}
<details class="text-xs text-[var(--color-muted)]">
<summary class="cursor-pointer">Hinweise ({parsed.warnings.length})</summary>
<ul class="mt-1 list-disc pl-4">
{#each parsed.warnings.slice(0, 10) as w (w)}<li>{w}</li>{/each}
</ul>
</details>
{/if}
<div class="flex justify-end gap-2 pt-2">
<button
class="rounded px-3 py-1.5 text-sm text-[var(--color-muted)] hover:text-[var(--color-fg)]"
onclick={reset}
>
Abbrechen
</button>
<button
class="rounded bg-[var(--color-primary)] px-4 py-1.5 text-sm text-[var(--color-primary-fg)] hover:opacity-90"
onclick={confirmImport}
>
Importieren
</button>
</div>
</div>
{:else if stage === 'importing'}
<div class="py-6 text-center text-sm text-[var(--color-muted)]">
{#if progress.stage === 'decks'}
Lege Decks an · {progress.current} / {progress.total}
{:else if progress.stage === 'cards'}
Importiere Karten · {progress.current} / {progress.total}
{:else}
Fertig.
{/if}
<div class="mx-auto mt-3 h-1 w-48 overflow-hidden rounded-full bg-[var(--color-border)]/40">
<div
class="h-full bg-[var(--color-primary)] transition-all"
style="width: {progress.total === 0 ? 0 : (progress.current / progress.total) * 100}%"
></div>
</div>
</div>
{:else if stage === 'done' && result}
<div class="space-y-2 text-sm">
<div class="text-[var(--color-success,#16a34a)]">
{result.cardsCreated} Karten in {result.decksCreated}
{result.decksCreated === 1 ? 'Deck' : 'Decks'} angelegt.
</div>
{#if result.failed > 0}
<details class="text-[var(--color-danger)]">
<summary class="cursor-pointer">{result.failed} Fehler</summary>
<ul class="mt-1 list-disc pl-4 text-xs">
<!-- Keyed by index: failure messages can repeat (same error for several
cards), and duplicate keys are an error in a keyed each block. -->
{#each result.failures.slice(0, 20) as msg, i (i)}<li>{msg}</li>{/each}
</ul>
</details>
{/if}
<button
class="rounded px-3 py-1.5 text-sm text-[var(--color-muted)] hover:text-[var(--color-fg)]"
onclick={reset}
>
Weitere Datei
</button>
</div>
{:else if stage === 'error'}
<div class="space-y-2 text-sm">
<div class="text-[var(--color-danger)]">Fehler: {error}</div>
<button
class="rounded px-3 py-1.5 text-sm text-[var(--color-muted)] hover:text-[var(--color-fg)]"
onclick={reset}
>
Erneut versuchen
</button>
</div>
{/if}
</div>

View file

@ -27,6 +27,11 @@
class="hover:text-[var(--color-primary)]"
class:font-medium={page.url.pathname.startsWith('/study')}>Lernen</a
>
<a
href="/import"
class="hover:text-[var(--color-primary)]"
class:font-medium={page.url.pathname.startsWith('/import')}>Import</a
>
</nav>
<div class="flex items-center gap-3 text-sm">

View file

@ -0,0 +1,43 @@
<script lang="ts">
import { onMount } from 'svelte';
import { goto } from '$app/navigation';
import { devUser } from '$lib/auth/dev-stub.svelte.ts';
import AnkiImport from '$lib/components/AnkiImport.svelte';
// After mount, send visitors without a dev user id back to the start page.
onMount(() => {
	if (devUser.id) return;
	goto('/');
});
</script>
<svelte:head>
<title>Import · Cards</title>
</svelte:head>
<div class="mx-auto max-w-2xl px-4 py-8">
<h1 class="text-2xl font-semibold">Importieren</h1>
<p class="mt-2 text-sm text-[var(--color-muted)]">
Übernimm Decks und Karten aus einer Anki-Datei (<code>.apkg</code> oder <code>.colpkg</code>).
FSRS-Verlauf wird nicht übernommen — alle Karten starten als „neu".
</p>
<div class="mt-6">
<AnkiImport />
</div>
<aside class="mt-8 rounded-lg border border-dashed border-[var(--color-border)] p-4 text-xs text-[var(--color-muted)]">
<div class="mb-1 font-medium text-[var(--color-fg)]">Was wird übernommen</div>
<ul class="list-disc pl-4">
<li>Decks (Anki-Hierarchie <code>Foo::Bar</code> wird zu <code>Foo / Bar</code>).</li>
<li>Basic + Basic-Reverse: Front/Back direkt.</li>
<li>Cloze: <code>{'{{c1::…}}'}</code> wird mit Sub-Index pro Cluster angelegt.</li>
</ul>
<div class="mt-2 mb-1 font-medium text-[var(--color-fg)]">Was nicht übernommen wird</div>
<ul class="list-disc pl-4">
<li>Bilder + Audio (kommen mit der Plattform-Anbindung in einer späteren Phase).</li>
<li>FSRS-Lernverlauf (Anki-Reviews werden bewusst neu aufgesetzt).</li>
<li>Add-on-spezifische Card-Types (image-occlusion etc.).</li>
</ul>
</aside>
</div>