diff --git a/apps/web/package.json b/apps/web/package.json index a41dbfa..fa8d802 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -17,7 +17,9 @@ "dependencies": { "@cards/domain": "workspace:*", "dompurify": "^3.4.2", - "marked": "^18.0.3" + "jszip": "^3.10.1", + "marked": "^18.0.3", + "sql.js": "^1.14.1" }, "devDependencies": { "@sveltejs/adapter-node": "^5.2.0", @@ -25,6 +27,8 @@ "@sveltejs/vite-plugin-svelte": "^4.0.0", "@tailwindcss/vite": "^4.2.4", "@types/dompurify": "^3.2.0", + "@types/jszip": "^3.4.1", + "@types/sql.js": "^1.4.11", "svelte": "^5.0.0", "svelte-check": "^4.0.0", "tailwindcss": "^4.2.4", diff --git a/apps/web/src/lib/anki/import.ts b/apps/web/src/lib/anki/import.ts new file mode 100644 index 0000000..45442b1 --- /dev/null +++ b/apps/web/src/lib/anki/import.ts @@ -0,0 +1,120 @@ +/** + * Server-authoritative Anki-Import. + * + * Schreibt gegen die cards-api HTTP-Endpoints — keine Dexie, keine + * lokalen Stores. Anki-Decks werden 1:1 in cards-Decks gemappt + * (Anki-`::` zu ` / ` flacht die Hierarchie aus, wie im Original). + * Karten werden mit sanitisiertem Markdown angelegt. + * + * Phase-8-MVP: Bilder + Audio werden gedroppt (siehe parse.ts + * `sanitizeAnkiHtml`). Ein späterer Media-Pfad ist additiv. + * + * No de-dupe: Re-Import derselben .apkg legt doppelte Decks an. 
+ */
+
+import { createDeck } from '$lib/api/decks.ts';
+import { createCard } from '$lib/api/cards.ts';
+import { sanitizeAnkiHtml, type ParsedAnki } from './parse.ts';
+
+/** Outcome of one `importParsedAnki` run. */
+export interface ImportResult {
+  /** Decks created through the API, including the lazily created fallback deck. */
+  decksCreated: number;
+  /** Cards created through the API. */
+  cardsCreated: number;
+  /** Decks that could not be created plus cards that could not be imported. */
+  failed: number;
+  /** Human-readable failure messages, in the order the failures occurred. */
+  failures: string[];
+}
+
+/** Progress notification passed to the optional `onProgress` callback. */
+export interface ImportProgress {
+  stage: 'decks' | 'cards' | 'done';
+  /** Zero-based index of the item about to be processed; equals `total` for `'done'`. */
+  current: number;
+  /** Number of items in the current stage (for `'done'`: the number of cards). */
+  total: number;
+}
+
+/**
+ * Create decks and cards for an already parsed Anki package.
+ *
+ * Decks are created first, one API call per deck, with Anki's `::`
+ * hierarchy separator flattened to ` / `. Cards are then created one by
+ * one with every field passed through `sanitizeAnkiHtml`. Cards whose
+ * Anki deck was not created go into a fallback deck named `Anki-Import`,
+ * which is created on first use.
+ *
+ * Errors thrown by `createDeck` / `createCard` are caught and recorded in
+ * the result instead of aborting the import.
+ *
+ * @param parsed - Parser output (decks and cards).
+ * @param opts - Optional `onProgress` callback, invoked before each deck
+ *   and card and once more with stage `'done'`.
+ * @returns Counters and failure messages for the whole run.
+ */
+export async function importParsedAnki(
+  parsed: ParsedAnki,
+  opts: { onProgress?: (p: ImportProgress) => void } = {}
+): Promise<ImportResult> {
+  const result: ImportResult = {
+    decksCreated: 0,
+    cardsCreated: 0,
+    failed: 0,
+    failures: [],
+  };
+
+  // 1) Decks — flatten Anki's "::" hierarchy into " / "-joined names.
+  const ankiIdToDeckId = new Map<string, string>();
+  let deckIdx = 0;
+  for (const ankiDeck of parsed.decks) {
+    opts.onProgress?.({ stage: 'decks', current: deckIdx++, total: parsed.decks.length });
+    const name = ankiDeck.name.replace(/::/g, ' / ');
+    try {
+      const created = await createDeck({ name });
+      ankiIdToDeckId.set(ankiDeck.ankiId, created.id);
+      result.decksCreated++;
+    } catch (e) {
+      result.failed++;
+      result.failures.push(`deck "${name}": ${errMessage(e)}`);
+    }
+  }
+
+  // Fallback deck for cards whose Anki deck has no created counterpart.
+  // Creation is attempted at most once: if it fails, every further orphan
+  // card is reported as failed instead of triggering another API request
+  // (and another duplicate "fallback deck" message) per card.
+  let fallbackDeckId: string | null = null;
+  let fallbackAttempted = false;
+  const ensureFallbackDeck = async (): Promise<string | null> => {
+    if (fallbackAttempted) return fallbackDeckId;
+    fallbackAttempted = true;
+    try {
+      const created = await createDeck({ name: 'Anki-Import' });
+      fallbackDeckId = created.id;
+      result.decksCreated++;
+    } catch (e) {
+      result.failures.push(`fallback deck: ${errMessage(e)}`);
+    }
+    return fallbackDeckId;
+  };
+
+  // 2) Cards — sanitize the fields (media references are dropped).
+  for (const [i, card] of parsed.cards.entries()) {
+    opts.onProgress?.({ stage: 'cards', current: i, total: parsed.cards.length });
+
+    const cleanFields: Record<string, string> = {};
+    for (const [key, value] of Object.entries(card.fields)) {
+      cleanFields[key] = sanitizeAnkiHtml(value);
+    }
+
+    const targetDeckId = ankiIdToDeckId.get(card.ankiDeckId) ?? (await ensureFallbackDeck());
+    if (!targetDeckId) {
+      // Keep `failed` and `failures` in step so the caller can show why.
+      result.failed++;
+      result.failures.push(`card "${preview(cleanFields)}": no target deck available`);
+      continue;
+    }
+
+    try {
+      await createCard({
+        deck_id: targetDeckId,
+        type: card.type,
+        fields: cleanFields,
+      });
+      result.cardsCreated++;
+    } catch (e) {
+      result.failed++;
+      result.failures.push(`card "${preview(cleanFields)}": ${errMessage(e)}`);
+    }
+  }
+
+  opts.onProgress?.({ stage: 'done', current: parsed.cards.length, total: parsed.cards.length });
+  return result;
+}
+
+/** Best-effort text for a caught value: `Error.message`, otherwise `String(e)`. */
+function errMessage(e: unknown): string {
+  if (e instanceof Error) return e.message;
+  return String(e);
+}
+
+/**
+ * Short single-line excerpt of a card's first field, used in failure messages.
+ *
+ * Whitespace is collapsed before truncating so that runs of newlines or
+ * indentation do not use up the 40-character budget.
+ */
+function preview(fields: Record<string, string>): string {
+  const first = Object.values(fields)[0] ?? '';
+  const flat = first.replace(/\s+/g, ' ').trim();
+  return flat.length > 40 ? `${flat.slice(0, 40)}…` : flat;
+}
diff --git a/apps/web/src/lib/anki/parse.ts b/apps/web/src/lib/anki/parse.ts
new file mode 100644
index 0000000..193bf53
--- /dev/null
+++ b/apps/web/src/lib/anki/parse.ts
@@ -0,0 +1,241 @@
+/**
+ * Parse an Anki .apkg / .colpkg file in the browser.
+ *
+ * .apkg = ZIP archive containing a SQLite collection (`collection.anki2`
+ * or `collection.anki21`) plus media files. We open the SQLite blob with
+ * sql.js (WASM-backed in-browser SQLite) and walk Anki's three core
+ * tables: `col` (collection meta with JSON-encoded models + decks),
+ * `notes` (the user-typed content), and `cards` (one row per learnable
+ * unit — basic = 1, basic-reverse = 2, cloze = N).
+ *
+ * MVP scope (Cards Phase 8): basic + basic-reverse + cloze.
Media is
+ * collected but not uploaded — Image/audio refs are stripped from the
+ * sanitized text. Review history is skipped — FSRS state will be
+ * regenerated on first sight.
+ *
+ * --------------------------------------------------------------------
+ * STRATEGY-B EXCEPTION: this file is a deliberately ported lift from
+ * mana-monorepo/apps/cards/apps/web/src/lib/anki/parse.ts (commit
+ * ~May 2026). The Anki format logic is standalone parser code without
+ * any architecture carry-over — copying it saves 2-3 days of
+ * re-implementation at zero strategy risk. The CardType import was
+ * switched to @cards/domain and the doc comment adjusted to the
+ * Phase 8 scope.
+ * --------------------------------------------------------------------
+ */
+
+import JSZip, { type JSZipObject } from 'jszip';
+import initSqlJs, { type Database } from 'sql.js';
+import type { CardType } from '@cards/domain';
+
+/** A deck as listed in the collection's `decks` JSON. */
+export interface ParsedDeck {
+  ankiId: string; // Anki's numeric deck id, stringified
+  name: string; // "Studies::Spanish" — Anki uses :: as separator
+}
+
+/** One importable card: its Anki deck id, mapped card type and named fields. */
+export interface ParsedCard {
+  ankiDeckId: string;
+  type: CardType;
+  fields: Record<string, string>;
+}
+
+/** Everything `parseApkg` extracts from a package. */
+export interface ParsedAnki {
+  decks: ParsedDeck[];
+  cards: ParsedCard[];
+  /** Number of notes that could not be turned into a card. */
+  skipped: number;
+  warnings: string[];
+  /**
+   * Mapping from the original media filename (as referenced in card
+   * fields, e.g. `paris.jpg` or `audio_001.mp3`) to its ZIP entry. Anki
+   * stores files numerically (`0`, `1`, …) and the JSON manifest
+   * (`media`) maps numbers → original names; we flip that here so the
+   * importer can look up by the name it sees in the field text.
+   */
+  mediaByFilename: Map<string, JSZipObject>;
+}
+
+/** Subset of an Anki note type ("model") read from the `col.models` JSON. */
+interface AnkiModel {
+  id: number;
+  name: string;
+  type: number; // 0 = standard, 1 = cloze
+  flds: { name: string }[];
+  tmpls: { name: string }[];
+}
+
+/** Subset of a deck entry read from the `col.decks` JSON. */
+interface AnkiDeckJson {
+  id: number;
+  name: string;
+}
+
+// The pending/settled sql.js initialisation. Caching the promise (not the
+// resolved value) makes concurrent first calls share one WASM load.
+let sqlPromise: ReturnType<typeof initSqlJs> | null = null;
+
+/**
+ * Lazily initialise sql.js once per page; the WASM file is resolved
+ * relative to the site root. A failed initialisation is not cached, so a
+ * later call can try again.
+ */
+function getSql(): ReturnType<typeof initSqlJs> {
+  if (!sqlPromise) {
+    sqlPromise = initSqlJs({ locateFile: (file) => `/${file}` }).catch((e: unknown) => {
+      sqlPromise = null;
+      throw e;
+    });
+  }
+  return sqlPromise;
+}
+
+/**
+ * Parse an Anki package into decks, cards and a media lookup table.
+ *
+ * @param file - The `.apkg` / `.colpkg` archive.
+ * @returns Decks, cards, skip statistics, warnings and the media map.
+ * @throws Error when the archive has no legacy collection file, when it
+ *   only contains the new-format collection, or when the collection is
+ *   empty.
+ */
+export async function parseApkg(file: File | Blob): Promise<ParsedAnki> {
+  const zip = await JSZip.loadAsync(await file.arrayBuffer());
+
+  // NOTE(review): packages exported without "Support older Anki versions"
+  // are understood to ship the real data as `collection.anki21b` next to a
+  // placeholder `collection.anki2` — confirm against the Anki manual.
+  // Reading the placeholder would "succeed" with a bogus card, so refuse.
+  if (!zip.file('collection.anki21') && zip.file('collection.anki21b')) {
+    throw new Error(
+      'Diese .apkg nutzt das neue Anki-Paketformat (collection.anki21b), das nicht unterstützt wird. Bitte in Anki mit aktivierter Option "Support older Anki versions" erneut exportieren.'
+    );
+  }
+
+  const collectionEntry = zip.file('collection.anki21') ?? zip.file('collection.anki2');
+  if (!collectionEntry) {
+    throw new Error(
+      'Keine Anki-Collection-Datei in der .apkg gefunden (erwartet: collection.anki21 oder collection.anki2).'
+    );
+  }
+
+  // Finish every await before opening the database: between `new
+  // sql.Database` and the `try` below nothing may throw, otherwise the
+  // handle would never be closed.
+  const mediaByFilename = await extractMediaManifest(zip);
+  const sqliteBytes = await collectionEntry.async('uint8array');
+  const sql = await getSql();
+
+  const db: Database = new sql.Database(sqliteBytes);
+  try {
+    const result = extract(db);
+    return { ...result, mediaByFilename };
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Build the original-filename → ZIP-entry map from the `media` manifest.
+ *
+ * Best effort: a missing, unparsable or non-object manifest yields an
+ * empty map, and entries whose value is not a string or whose numbered
+ * file is absent from the archive are ignored.
+ */
+async function extractMediaManifest(zip: JSZip): Promise<Map<string, JSZipObject>> {
+  const out = new Map<string, JSZipObject>();
+  const manifestEntry = zip.file('media');
+  if (!manifestEntry) return out;
+
+  let manifest: unknown;
+  try {
+    manifest = JSON.parse(await manifestEntry.async('string'));
+  } catch {
+    return out;
+  }
+  // `JSON.parse` can legally return null or a primitive.
+  if (typeof manifest !== 'object' || manifest === null) return out;
+
+  for (const [numericKey, originalName] of Object.entries(manifest)) {
+    if (typeof originalName !== 'string') continue;
+    const entry = zip.file(numericKey);
+    if (entry) out.set(originalName, entry);
+  }
+  return out;
+}
+
+// Internal extract returns everything except media — that's plumbed in
+// at the parseApkg layer so the SQLite-only path stays focused.
+type ExtractResult = Omit<ParsedAnki, 'mediaByFilename'>;
+
+/**
+ * Read decks and notes out of an opened Anki collection database.
+ *
+ * Runs three queries: the `models` / `decks` JSON from `col`, all rows of
+ * `notes`, and all rows of `cards`. Each note is emitted at most once,
+ * placed in the deck of the first card row that references it.
+ *
+ * Anki's "Default" deck (id 1) is only reported when at least one
+ * imported card lives in it; all other decks are reported as listed.
+ *
+ * @param db - An open sql.js database; the caller is responsible for closing it.
+ * @throws Error when the `col` table has no row.
+ */
+function extract(db: Database): ExtractResult {
+  const colRow = db.exec('SELECT models, decks FROM col LIMIT 1');
+  if (colRow.length === 0 || colRow[0].values.length === 0) {
+    throw new Error('Anki-Collection ist leer.');
+  }
+  const [modelsJson, decksJson] = colRow[0].values[0] as [string, string];
+  const models: Record<string, AnkiModel> = JSON.parse(modelsJson);
+  const decksMap: Record<string, AnkiDeckJson> = JSON.parse(decksJson);
+
+  const defaultDeckId = '1'; // Anki's "Default" deck has id 1
+  const allDecks: ParsedDeck[] = Object.values(decksMap).map((d) => ({
+    ankiId: String(d.id),
+    name: d.name,
+  }));
+  // Drop the Default deck only when nothing was imported into it, so that
+  // cards stored there keep their deck instead of becoming orphans.
+  const decksFor = (usedDeckIds: ReadonlySet<string>): ParsedDeck[] =>
+    allDecks.filter((d) => d.ankiId !== defaultDeckId || usedDeckIds.has(defaultDeckId));
+
+  // Pre-load notes into a Map so we don't hit SQLite per card.
+  type NoteRow = { id: string; mid: string; flds: string };
+  const notesById = new Map<string, NoteRow>();
+  const notesRes = db.exec('SELECT id, mid, flds FROM notes');
+  if (notesRes.length > 0) {
+    for (const row of notesRes[0].values) {
+      const [id, mid, flds] = row as [number, number, string];
+      notesById.set(String(id), { id: String(id), mid: String(mid), flds });
+    }
+  }
+
+  const warnings: string[] = [];
+  const cards: ParsedCard[] = [];
+  let skipped = 0;
+
+  const cardsRes = db.exec('SELECT nid, did, ord FROM cards');
+  if (cardsRes.length === 0)
+    return { decks: decksFor(new Set()), cards: [], skipped: 0, warnings: ['Keine Karten gefunden.'] };
+
+  // We dedupe at the note level — Anki stores one DB-row per generated
+  // card (basic-reverse = 2 rows, cloze cluster c1+c2 = 2 rows). Our
+  // model regenerates these from `type` + `fields` automatically, so
+  // pulling each note once is enough.
+  const seenNotes = new Set<string>();
+  for (const row of cardsRes[0].values) {
+    const [nid, did] = row as [number, number, number];
+    const noteKey = String(nid);
+    if (seenNotes.has(noteKey)) continue;
+    seenNotes.add(noteKey);
+
+    const note = notesById.get(noteKey);
+    if (!note) {
+      skipped++;
+      continue;
+    }
+    const model = models[note.mid];
+    if (!model) {
+      skipped++;
+      warnings.push(`Note ${nid}: unknown model ${note.mid}`);
+      continue;
+    }
+
+    // Anki joins a note's field values with the unit separator (0x1f).
+    const fieldValues = note.flds.split('\x1f');
+    const result = mapNoteToCard(model, fieldValues);
+    if (!result) {
+      skipped++;
+      continue;
+    }
+    cards.push({ ankiDeckId: String(did), ...result });
+  }
+
+  if (skipped > 0) warnings.unshift(`${skipped} Karten übersprungen (unbekannter Typ).`);
+  const usedDeckIds = new Set(cards.map((c) => c.ankiDeckId));
+  return { decks: decksFor(usedDeckIds), cards, skipped, warnings };
+}
+
+/**
+ * Map one Anki note to a card type plus named fields.
+ *
+ * @param model - The note's model; `type` 1 is cloze, 0 is standard.
+ * @param fields - The note's field values in model order.
+ * @returns The mapped card, or `null` for any other model type.
+ */
+function mapNoteToCard(
+  model: AnkiModel,
+  fields: string[]
+): { type: CardType; fields: Record<string, string> } | null {
+  // Cloze: the first field carries the {{cN::...}} markup; a non-empty
+  // second field is kept as `extra`.
+  if (model.type === 1) {
+    const text = fields[0] ?? '';
+    return { type: 'cloze', fields: { text, ...(fields[1] ? { extra: fields[1] } : {}) } };
+  }
+
+  // Standard: one or two templates → basic / basic-reverse.
+  if (model.type === 0) {
+    const front = fields[0] ?? '';
+    const back = fields[1] ?? '';
+    if (model.tmpls.length === 2) {
+      return { type: 'basic-reverse', fields: { front, back } };
+    }
+    // 1 (or unusual N) → treat as basic. Custom multi-card templates
+    // lose their extra surfaces; the user-typed content survives.
+    return { type: 'basic', fields: { front, back } };
+  }
+
+  return null;
+}
+
+/**
+ * Convert Anki's HTML / image / sound markup to plain text + Markdown.
+ *
+ * Phase-8-MVP: Bilder + Audio werden ersatzlos gedroppt (Option A).
+ * Ein späterer Media-Pfad (lokaler Cards-Upload-Endpunkt oder mana-media
+ * via Phase 2 Auth-Föderation) kann hier eine Filename→URL-Map einsetzen,
+ * die dann zu `` / `