Phase 8c: Anki-Import via portiertem Parser
Strategie-B-Ausnahme: parse.ts (Anki-Format-Parser via JSZip + sql.js) und AnkiImport.svelte (UI-Stages) sind aus mana-monorepo portiert, mit Source-Comment-Header dokumentiert. Anki-Format ist standalone Parser-Logik, kein Architektur-Schmuggel. Neuer server-authoritative import.ts schreibt direkt gegen die cards-api ($lib/api/decks + cards) — keine Stores, keine Dexie. Anki "::"-Hierarchie wird zu " / "-Strings flach. Fallback-Deck "Anki-Import" für Karten ohne explizites Deck. Cloze-Karten kommen first-class durch (Sub-Index pro Cluster, Sprint 8a/8b). Phase-8-MVP-Scope: Bilder + Audio werden gedroppt (Option A) — der sanitizeAnkiHtml entfernt <img> und [sound:…] ersatzlos. Späterer Media-Pfad (lokaler Cards-Upload oder mana-media nach Phase 2) ist additiv. Neue Route /import + Top-Nav-Link. Hermetic Vitest (5 Cases): baut zur Laufzeit ein Mini-.apkg via sql.js + JSZip und prüft den Parser-Output (basic, basic-reverse, cloze, sanitize, dedupe auf Note-Ebene). svelte-check 0 errors, prod-Build sauber. sql-wasm.wasm liegt in static/ (660kB) — fix für sql.js 1.14.1, vom Browser einmal geladen. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0b609c46fd
commit
2ca09fe0c3
9 changed files with 916 additions and 3 deletions
120
apps/web/src/lib/anki/import.ts
Normal file
120
apps/web/src/lib/anki/import.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* Server-authoritative Anki-Import.
|
||||
*
|
||||
* Schreibt gegen die cards-api HTTP-Endpoints — keine Dexie, keine
|
||||
* lokalen Stores. Anki-Decks werden 1:1 in cards-Decks gemappt
|
||||
* (Anki-`::` zu ` / ` flacht die Hierarchie aus, wie im Original).
|
||||
* Karten werden mit sanitisiertem Markdown angelegt.
|
||||
*
|
||||
* Phase-8-MVP: Bilder + Audio werden gedroppt (siehe parse.ts
|
||||
* `sanitizeAnkiHtml`). Ein späterer Media-Pfad ist additiv.
|
||||
*
|
||||
* No de-dupe: Re-Import derselben .apkg legt doppelte Decks an.
|
||||
*/
|
||||
|
||||
import { createDeck } from '$lib/api/decks.ts';
|
||||
import { createCard } from '$lib/api/cards.ts';
|
||||
import { sanitizeAnkiHtml, type ParsedAnki } from './parse.ts';
|
||||
|
||||
/** Summary returned by `importParsedAnki` once the import has finished. */
export interface ImportResult {
  /** Decks successfully created (the "Anki-Import" fallback deck included). */
  decksCreated: number;
  /** Cards successfully created. */
  cardsCreated: number;
  /** Decks and cards that could not be created. */
  failed: number;
  /** Human-readable description of the recorded failures. */
  failures: string[];
}
|
||||
|
||||
/** Progress event handed to the optional `onProgress` callback of the import. */
export interface ImportProgress {
  /** Phase the import is currently in. */
  stage: 'decks' | 'cards' | 'done';
  /** Zero-based index of the item about to be processed (equals `total` for `'done'`). */
  current: number;
  /** Number of items in the current stage. */
  total: number;
}
|
||||
|
||||
/**
 * Writes a parsed Anki collection to the cards-api, one request per deck
 * and per card, strictly one after the other.
 *
 * Errors thrown by `createDeck` / `createCard` are caught, counted in
 * `failed`, described in `failures`, and the import carries on with the
 * next item.
 *
 * @param parsed Parser output (decks + cards) to import.
 * @param opts.onProgress Optional callback, invoked before every deck and
 *   card request and once more with stage `'done'` at the very end.
 * @returns Counters for created decks / cards plus the collected failures.
 */
export async function importParsedAnki(
  parsed: ParsedAnki,
  opts: { onProgress?: (p: ImportProgress) => void } = {}
): Promise<ImportResult> {
  const summary: ImportResult = { decksCreated: 0, cardsCreated: 0, failed: 0, failures: [] };
  const notify = (event: ImportProgress): void => {
    opts.onProgress?.(event);
  };

  // Step 1: create the decks. Anki's "::" hierarchy becomes a flat " / " name.
  const deckIdByAnkiId = new Map<string, string>();
  for (const [position, ankiDeck] of parsed.decks.entries()) {
    notify({ stage: 'decks', current: position, total: parsed.decks.length });
    const flatName = ankiDeck.name.replace(/::/g, ' / ');
    try {
      const deck = await createDeck({ name: flatName });
      deckIdByAnkiId.set(ankiDeck.ankiId, deck.id);
      summary.decksCreated += 1;
    } catch (err) {
      summary.failed += 1;
      summary.failures.push(`deck "${flatName}": ${errMessage(err)}`);
    }
  }

  // Lazily created catch-all deck for cards whose Anki deck was not
  // created above. A failed attempt is not cached, so the next card
  // without a deck triggers another attempt.
  let fallbackDeckId: string | null = null;
  const ensureFallbackDeck = async (): Promise<string | null> => {
    if (!fallbackDeckId) {
      try {
        const deck = await createDeck({ name: 'Anki-Import' });
        fallbackDeckId = deck.id;
        summary.decksCreated += 1;
      } catch (err) {
        summary.failures.push(`fallback deck: ${errMessage(err)}`);
        return null;
      }
    }
    return fallbackDeckId;
  };

  // Step 2: create the cards. Fields are sanitized first (media refs dropped).
  for (const [position, card] of parsed.cards.entries()) {
    notify({ stage: 'cards', current: position, total: parsed.cards.length });

    let targetDeckId = deckIdByAnkiId.get(card.ankiDeckId);
    if (!targetDeckId) {
      const fallback = await ensureFallbackDeck();
      if (!fallback) {
        // The helper already recorded the reason in `failures`.
        summary.failed += 1;
        continue;
      }
      targetDeckId = fallback;
    }

    const cleanFields: Record<string, string> = {};
    for (const [fieldName, rawValue] of Object.entries(card.fields)) {
      cleanFields[fieldName] = sanitizeAnkiHtml(rawValue);
    }

    try {
      await createCard({ deck_id: targetDeckId, type: card.type, fields: cleanFields });
      summary.cardsCreated += 1;
    } catch (err) {
      summary.failed += 1;
      summary.failures.push(`card "${preview(cleanFields)}": ${errMessage(err)}`);
    }
  }

  notify({ stage: 'done', current: parsed.cards.length, total: parsed.cards.length });
  return summary;
}
|
||||
|
||||
/**
 * Turns an arbitrary thrown value into a readable message.
 *
 * `Error` instances yield their `message`. Plain objects that carry a
 * non-empty string `message` property yield that property instead of the
 * useless "[object Object]". Everything else is stringified with `String()`.
 *
 * @param e The caught value.
 * @returns A message suitable for the `failures` list.
 */
function errMessage(e: unknown): string {
  if (e instanceof Error) return e.message;
  if (typeof e === 'object' && e !== null) {
    const { message } = e as { message?: unknown };
    if (typeof message === 'string' && message !== '') return message;
  }
  return String(e);
}
|
||||
|
||||
/**
 * Builds a short single-line preview of a card for failure messages.
 *
 * Takes the first field value, collapses every whitespace run to a single
 * space, trims it, and cuts it to 40 characters followed by "…".
 * Whitespace is collapsed before truncating so the 40-character budget is
 * spent on visible text, and the cut is made on code points so a surrogate
 * pair (e.g. an emoji) is never split in half.
 *
 * @param fields Field name → field text of the card.
 * @returns The preview, or an empty string when there are no fields.
 */
function preview(fields: Record<string, string>): string {
  const first = Object.values(fields)[0] ?? '';
  const singleLine = first.replace(/\s+/g, ' ').trim();
  const codePoints = Array.from(singleLine);
  if (codePoints.length <= 40) return singleLine;
  return codePoints.slice(0, 40).join('') + '…';
}
|
||||
241
apps/web/src/lib/anki/parse.ts
Normal file
241
apps/web/src/lib/anki/parse.ts
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
/**
|
||||
* Parse an Anki .apkg / .colpkg file in the browser.
|
||||
*
|
||||
* .apkg = ZIP archive containing a SQLite collection (`collection.anki2`
|
||||
* or `collection.anki21`) plus media files. We open the SQLite blob with
|
||||
* sql.js (WASM-backed in-browser SQLite) and walk Anki's three core
|
||||
* tables: `col` (collection meta with JSON-encoded models + decks),
|
||||
* `notes` (the user-typed content), and `cards` (one row per learnable
|
||||
* unit — basic = 1, basic-reverse = 2, cloze = N).
|
||||
*
|
||||
* MVP scope (Cards Phase 8): basic + basic-reverse + cloze. Media is
|
||||
* collected but not uploaded — image/audio refs are stripped from the
|
||||
* sanitized text. Review history is skipped — FSRS state will be
|
||||
* regenerated on first sight.
|
||||
*
|
||||
* --------------------------------------------------------------------
|
||||
* STRATEGIE-B-AUSNAHME: Diese Datei ist ein bewusst portierter Lift aus
|
||||
* mana-monorepo/apps/cards/apps/web/src/lib/anki/parse.ts (commit
|
||||
* ~Mai 2026). Anki-Format-Logik ist standalone Parser-Code ohne
|
||||
* Architektur-Übernahme — die Kopie spart 2-3 Tage Re-Implementierung
|
||||
* bei null Strategy-Risiko. CardType-Import auf @cards/domain
|
||||
* umgestellt, Doc-Kommentar an Phase-8-Scope angepasst.
|
||||
* --------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
import JSZip, { type JSZipObject } from 'jszip';
|
||||
import initSqlJs, { type Database } from 'sql.js';
|
||||
import type { CardType } from '@cards/domain';
|
||||
|
||||
/** A deck as read from the Anki collection. */
export interface ParsedDeck {
  /** Anki's numeric deck id, stringified. */
  ankiId: string;
  /** Full deck name, e.g. "Studies::Spanish" — Anki uses `::` as separator. */
  name: string;
}
|
||||
|
||||
/** One importable card, produced from a single Anki note. */
export interface ParsedCard {
  /** Stringified Anki deck id the card was found in. */
  ankiDeckId: string;
  /** Target card type in the cards domain. */
  type: CardType;
  /** Raw (not yet sanitized) field texts, keyed by target field name. */
  fields: Record<string, string>;
}
|
||||
|
||||
/** Complete result of parsing an Anki package. */
export interface ParsedAnki {
  /** Decks found in the collection. */
  decks: ParsedDeck[];
  /** Cards to import, one per Anki note. */
  cards: ParsedCard[];
  /** Number of notes that were left out of `cards`. */
  skipped: number;
  /** Human-readable notes about problems met while parsing. */
  warnings: string[];
  /**
   * Lookup from the original media filename, as it appears in card fields
   * (e.g. `paris.jpg` or `audio_001.mp3`), to its ZIP entry. Inside the
   * archive Anki names the files numerically (`0`, `1`, …) and the JSON
   * manifest (`media`) maps number → original name; this map is the
   * inverse, so an importer can resolve the name it sees in field text.
   */
  mediaByFilename: Map<string, JSZipObject>;
}
|
||||
|
||||
/** Subset of an Anki note-type ("model") entry from the `col.models` JSON. */
interface AnkiModel {
  id: number;
  name: string;
  /** 0 = standard, 1 = cloze. */
  type: number;
  /** Field definitions of the note type. */
  flds: Array<{ name: string }>;
  /** Card templates of the note type. */
  tmpls: Array<{ name: string }>;
}
|
||||
|
||||
/** Subset of a deck entry from the `col.decks` JSON. */
interface AnkiDeckJson {
  /** Numeric Anki deck id. */
  id: number;
  /** Full deck name, `::`-separated for nested decks. */
  name: string;
}
|
||||
|
||||
// Module-level cache for the initialised sql.js runtime.
let SQL: Awaited<ReturnType<typeof initSqlJs>> | null = null;

/**
 * Returns the sql.js runtime, initialising it on first use.
 *
 * sql.js is told to load its companion files from the site root
 * (`/<file>`).
 */
async function getSql() {
  if (!SQL) {
    SQL = await initSqlJs({ locateFile: (file) => `/${file}` });
  }
  return SQL;
}
|
||||
|
||||
/**
 * Parses an Anki package (.apkg / .colpkg) entirely in memory.
 *
 * Unzips the file, opens the contained SQLite collection
 * (`collection.anki21`, falling back to `collection.anki2`) with sql.js and
 * extracts decks, cards and the media manifest.
 *
 * @param file The package as picked by the user.
 * @returns Decks, cards, skip count, warnings and the media lookup map.
 * @throws Error when the archive contains no collection file or the
 *   collection cannot be read.
 */
export async function parseApkg(file: File | Blob): Promise<ParsedAnki> {
  const zip = await JSZip.loadAsync(await file.arrayBuffer());

  const collectionEntry = zip.file('collection.anki21') ?? zip.file('collection.anki2');
  if (!collectionEntry) {
    throw new Error(
      'Keine Anki-Collection-Datei in der .apkg gefunden (erwartet: collection.anki21 oder collection.anki2).'
    );
  }

  const sqliteBytes = await collectionEntry.async('uint8array');

  // Do all remaining async ZIP work BEFORE the database is opened: once
  // `db` exists, nothing may throw outside the try/finally below, or the
  // sql.js database would never be closed.
  const mediaByFilename = await extractMediaManifest(zip);

  const sql = await getSql();
  const db: Database = new sql.Database(sqliteBytes);
  try {
    return { ...extract(db), mediaByFilename };
  } finally {
    db.close();
  }
}
|
||||
|
||||
/**
 * Builds the "original filename → ZIP entry" lookup from the package's
 * `media` manifest.
 *
 * Best-effort: a missing manifest, a manifest that is not valid JSON, or
 * one that is not a JSON object yields an empty map. Manifest entries whose
 * value is not a string, or whose numbered file is absent from the archive,
 * are ignored.
 *
 * @param zip The opened package archive.
 * @returns Map from the filename used in card fields to its ZIP entry.
 */
async function extractMediaManifest(zip: JSZip): Promise<Map<string, JSZipObject>> {
  const out = new Map<string, JSZipObject>();
  const manifestEntry = zip.file('media');
  if (!manifestEntry) return out;

  let manifest: unknown;
  try {
    manifest = JSON.parse(await manifestEntry.async('string'));
  } catch {
    return out;
  }
  // `JSON.parse` may legally return null or a primitive; Object.entries
  // would throw on null and yield nonsense for strings.
  if (typeof manifest !== 'object' || manifest === null) return out;

  for (const [numericKey, originalName] of Object.entries(manifest)) {
    if (typeof originalName !== 'string') continue;
    const entry = zip.file(numericKey);
    if (entry) out.set(originalName, entry);
  }
  return out;
}
|
||||
|
||||
// The SQLite-only extraction yields everything except media; parseApkg
// adds `mediaByFilename` on top so this path stays focused on the database.
type ExtractResult = Omit<ParsedAnki, 'mediaByFilename'>;

/**
 * Reads decks and cards out of an opened Anki collection database.
 *
 * Models and decks come from the JSON columns of the `col` table, note
 * content from `notes`, and the note → deck assignment from `cards`.
 *
 * @param db Opened sql.js database of the collection.
 * @returns Decks, one card per note, the skip count and warnings.
 * @throws Error when the `col` table yields no row; JSON syntax errors
 *   from the `models` / `decks` columns propagate unchanged.
 */
function extract(db: Database): ExtractResult {
  const colValues = db.exec('SELECT models, decks FROM col LIMIT 1')[0]?.values[0];
  if (!colValues) {
    throw new Error('Anki-Collection ist leer.');
  }
  const [modelsJson, decksJson] = colValues as [string, string];
  const models: Record<string, AnkiModel> = JSON.parse(modelsJson);
  const decksMap: Record<string, AnkiDeckJson> = JSON.parse(decksJson);

  // The deck with id 1 (Anki's "Default" deck) is always left out of the
  // deck list.
  const decks: ParsedDeck[] = [];
  for (const deck of Object.values(decksMap)) {
    if (deck.id === 1) continue;
    decks.push({ ankiId: String(deck.id), name: deck.name });
  }

  // Load every note up front so the card loop never queries SQLite.
  type NoteRow = { id: string; mid: string; flds: string };
  const notesById = new Map<string, NoteRow>();
  const noteRows = db.exec('SELECT id, mid, flds FROM notes')[0]?.values ?? [];
  for (const row of noteRows) {
    const [id, mid, flds] = row as [number, number, string];
    const key = String(id);
    notesById.set(key, { id: key, mid: String(mid), flds });
  }

  const cardRows = db.exec('SELECT nid, did, ord FROM cards')[0]?.values;
  if (!cardRows) {
    return { decks, cards: [], skipped: 0, warnings: ['Keine Karten gefunden.'] };
  }

  const warnings: string[] = [];
  const cards: ParsedCard[] = [];
  let skipped = 0;

  // Anki keeps one row per generated card (basic-reverse = 2 rows, a cloze
  // with c1+c2 = 2 rows). The target model derives those from `type` +
  // `fields`, so each note is taken once — with the deck of the first of
  // its card rows.
  const seenNotes = new Set<string>();
  for (const row of cardRows) {
    const [nid, did] = row as [number, number, number];
    const noteKey = String(nid);
    if (seenNotes.has(noteKey)) continue;
    seenNotes.add(noteKey);

    const note = notesById.get(noteKey);
    if (!note) {
      skipped += 1;
      continue;
    }

    const model = models[note.mid];
    if (!model) {
      skipped += 1;
      warnings.push(`Note ${nid}: unknown model ${note.mid}`);
      continue;
    }

    // Field values of a note are joined with the 0x1f unit separator.
    const mapped = mapNoteToCard(model, note.flds.split('\x1f'));
    if (!mapped) {
      skipped += 1;
      continue;
    }
    cards.push({ ankiDeckId: String(did), ...mapped });
  }

  if (skipped > 0) {
    warnings.unshift(`${skipped} Karten übersprungen (unbekannter Typ).`);
  }
  return { decks, cards, skipped, warnings };
}
|
||||
|
||||
/**
 * Maps one Anki note onto a card of the target model.
 *
 * - Model type 1 (cloze): first field becomes `text`, a non-empty second
 *   field becomes `extra`.
 * - Model type 0 (standard): first two fields become `front` / `back`;
 *   exactly two templates yield `basic-reverse`, any other template count
 *   yields `basic`.
 *
 * @param model The note's Anki model.
 * @param fields The note's field values in model order.
 * @returns The mapped card data, or `null` for any other model type.
 */
function mapNoteToCard(
  model: AnkiModel,
  fields: string[]
): { type: CardType; fields: Record<string, string> } | null {
  switch (model.type) {
    case 1: {
      // Cloze: one input field carrying the {{cN::...}} markup.
      const clozeFields: Record<string, string> = { text: fields[0] ?? '' };
      if (fields[1]) {
        clozeFields.extra = fields[1];
      }
      return { type: 'cloze', fields: clozeFields };
    }
    case 0: {
      // Standard: only the template count decides the variant. Models
      // with one or an unusual number of templates are treated as basic.
      const type = model.tmpls.length === 2 ? 'basic-reverse' : 'basic';
      return { type, fields: { front: fields[0] ?? '', back: fields[1] ?? '' } };
    }
    default:
      return null;
  }
}
|
||||
|
||||
/**
 * Converts Anki's HTML / image / sound markup to plain text + Markdown.
 *
 * Phase-8 MVP: images and audio references are dropped without replacement
 * (option A). A later media path can plug a filename → URL map in here and
 * expand the references to `<img>` / `<audio>` tags instead.
 *
 * Steps, in order:
 * 1. remove `<img …>` tags and `[sound:…]` references,
 * 2. turn `<br>`, `<p>`, `<div>` into newlines and `<b>/<strong>`,
 *    `<i>/<em>` into Markdown emphasis markers,
 * 3. strip every remaining tag,
 * 4. decode the common HTML entities — `&amp;` last, so that an escaped
 *    entity such as `&amp;lt;` ends up as the literal text `&lt;` rather
 *    than being decoded twice into `<`,
 * 5. collapse runs of three or more newlines to one blank line and trim.
 *
 * @param html Raw field content from an Anki note.
 * @returns The sanitized text.
 */
export function sanitizeAnkiHtml(html: string): string {
  return (
    html
      // Drop image tags and audio references entirely.
      .replace(/<img\b[^>]*>/gi, '')
      .replace(/\[sound:[^\]]+\]/g, '')
      // Structural and emphasis tags → newline / Markdown.
      .replace(/<br\s*\/?>/gi, '\n')
      .replace(/<\/?(?:b|strong)>/gi, '**')
      .replace(/<\/?(?:i|em)>/gi, '*')
      .replace(/<\/?p>/gi, '\n')
      .replace(/<\/?div>/gi, '\n')
      // Any tag that is still left is removed.
      .replace(/<[^>]+>/gi, '')
      // Entities. A literal no-break space is normalised as well.
      .replace(/&nbsp;|\u00a0/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;|&apos;/g, "'")
      // Must stay last — see step 4 above.
      .replace(/&amp;/g, '&')
      .replace(/\n{3,}/g, '\n\n')
      .trim()
  );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue