diff --git a/apps/cards/apps/web/src/lib/anki/import.ts b/apps/cards/apps/web/src/lib/anki/import.ts index 5614146cd..b01176c17 100644 --- a/apps/cards/apps/web/src/lib/anki/import.ts +++ b/apps/cards/apps/web/src/lib/anki/import.ts @@ -2,9 +2,14 @@ * Apply a `ParsedAnki` to the local DB. * * Strategy: every Anki deck becomes one of our decks (1:1, name-mapped). - * Card content is HTML-sanitized to plain Markdown before save. Reviews - * are auto-generated by reviewStore.ensureReviewsForCard — the imported - * cards become "new" in the FSRS sense, no inherited schedule. + * Card content is HTML-sanitized to plain Markdown / inline media tags + * before save. Reviews are auto-generated by reviewStore.ensureReviewsForCard + * — the imported cards become "new" in the FSRS sense, no inherited schedule. + * + * Media: every referenced file is uploaded to mana-media first; the + * resulting URL replaces the original Anki filename in the field text. + * Files referenced from no card are skipped — many Anki decks bundle + * orphaned media that bloats the upload time. * * No de-dupe: re-importing the same .apkg adds duplicate decks. The UI * warns about this once we decide it matters. @@ -12,20 +17,135 @@ import { deckStore } from '../stores/decks.svelte'; import { cardStore } from '../stores/cards.svelte'; +import { uploadCardMedia, MediaUploadError } from '../media/upload'; import { sanitizeAnkiHtml, type ParsedAnki } from './parse'; export interface ImportResult { decksCreated: number; cardsCreated: number; + mediaUploaded: number; + mediaFailed: number; failed: number; } -export async function importParsedAnki(parsed: ParsedAnki): Promise { - const result: ImportResult = { decksCreated: 0, cardsCreated: 0, failed: 0 }; +export interface MediaProgress { + uploaded: number; + total: number; +} - // Anki deck names use "::" as a separator for nesting — flatten with - // a slash so the user sees a meaningful single-line title and we - // don't have to invent a hierarchy concept yet. +const MEDIA_CONCURRENCY = 4; +// Anki's always quotes; we also catch [sound:foo.mp3]. +const IMG_RE = /]*\bsrc=["']([^"']+)["']/gi; +const SOUND_RE = /\[sound:([^\]]+)\]/g; + +function collectMediaRefs(parsed: ParsedAnki): Set { + const refs = new Set(); + for (const card of parsed.cards) { + for (const value of Object.values(card.fields)) { + let m: RegExpExecArray | null; + IMG_RE.lastIndex = 0; + while ((m = IMG_RE.exec(value))) refs.add(m[1]); + SOUND_RE.lastIndex = 0; + while ((m = SOUND_RE.exec(value))) refs.add(m[1]); + } + } + return refs; +} + +async function uploadOne( + filename: string, + parsed: ParsedAnki +): Promise<{ filename: string; url: string | null }> { + const entry = parsed.mediaByFilename.get(filename); + if (!entry) return { filename, url: null }; + try { + const blob = await entry.async('blob'); + const file = new File([blob], filename, { type: guessMime(filename) }); + const media = await uploadCardMedia(file); + return { filename, url: media.url }; + } catch (e) { + if (e instanceof MediaUploadError) { + console.warn(`[anki] media upload failed: ${filename}`, e.message); + } else { + console.warn(`[anki] media upload failed: ${filename}`, e); + } + return { filename, url: null }; + } +} + +function guessMime(filename: string): string { + const ext = filename.split('.').pop()?.toLowerCase() ?? ''; + const map: Record = { + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + png: 'image/png', + gif: 'image/gif', + webp: 'image/webp', + svg: 'image/svg+xml', + mp3: 'audio/mpeg', + ogg: 'audio/ogg', + oga: 'audio/ogg', + wav: 'audio/wav', + m4a: 'audio/mp4', + mp4: 'video/mp4', + webm: 'video/webm', + }; + return map[ext] ?? 'application/octet-stream'; +} + +async function uploadAllMedia( + parsed: ParsedAnki, + onProgress?: (p: MediaProgress) => void +): Promise<{ urlByFilename: Map; uploaded: number; failed: number }> { + const referenced = [...collectMediaRefs(parsed)].filter((f) => parsed.mediaByFilename.has(f)); + const urlByFilename = new Map(); + let uploaded = 0; + let failed = 0; + + if (referenced.length === 0) { + onProgress?.({ uploaded: 0, total: 0 }); + return { urlByFilename, uploaded, failed }; + } + + let nextIdx = 0; + async function worker() { + while (true) { + const idx = nextIdx++; + if (idx >= referenced.length) return; + const result = await uploadOne(referenced[idx], parsed); + if (result.url) { + urlByFilename.set(result.filename, result.url); + uploaded++; + } else { + failed++; + } + onProgress?.({ uploaded: uploaded + failed, total: referenced.length }); + } + } + + await Promise.all(Array.from({ length: MEDIA_CONCURRENCY }, () => worker())); + return { urlByFilename, uploaded, failed }; +} + +export async function importParsedAnki( + parsed: ParsedAnki, + opts: { onMediaProgress?: (p: MediaProgress) => void } = {} +): Promise { + const result: ImportResult = { + decksCreated: 0, + cardsCreated: 0, + mediaUploaded: 0, + mediaFailed: 0, + failed: 0, + }; + + // 1) Media — upload before any cards so the field-text rewrite has + // real URLs to point at. Empty in the no-media case. + const { urlByFilename, uploaded, failed } = await uploadAllMedia(parsed, opts.onMediaProgress); + result.mediaUploaded = uploaded; + result.mediaFailed = failed; + + // 2) Decks — Anki "::" hierarchy flattened to " / ". const ankiIdToDeckId = new Map(); for (const ankiDeck of parsed.decks) { const title = ankiDeck.name.replace(/::/g, ' / '); @@ -38,9 +158,8 @@ export async function importParsedAnki(parsed: ParsedAnki): Promise { let id: string | null = null; return async () => { @@ -57,7 +176,8 @@ export async function importParsedAnki(parsed: ParsedAnki): Promise(); + // 3) Cards — sanitize each field with the media URL map. + const orderByDeck = new Map(); for (const card of parsed.cards) { let targetDeckId = ankiIdToDeckId.get(card.ankiDeckId); if (!targetDeckId) { @@ -71,7 +191,7 @@ export async function importParsedAnki(parsed: ParsedAnki): Promise = {}; for (const [key, value] of Object.entries(card.fields)) { - cleanFields[key] = sanitizeAnkiHtml(value); + cleanFields[key] = sanitizeAnkiHtml(value, urlByFilename); } const order = orderByDeck.get(targetDeckId) ?? 0; diff --git a/apps/cards/apps/web/src/lib/anki/parse.ts b/apps/cards/apps/web/src/lib/anki/parse.ts index 7764bb67a..afccaadc7 100644 --- a/apps/cards/apps/web/src/lib/anki/parse.ts +++ b/apps/cards/apps/web/src/lib/anki/parse.ts @@ -13,7 +13,7 @@ * regenerated on first sight. */ -import JSZip from 'jszip'; +import JSZip, { type JSZipObject } from 'jszip'; import initSqlJs, { type Database } from 'sql.js'; import type { CardType } from '@mana/cards-core'; @@ -33,6 +33,14 @@ export interface ParsedAnki { cards: ParsedCard[]; skipped: number; warnings: string[]; + /** + * Mapping from the original media filename (as referenced in card + * fields, e.g. `paris.jpg` or `audio_001.mp3`) to its ZIP entry. Anki + * stores files numerically (`0`, `1`, …) and the JSON manifest + * (`media`) maps numbers → original names; we flip that here so the + * importer can look up by the name it sees in the field text. + */ + mediaByFilename: Map; } interface AnkiModel { @@ -69,14 +77,37 @@ export async function parseApkg(file: File | Blob): Promise { const sql = await getSql(); const db: Database = new sql.Database(sqliteBytes); + const mediaByFilename = await extractMediaManifest(zip); + try { - return extract(db); + const result = extract(db); + return { ...result, mediaByFilename }; } finally { db.close(); } } -function extract(db: Database): ParsedAnki { +async function extractMediaManifest(zip: JSZip): Promise> { + const out = new Map(); + const manifestEntry = zip.file('media'); + if (!manifestEntry) return out; + let manifest: Record; + try { + manifest = JSON.parse(await manifestEntry.async('string')); + } catch { + return out; + } + for (const [numericKey, originalName] of Object.entries(manifest)) { + const entry = zip.file(numericKey); + if (entry) out.set(originalName, entry); + } + return out; +} + +// Internal extract returns everything except media — that's plumbed in +// at the parseApkg layer so the SQLite-only path stays focused. +type ExtractResult = Omit; +function extract(db: Database): ExtractResult { const colRow = db.exec('SELECT models, decks FROM col LIMIT 1'); if (colRow.length === 0 || colRow[0].values.length === 0) { throw new Error('Anki-Collection ist leer.'); @@ -169,25 +200,48 @@ function mapNoteToCard( return null; } -/** Strip Anki's HTML / image / sound markup down to plain text + Markdown. - * Conservative — keeps line breaks and bold/italic but strips images - * and sound refs (Phase-2 will re-import media). */ -export function sanitizeAnkiHtml(html: string): string { - return html - .replace(/]*>/g, '') - .replace(/\[sound:[^\]]+\]/g, '') - .replace(//gi, '\n') - .replace(/<\/?(?:b|strong)>/gi, '**') - .replace(/<\/?(?:i|em)>/gi, '*') - .replace(/<\/?p>/gi, '\n') - .replace(/<\/?div>/gi, '\n') - .replace(/<[^>]+>/g, '') // drop remaining tags - .replace(/ /g, ' ') - .replace(/&/g, '&') - .replace(/</g, '<') - .replace(/>/g, '>') - .replace(/"/g, '"') - .replace(/'/g, "'") - .replace(/\n{3,}/g, '\n\n') - .trim(); +/** + * Convert Anki's HTML / image / sound markup to plain text + Markdown. + * + * `mediaUrlByFilename` maps the filename Anki references in the field + * (e.g. `paris.jpg` for `` or `audio.mp3` for + * `[sound:audio.mp3]`) to its post-upload URL on mana-media. Anything + * not in the map is dropped silently — same as the no-media path. + */ +export function sanitizeAnkiHtml( + html: string, + mediaUrlByFilename: Map = new Map() +): string { + const imgReplaced = html.replace( + /]*\bsrc=["']([^"']+)["'][^>]*>/gi, + (_, src: string) => { + const url = mediaUrlByFilename.get(src); + return url ? `` : ''; + } + ); + const soundReplaced = imgReplaced.replace(/\[sound:([^\]]+)\]/g, (_, name: string) => { + const url = mediaUrlByFilename.get(name); + return url ? `` : ''; + }); + + return ( + soundReplaced + .replace(//gi, '\n') + .replace(/<\/?(?:b|strong)>/gi, '**') + .replace(/<\/?(?:i|em)>/gi, '*') + .replace(/<\/?p>/gi, '\n') + .replace(/<\/?div>/gi, '\n') + // Drop remaining HTML tags except the ones we just emitted + // (img/audio/video/source) — those need to survive into the + // rendered card. Negative lookahead does that in one pass. + .replace(/<(?!\/?(?:img|audio|video|source)\b)[^>]+>/gi, '') + .replace(/ /g, ' ') + .replace(/&/g, '&') + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/\n{3,}/g, '\n\n') + .trim() + ); } diff --git a/apps/cards/apps/web/src/lib/components/AnkiImport.svelte b/apps/cards/apps/web/src/lib/components/AnkiImport.svelte index 658f55968..7b4c938a8 100644 --- a/apps/cards/apps/web/src/lib/components/AnkiImport.svelte +++ b/apps/cards/apps/web/src/lib/components/AnkiImport.svelte @@ -3,11 +3,16 @@ import { importParsedAnki, type ImportResult } from '$lib/anki/import'; let fileInput = $state(null); - let stage = $state<'idle' | 'parsing' | 'preview' | 'importing' | 'done' | 'error'>('idle'); + let stage = $state< + 'idle' | 'parsing' | 'preview' | 'uploading-media' | 'importing' | 'done' | 'error' + >('idle'); let parsed = $state(null); let result = $state(null); let error = $state(null); let fileName = $state(''); + let mediaProgress = $state<{ uploaded: number; total: number }>({ uploaded: 0, total: 0 }); + + const mediaCount = $derived(parsed?.mediaByFilename.size ?? 0); async function handleFile(file: File) { error = null; @@ -37,9 +42,17 @@ async function confirmImport() { if (!parsed) return; - stage = 'importing'; + mediaProgress = { uploaded: 0, total: mediaCount }; + stage = mediaCount > 0 ? 'uploading-media' : 'importing'; try { - result = await importParsedAnki(parsed); + result = await importParsedAnki(parsed, { + onMediaProgress: (p) => { + mediaProgress = p; + if (p.uploaded >= p.total && stage === 'uploading-media') { + stage = 'importing'; + } + }, + }); stage = 'done'; } catch (e: any) { error = e?.message ?? 'Import fehlgeschlagen.'; @@ -70,7 +83,7 @@ >
📦 .apkg-Datei hier ablegen oder klicken
- Basic, Basic + Reverse und Cloze werden importiert. Bilder/Audio bleiben raus. + Basic, Basic + Reverse, Cloze · Bilder + Audio werden mit übernommen.
  • {parsed.decks.length} {parsed.decks.length === 1 ? 'Deck' : 'Decks'}
  • {parsed.cards.length} {parsed.cards.length === 1 ? 'Karte' : 'Karten'}
  • + {#if mediaCount > 0} +
  • {mediaCount} Medien (Bilder/Audio)
  • + {/if} {#if parsed.skipped > 0}
  • {parsed.skipped} übersprungen (unbekannter Typ)
  • {/if} @@ -118,6 +134,18 @@ + {:else if stage === 'uploading-media'} +
    +
    Lade Medien hoch · {mediaProgress.uploaded} / {mediaProgress.total}
    +
    +
    +
    +
    {:else if stage === 'importing'}
    Importiere {parsed?.cards.length ?? 0} Karten… @@ -128,6 +156,13 @@ ✓ {result.cardsCreated} Karten in {result.decksCreated} {result.decksCreated === 1 ? 'Deck' : 'Decks'} angelegt.
    + {#if result.mediaUploaded > 0 || result.mediaFailed > 0} +
    + {result.mediaUploaded} Medien übernommen{#if result.mediaFailed > 0} + · {result.mediaFailed} fehlgeschlagen + {/if} +
    + {/if} {#if result.failed > 0}
    {result.failed} Karten konnten nicht angelegt werden.
    {/if}