diff --git a/apps/api/package.json b/apps/api/package.json index 0b48a36..f34e956 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -19,8 +19,9 @@ }, "dependencies": { "@cards/domain": "workspace:*", - "hono": "^4.6.0", "drizzle-orm": "0.38", + "hono": "^4.6.0", + "minio": "^8.0.7", "postgres": "^3.4.0", "zod": "3", "zod-to-json-schema": "^3.23.0" diff --git a/apps/api/src/db/schema/index.ts b/apps/api/src/db/schema/index.ts index a3dd7be..365dd64 100644 --- a/apps/api/src/db/schema/index.ts +++ b/apps/api/src/db/schema/index.ts @@ -22,8 +22,13 @@ export type { export { tags } from './tags.ts'; export type { TagRow, TagInsert } from './tags.ts'; -export { mediaRefs } from './media.ts'; -export type { MediaRefRow, MediaRefInsert } from './media.ts'; +export { mediaRefs, mediaFiles } from './media.ts'; +export type { + MediaRefRow, + MediaRefInsert, + MediaFileRow, + MediaFileInsert, +} from './media.ts'; export { importJobs } from './imports.ts'; export type { ImportJobRow, ImportJobInsert } from './imports.ts'; diff --git a/apps/api/src/db/schema/media.ts b/apps/api/src/db/schema/media.ts index b23c4db..3941c13 100644 --- a/apps/api/src/db/schema/media.ts +++ b/apps/api/src/db/schema/media.ts @@ -4,9 +4,42 @@ import { cardsSchema } from './_schema.ts'; import { cards } from './cards.ts'; /** - * Media-Verweise auf Object-IDs in mana-media. Die eigentlichen Files - * (Bilder, Audio, Video) liegen in MinIO via mana-media; diese Tabelle - * hält nur den Verweis + Sortier-Order pro Karte. + * Media-Files: Bilder, Audio, Video, die in MinIO unter dem + * `objectKey` liegen und von Karten via cards.media_refs[] + * referenziert werden. + * + * Bewusst ohne FK auf eine konkrete Karte: ein File kann von + * mehreren Karten referenziert werden (z.B. ein Bild für Front + * und Back). Lifecycle-Cleanup per Cron oder DSGVO-Delete. + * + * objectKey-Format: `/.` — UserId-Präfix + * vereinfacht den DSGVO-Delete (Bucket-Prefix-Sweep). + */ +export const mediaFiles = cardsSchema.table( + 'media_files', + { + id: text('id').primaryKey(), + userId: text('user_id').notNull(), + objectKey: text('object_key').notNull(), + mimeType: text('mime_type').notNull(), + originalFilename: text('original_filename'), + sizeBytes: integer('size_bytes').notNull(), + kind: text('kind', { enum: ['image', 'audio', 'video', 'other'] }).notNull(), + createdAt: timestamp('created_at', { withTimezone: true, mode: 'date' }) + .notNull() + .defaultNow(), + }, + (t) => ({ + userIdx: index('media_files_user_idx').on(t.userId), + }) +); + +export type MediaFileRow = typeof mediaFiles.$inferSelect; +export type MediaFileInsert = typeof mediaFiles.$inferInsert; + +/** + * Legacy: media_refs aus Phase 1, Vor-Sprint-15. Bewusst behalten als + * Sortier-Layer-Slot für später (mana-media-Konvergenz). Aktuell leer. */ export const mediaRefs = cardsSchema.table( 'media_refs', diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index f084a05..6ec23d9 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -11,6 +11,7 @@ import { toolsRouter } from './routes/tools.ts'; import { searchRouter } from './routes/search.ts'; import { dsgvoRouter } from './routes/dsgvo.ts'; import { meRouter } from './routes/me.ts'; +import { mediaRouter } from './routes/media.ts'; const app = new Hono(); @@ -41,6 +42,7 @@ app.route('/api/v1/tools', toolsRouter()); app.route('/api/v1/search', searchRouter()); app.route('/api/v1/dsgvo', dsgvoRouter()); app.route('/api/v1/me', meRouter()); +app.route('/api/v1/media', mediaRouter()); app.get('/', (c) => c.json({ diff --git a/apps/api/src/routes/dsgvo.ts b/apps/api/src/routes/dsgvo.ts index fbb7065..22ebd6c 100644 --- a/apps/api/src/routes/dsgvo.ts +++ b/apps/api/src/routes/dsgvo.ts @@ -6,12 +6,14 @@ import { cards, decks, importJobs, + mediaFiles, mediaRefs, reviews, studySessions, tags, } from '../db/schema/index.ts'; import { serviceKeyAuth } from '../middleware/service-key.ts'; +import { getStorage } from '../services/storage.ts'; export type DsgvoDeps = { db?: CardsDb }; @@ -21,16 +23,25 @@ export type DsgvoDeps = { db?: CardsDb }; * Export aus /api/v1/me. */ export async function buildUserExport(db: CardsDb, userId: string) { - const [decksRows, cardsRows, reviewsRows, sessionsRows, tagsRows, mediaRows, importsRows] = - await Promise.all([ - db.select().from(decks).where(eq(decks.userId, userId)), - db.select().from(cards).where(eq(cards.userId, userId)), - db.select().from(reviews).where(eq(reviews.userId, userId)), - db.select().from(studySessions).where(eq(studySessions.userId, userId)), - db.select().from(tags).where(eq(tags.userId, userId)), - db.select().from(mediaRefs).where(eq(mediaRefs.userId, userId)), - db.select().from(importJobs).where(eq(importJobs.userId, userId)), - ]); + const [ + decksRows, + cardsRows, + reviewsRows, + sessionsRows, + tagsRows, + mediaRefRows, + mediaFileRows, + importsRows, + ] = await Promise.all([ + db.select().from(decks).where(eq(decks.userId, userId)), + db.select().from(cards).where(eq(cards.userId, userId)), + db.select().from(reviews).where(eq(reviews.userId, userId)), + db.select().from(studySessions).where(eq(studySessions.userId, userId)), + db.select().from(tags).where(eq(tags.userId, userId)), + db.select().from(mediaRefs).where(eq(mediaRefs.userId, userId)), + db.select().from(mediaFiles).where(eq(mediaFiles.userId, userId)), + db.select().from(importJobs).where(eq(importJobs.userId, userId)), + ]); return { user_id: userId, @@ -59,10 +70,14 @@ export async function buildUserExport(db: CardsDb, userId: string) { finishedAt: s.finishedAt ? s.finishedAt.toISOString() : null, })), tags: tagsRows.map((t) => ({ ...t, createdAt: t.createdAt.toISOString() })), - media_refs: mediaRows.map((m) => ({ + media_refs: mediaRefRows.map((m) => ({ ...m, createdAt: m.createdAt.toISOString(), })), + media_files: mediaFileRows.map((f) => ({ + ...f, + createdAt: f.createdAt.toISOString(), + })), import_jobs: importsRows.map((j) => ({ ...j, createdAt: j.createdAt.toISOString(), @@ -104,7 +119,9 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono { * cards → card_tags * decks → tags * decks → study_sessions - * Verbleibend: import_jobs (eigene Tabelle ohne FK) — wird separat gelöscht. + * Verbleibend: import_jobs + media_files (eigene Tabellen ohne FK) + * — werden separat gelöscht. MinIO-Objects werden per Bucket-Prefix- + * Sweep entfernt (objectKey-Format `/.`). */ r.post('/delete', async (c) => { const body = await c.req.json().catch(() => null); @@ -112,14 +129,33 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono { if (!userId) return c.json({ error: 'missing_user_id' }, 400); const db = dbOf(); - const [deletedDecks, deletedImports] = await db.transaction(async (tx) => { - const dd = await tx.delete(decks).where(eq(decks.userId, userId)).returning({ id: decks.id }); - const di = await tx - .delete(importJobs) - .where(eq(importJobs.userId, userId)) - .returning({ id: importJobs.id }); - return [dd, di]; - }); + const [deletedDecks, deletedImports, deletedMediaFiles] = await db.transaction( + async (tx) => { + const dd = await tx + .delete(decks) + .where(eq(decks.userId, userId)) + .returning({ id: decks.id }); + const di = await tx + .delete(importJobs) + .where(eq(importJobs.userId, userId)) + .returning({ id: importJobs.id }); + const dm = await tx + .delete(mediaFiles) + .where(eq(mediaFiles.userId, userId)) + .returning({ id: mediaFiles.id }); + return [dd, di, dm]; + } + ); + + // MinIO-Bucket-Sweep nach DB-Cleanup. Wenn der Storage-Sweep + // scheitert, ist das nicht-fatal — die DB ist schon konsistent + // gelöscht und Storage-Files ohne DB-Eintrag sind tote Bytes. + let storageObjectsDeleted = 0; + try { + storageObjectsDeleted = await getStorage().removeObjectsByPrefix(`${userId}/`); + } catch (err) { + console.warn('[dsgvo/delete] storage sweep failed:', err); + } return c.json({ deleted: true, @@ -127,6 +163,8 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono { counts: { decks: deletedDecks.length, import_jobs: deletedImports.length, + media_files: deletedMediaFiles.length, + storage_objects: storageObjectsDeleted, }, }); }); diff --git a/apps/api/src/routes/me.ts b/apps/api/src/routes/me.ts index c8e049b..89072b1 100644 --- a/apps/api/src/routes/me.ts +++ b/apps/api/src/routes/me.ts @@ -2,8 +2,9 @@ import { and, eq, gte, isNotNull, lte, sql } from 'drizzle-orm'; import { Hono } from 'hono'; import { getDb, type CardsDb } from '../db/connection.ts'; -import { cards, decks, importJobs, reviews } from '../db/schema/index.ts'; +import { cards, decks, importJobs, mediaFiles, reviews } from '../db/schema/index.ts'; import { authMiddleware, type AuthVars } from '../middleware/auth.ts'; +import { getStorage } from '../services/storage.ts'; import { buildUserExport } from './dsgvo.ts'; export type MeDeps = { db?: CardsDb }; @@ -125,14 +126,30 @@ export function meRouter(deps: MeDeps = {}): Hono<{ Variables: AuthVars }> { r.post('/delete', async (c) => { const userId = c.get('userId'); const db = dbOf(); - const [deletedDecks, deletedImports] = await db.transaction(async (tx) => { - const dd = await tx.delete(decks).where(eq(decks.userId, userId)).returning({ id: decks.id }); - const di = await tx - .delete(importJobs) - .where(eq(importJobs.userId, userId)) - .returning({ id: importJobs.id }); - return [dd, di]; - }); + const [deletedDecks, deletedImports, deletedMediaFiles] = await db.transaction( + async (tx) => { + const dd = await tx + .delete(decks) + .where(eq(decks.userId, userId)) + .returning({ id: decks.id }); + const di = await tx + .delete(importJobs) + .where(eq(importJobs.userId, userId)) + .returning({ id: importJobs.id }); + const dm = await tx + .delete(mediaFiles) + .where(eq(mediaFiles.userId, userId)) + .returning({ id: mediaFiles.id }); + return [dd, di, dm]; + } + ); + + let storageObjectsDeleted = 0; + try { + storageObjectsDeleted = await getStorage().removeObjectsByPrefix(`${userId}/`); + } catch (err) { + console.warn('[me/delete] storage sweep failed:', err); + } return c.json({ deleted: true, @@ -140,6 +157,8 @@ export function meRouter(deps: MeDeps = {}): Hono<{ Variables: AuthVars }> { counts: { decks: deletedDecks.length, import_jobs: deletedImports.length, + media_files: deletedMediaFiles.length, + storage_objects: storageObjectsDeleted, }, }); }); diff --git a/apps/api/src/routes/media.ts b/apps/api/src/routes/media.ts new file mode 100644 index 0000000..9bc7b7c --- /dev/null +++ b/apps/api/src/routes/media.ts @@ -0,0 +1,156 @@ +import { and, eq } from 'drizzle-orm'; +import { Hono } from 'hono'; + +import { getDb, type CardsDb } from '../db/connection.ts'; +import { mediaFiles, type MediaFileRow } from '../db/schema/index.ts'; +import { authMiddleware, type AuthVars } from '../middleware/auth.ts'; +import { ulid } from '../lib/ulid.ts'; +import { getStorage, type StorageService } from '../services/storage.ts'; + +export type MediaDeps = { db?: CardsDb; storage?: StorageService }; + +const MAX_BYTES = Number(process.env.CARDS_MEDIA_MAX_BYTES ?? 25 * 1024 * 1024); // 25 MiB +const ALLOWED_PREFIXES = ['image/', 'audio/', 'video/']; + +function kindFor(mime: string): MediaFileRow['kind'] { + if (mime.startsWith('image/')) return 'image'; + if (mime.startsWith('audio/')) return 'audio'; + if (mime.startsWith('video/')) return 'video'; + return 'other'; +} + +function extFor(mime: string, fallback?: string): string { + const map: Record = { + 'image/jpeg': 'jpg', + 'image/png': 'png', + 'image/gif': 'gif', + 'image/webp': 'webp', + 'image/svg+xml': 'svg', + 'audio/mpeg': 'mp3', + 'audio/ogg': 'ogg', + 'audio/wav': 'wav', + 'audio/mp4': 'm4a', + 'video/mp4': 'mp4', + 'video/webm': 'webm', + }; + if (map[mime]) return map[mime]; + if (fallback) { + const dot = fallback.lastIndexOf('.'); + if (dot > 0 && dot < fallback.length - 1) return fallback.slice(dot + 1).toLowerCase(); + } + return 'bin'; +} + +export function mediaRouter(deps: MediaDeps = {}): Hono<{ Variables: AuthVars }> { + const r = new Hono<{ Variables: AuthVars }>(); + const dbOf = () => deps.db ?? getDb(); + const storageOf = () => deps.storage ?? getStorage(); + + r.use('*', authMiddleware); + + /** + * Multipart-Upload eines einzelnen Files. Bilder/Audio/Video; alles + * andere wird mit 415 abgelehnt. Limit per env (Default 25 MiB). + * + * Antwort: + * { id, url, mime_type, kind, size_bytes, original_filename } + * + * `url` ist ein relativer App-Pfad (`/api/v1/media/`), den + * Frontend + Anki-Importer in `` einsetzen können. + * Public absolute URL kommt erst mit Phase 10 + DNS dazu. + */ + r.post('/upload', async (c) => { + const userId = c.get('userId'); + const form = await c.req.formData().catch(() => null); + if (!form) return c.json({ error: 'expected_multipart' }, 400); + + const file = form.get('file'); + if (!(file instanceof File)) return c.json({ error: 'missing_file_field' }, 400); + + const mime = file.type || 'application/octet-stream'; + if (!ALLOWED_PREFIXES.some((p) => mime.startsWith(p))) { + return c.json({ error: 'unsupported_media_type', mime_type: mime }, 415); + } + if (file.size > MAX_BYTES) { + return c.json({ error: 'too_large', max_bytes: MAX_BYTES, got: file.size }, 413); + } + + const id = ulid(); + const ext = extFor(mime, file.name); + const objectKey = `${userId}/${id}.${ext}`; + const buf = new Uint8Array(await file.arrayBuffer()); + + await storageOf().putObject(objectKey, buf, mime); + + const [row] = await dbOf() + .insert(mediaFiles) + .values({ + id, + userId, + objectKey, + mimeType: mime, + originalFilename: file.name || null, + sizeBytes: buf.byteLength, + kind: kindFor(mime), + createdAt: new Date(), + }) + .returning(); + + return c.json( + { + id: row.id, + url: `/api/v1/media/${row.id}`, + mime_type: row.mimeType, + kind: row.kind, + size_bytes: row.sizeBytes, + original_filename: row.originalFilename, + }, + 201 + ); + }); + + /** + * Streamt ein Media-File via MinIO-getObject. User-gated — fremde + * Files sind 404 (nicht 403, damit IDs nicht enumerierbar sind). + */ + r.get('/:id', async (c) => { + const userId = c.get('userId'); + const id = c.req.param('id'); + const [row] = await dbOf() + .select() + .from(mediaFiles) + .where(and(eq(mediaFiles.id, id), eq(mediaFiles.userId, userId))) + .limit(1); + if (!row) return c.json({ error: 'not_found' }, 404); + + const stream = await storageOf().getObjectStream(row.objectKey); + c.header('Content-Type', row.mimeType); + c.header('Content-Length', String(row.sizeBytes)); + c.header('Cache-Control', 'private, max-age=31536000, immutable'); + + return new Response(stream as unknown as ReadableStream, { + status: 200, + headers: c.res.headers, + }); + }); + + /** Listet alle Media-Files des Users — nützlich fürs UI später. */ + r.get('/', async (c) => { + const userId = c.get('userId'); + const rows = await dbOf().select().from(mediaFiles).where(eq(mediaFiles.userId, userId)); + return c.json({ + files: rows.map((r) => ({ + id: r.id, + url: `/api/v1/media/${r.id}`, + mime_type: r.mimeType, + kind: r.kind, + size_bytes: r.sizeBytes, + original_filename: r.originalFilename, + created_at: r.createdAt.toISOString(), + })), + total: rows.length, + }); + }); + + return r; +} diff --git a/apps/api/src/services/storage.ts b/apps/api/src/services/storage.ts new file mode 100644 index 0000000..a4d98ec --- /dev/null +++ b/apps/api/src/services/storage.ts @@ -0,0 +1,92 @@ +/** + * Object-Storage über MinIO (S3-API-kompatibel). + * + * Lokal: Container `cards-minio` (siehe infrastructure/docker-compose.yml) + * auf 9100/9101 — Plattform-MinIO bleibt auf 9000/9001 ungestört. + * + * Produktiv (Phase 10): entweder eigener MinIO auf dem Mac Mini mit + * separatem Bucket, oder gegen das Plattform-MinIO mit eigenem Bucket + * `cards-media`. Konfiguration via env, kein Code-Pfad muss umgebogen + * werden. + */ + +import * as Minio from 'minio'; + +let cached: StorageService | null = null; + +export class StorageService { + readonly client: Minio.Client; + readonly bucket: string; + private bucketReady = false; + + constructor() { + this.client = new Minio.Client({ + endPoint: process.env.CARDS_S3_ENDPOINT ?? 'localhost', + port: Number(process.env.CARDS_S3_PORT ?? 9100), + useSSL: process.env.CARDS_S3_USE_SSL === 'true', + accessKey: process.env.CARDS_S3_ACCESS_KEY ?? 'cardsadmin', + secretKey: process.env.CARDS_S3_SECRET_KEY ?? 'cardsadmin', + }); + this.bucket = process.env.CARDS_S3_BUCKET ?? 'cards-media'; + } + + /** Idempotenter Bucket-Init. Wird einmal pro Process-Lifetime gerufen. */ + async ensureBucket(): Promise { + if (this.bucketReady) return; + const exists = await this.client.bucketExists(this.bucket).catch(() => false); + if (!exists) { + await this.client.makeBucket(this.bucket); + } + this.bucketReady = true; + } + + async putObject( + key: string, + body: Buffer | Uint8Array, + contentType: string + ): Promise { + await this.ensureBucket(); + await this.client.putObject(this.bucket, key, Buffer.from(body), body.byteLength, { + 'Content-Type': contentType, + }); + } + + async getObjectStream(key: string): Promise { + await this.ensureBucket(); + return this.client.getObject(this.bucket, key); + } + + async statObject(key: string): Promise<{ size: number; contentType: string }> { + await this.ensureBucket(); + const stat = await this.client.statObject(this.bucket, key); + return { + size: stat.size, + contentType: stat.metaData?.['content-type'] ?? 'application/octet-stream', + }; + } + + async removeObject(key: string): Promise { + await this.ensureBucket(); + await this.client.removeObject(this.bucket, key); + } + + async removeObjectsByPrefix(prefix: string): Promise { + await this.ensureBucket(); + const objectsStream = this.client.listObjectsV2(this.bucket, prefix, true); + const keys: string[] = []; + for await (const obj of objectsStream) { + if (obj.name) keys.push(obj.name); + } + if (keys.length > 0) await this.client.removeObjects(this.bucket, keys); + return keys.length; + } +} + +export function getStorage(): StorageService { + if (!cached) cached = new StorageService(); + return cached; +} + +export function resetStorageForTests(): void { + cached = null; +} diff --git a/apps/api/tests/media.test.ts b/apps/api/tests/media.test.ts new file mode 100644 index 0000000..2607295 --- /dev/null +++ b/apps/api/tests/media.test.ts @@ -0,0 +1,53 @@ +import { describe, it, expect } from 'vitest'; +import { Hono } from 'hono'; + +import { mediaRouter } from '../src/routes/media.ts'; +import type { CardsDb } from '../src/db/connection.ts'; + +/** + * Auth-Gate-Tests für die Media-Routen ohne echte DB. Wir prüfen, dass + * der authMiddleware-Pfad ehrt und Validation-Errors konsistent sind. + * Ein echter MinIO-Roundtrip bleibt dem manuellen E2E-Smoke vorbehalten, + * weil sql.js + JSZip + MinIO-SDK in Vitest zu viel Mock-Overhead wäre. + */ +function buildApp() { + const app = new Hono(); + const stub = {} as CardsDb; + app.route('/api/v1/media', mediaRouter({ db: stub })); + return { app }; +} + +describe('mediaRouter — auth-gate', () => { + it('GET ohne X-User-Id ist 401', async () => { + const { app } = buildApp(); + const res = await app.request('/api/v1/media'); + expect(res.status).toBe(401); + }); + + it('GET /:id ohne X-User-Id ist 401', async () => { + const { app } = buildApp(); + const res = await app.request('/api/v1/media/abc'); + expect(res.status).toBe(401); + }); + + it('POST /upload ohne X-User-Id ist 401', async () => { + const { app } = buildApp(); + const res = await app.request('/api/v1/media/upload', { + method: 'POST', + }); + expect(res.status).toBe(401); + }); +}); + +describe('mediaRouter — Input-Validation', () => { + it('POST /upload ohne multipart-Body ist 400', async () => { + const { app } = buildApp(); + const res = await app.request('/api/v1/media/upload', { + method: 'POST', + headers: { 'X-User-Id': 'u-1' }, + }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string }; + expect(body.error).toBe('expected_multipart'); + }); +}); diff --git a/apps/web/src/lib/anki/import.ts b/apps/web/src/lib/anki/import.ts index def34ec..a834f97 100644 --- a/apps/web/src/lib/anki/import.ts +++ b/apps/web/src/lib/anki/import.ts @@ -6,37 +6,123 @@ * (Anki-`::` zu ` / ` flacht die Hierarchie aus, wie im Original). * Karten werden mit sanitisiertem Markdown angelegt. * - * Phase-8-MVP: Bilder + Audio werden gedroppt (siehe parse.ts - * `sanitizeAnkiHtml`). Ein späterer Media-Pfad ist additiv. + * Phase 9k: Media-Upload via MinIO. Bilder + Audio werden vor den + * Karten in den Cards-Bucket geladen, der Sanitize-Pfad ersetzt + * Anki-Filenames durch echte Media-URLs (`/api/v1/media/`). * - * Phase-9j-Re-Import-Dedupe: Vor dem Insert wird der content_hash der - * Karte berechnet (gleiche Funktion wie der Server) und gegen die - * existierende Hash-Liste des Users geprüft. Duplikate werden gezählt - * und übersprungen — Re-Imports bringen also keine doppelten Karten - * mehr ins Deck. Decks werden nicht dedupliziert (gewollt: zwei - * .apkg-Files mit identischen Decknamen sollen sich nicht - * versehentlich zusammenführen). + * Phase 9j Re-Import-Dedupe: content_hash-Set wird vor dem Loop + * geladen, Duplikate werden gezählt und übersprungen. */ +import JSZip from 'jszip'; + import { cardContentHash } from '@cards/domain'; import { createDeck } from '$lib/api/decks.ts'; import { createCard, listCardHashes } from '$lib/api/cards.ts'; +import { uploadMedia } from '$lib/api/media.ts'; import { sanitizeAnkiHtml, type ParsedAnki } from './parse.ts'; export interface ImportResult { decksCreated: number; cardsCreated: number; cardsSkippedDuplicate: number; + mediaUploaded: number; + mediaFailed: number; failed: number; failures: string[]; } export interface ImportProgress { - stage: 'decks' | 'cards' | 'done'; + stage: 'media' | 'decks' | 'cards' | 'done'; current: number; total: number; } +const MEDIA_CONCURRENCY = 4; +const IMG_RE = /]*\bsrc=["']([^"']+)["']/gi; +const SOUND_RE = /\[sound:([^\]]+)\]/g; + +function collectMediaRefs(parsed: ParsedAnki): Set { + const refs = new Set(); + for (const card of parsed.cards) { + for (const value of Object.values(card.fields)) { + let m: RegExpExecArray | null; + IMG_RE.lastIndex = 0; + while ((m = IMG_RE.exec(value))) refs.add(m[1]); + SOUND_RE.lastIndex = 0; + while ((m = SOUND_RE.exec(value))) refs.add(m[1]); + } + } + return refs; +} + +function guessMime(filename: string): string { + const ext = filename.split('.').pop()?.toLowerCase() ?? ''; + const map: Record = { + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + png: 'image/png', + gif: 'image/gif', + webp: 'image/webp', + svg: 'image/svg+xml', + mp3: 'audio/mpeg', + ogg: 'audio/ogg', + oga: 'audio/ogg', + wav: 'audio/wav', + m4a: 'audio/mp4', + mp4: 'video/mp4', + webm: 'video/webm', + }; + return map[ext] ?? 'application/octet-stream'; +} + +async function uploadAllMedia( + parsed: ParsedAnki, + onProgress?: (current: number, total: number) => void +): Promise<{ urlByFilename: Map; uploaded: number; failed: number }> { + const referenced = [...collectMediaRefs(parsed)].filter((f) => parsed.mediaByFilename.has(f)); + const urlByFilename = new Map(); + let uploaded = 0; + let failed = 0; + let done = 0; + + if (referenced.length === 0) { + onProgress?.(0, 0); + return { urlByFilename, uploaded, failed }; + } + + let nextIdx = 0; + async function worker() { + while (true) { + const idx = nextIdx++; + if (idx >= referenced.length) return; + const filename = referenced[idx]; + const entry = parsed.mediaByFilename.get(filename); + if (!entry) { + failed++; + done++; + onProgress?.(done, referenced.length); + continue; + } + try { + const blob = await (entry as JSZip.JSZipObject).async('blob'); + const file = new File([blob], filename, { type: guessMime(filename) }); + const result = await uploadMedia(file); + urlByFilename.set(filename, result.url); + uploaded++; + } catch (e) { + console.warn(`[anki-import] media upload failed for ${filename}:`, e); + failed++; + } + done++; + onProgress?.(done, referenced.length); + } + } + + await Promise.all(Array.from({ length: MEDIA_CONCURRENCY }, () => worker())); + return { urlByFilename, uploaded, failed }; +} + export async function importParsedAnki( parsed: ParsedAnki, opts: { onProgress?: (p: ImportProgress) => void } = {} @@ -45,22 +131,32 @@ export async function importParsedAnki( decksCreated: 0, cardsCreated: 0, cardsSkippedDuplicate: 0, + mediaUploaded: 0, + mediaFailed: 0, failed: 0, failures: [], }; - // Vor dem Insert die Hash-Liste des Users laden — wenn der Endpoint - // fehlschlägt (z.B. älterer Server vor Phase 9j), fallen wir - // stillschweigend auf "kein Dedupe" zurück. + // Hash-Set vor dem Loop laden (Phase 9j-Dedupe). const existingHashes = new Set(); try { const r = await listCardHashes(); for (const h of r.hashes) existingHashes.add(h); } catch { - // Dedupe bleibt aus — Karten werden eingefügt wie zuvor. + // Dedupe bleibt aus (älterer Server o.ä.). } - // 1) Decks — Anki "::"-Hierarchie zu " / "-Strings flach machen. + // 1) Media — vor den Karten uploaden, damit der Sanitize-Pfad echte + // URLs einsetzen kann. Files, die nicht im Anki-Manifest stehen, + // werden gedroppt; Upload-Fehler werden gezählt + im Card-Field + // gedroppt (statt 404-URL). + const { urlByFilename, uploaded, failed } = await uploadAllMedia(parsed, (current, total) => { + opts.onProgress?.({ stage: 'media', current, total }); + }); + result.mediaUploaded = uploaded; + result.mediaFailed = failed; + + // 2) Decks — Anki "::"-Hierarchie zu " / "-Strings flach machen. const ankiIdToDeckId = new Map(); let deckIdx = 0; for (const ankiDeck of parsed.decks) { @@ -76,7 +172,6 @@ export async function importParsedAnki( } } - // Fallback-Deck für Karten ohne explizit referenziertes Anki-Deck. let fallbackDeckId: string | null = null; const ensureFallbackDeck = async (): Promise => { if (fallbackDeckId) return fallbackDeckId; @@ -91,14 +186,14 @@ export async function importParsedAnki( } }; - // 2) Cards — Felder sanitizen, content_hash prüfen, einfügen. + // 3) Cards — sanitize mit URL-Map, content_hash-Dedupe, Insert. for (let i = 0; i < parsed.cards.length; i++) { opts.onProgress?.({ stage: 'cards', current: i, total: parsed.cards.length }); const card = parsed.cards[i]; const cleanFields: Record = {}; for (const [key, value] of Object.entries(card.fields)) { - cleanFields[key] = sanitizeAnkiHtml(value); + cleanFields[key] = sanitizeAnkiHtml(value, urlByFilename); } const hash = await cardContentHash({ type: card.type, fields: cleanFields }); @@ -124,8 +219,6 @@ export async function importParsedAnki( fields: cleanFields, }); result.cardsCreated++; - // Hash sofort merken — derselbe Import könnte zwei identische - // Karten enthalten (Anki-Drift), zweite würde sonst auch rein. existingHashes.add(hash); } catch (e) { result.failed++; diff --git a/apps/web/src/lib/anki/parse.ts b/apps/web/src/lib/anki/parse.ts index 193bf53..583f2bf 100644 --- a/apps/web/src/lib/anki/parse.ts +++ b/apps/web/src/lib/anki/parse.ts @@ -213,23 +213,41 @@ function mapNoteToCard( /** * Convert Anki's HTML / image / sound markup to plain text + Markdown. * - * Phase-8-MVP: Bilder + Audio werden ersatzlos gedroppt (Option A). - * Ein späterer Media-Pfad (lokaler Cards-Upload-Endpunkt oder mana-media - * via Phase 2 Auth-Föderation) kann hier eine Filename→URL-Map einsetzen, - * die dann zu `` / `