Phase 9k: Media-Upload via MinIO-Container

Eigener cards-minio-Container im docker-compose (9100/9101 — Plattform
auf 9000/9001 bleibt isoliert). cardsadmin/cardsadmin als Dev-Default,
prod via env-Vars (CARDS_S3_*).

apps/api/src/services/storage.ts — schmaler StorageService um den
minio-Client. ensureBucket() ist idempotent (auto-create beim ersten
Upload). removeObjectsByPrefix() implementiert den DSGVO-Bucket-Sweep,
weil die S3-API kein Cascade kennt.

Neue Tabelle media_files in pgSchema('cards'):
  id, user_id, object_key, mime_type, original_filename, size_bytes,
  kind, created_at — kein FK auf cards (ein File kann mehreren Karten
  gehören). objectKey-Format <userId>/<ulid>.<ext> für Bucket-Prefix-
  Sweep beim DSGVO-Delete. Legacy mediaRefs bleibt als Slot.

Neuer Router /api/v1/media:
  POST /upload   — multipart, 25 MiB Default-Limit, image/audio/video
                   only (415 sonst), schreibt media_files-Row + speichert
                   in MinIO unter <userId>/<ulid>.<ext>
  GET  /:id      — streamt aus MinIO mit Cache-Control: private,
                   immutable. Cross-User → 404 (nicht 403, anti-enumeration).
  GET  /         — listet alle eigenen Files

DSGVO-Pfade (Service-Key + /me/delete) räumen jetzt auch media_files
+ MinIO-Bucket-Prefix mit ab. Storage-Sweep ist non-fatal — DB ist erst
konsistent gelöscht, dead bytes wären die schlimmstmögliche Folge.

Anki-Import: parse.ts sanitizeAnkiHtml akzeptiert wieder eine
Filename→URL-Map (war in Phase 8c gedroppt). import.ts lädt vor den
Karten alle referenzierten Media-Files via uploadMedia() in MinIO,
sammelt URLs, ersetzt Anki-Filenames durch /api/v1/media/<id>-Pfade
in `<img>` (Markdown) und `[sound:…]` (HTML <audio>). 4-fache Worker-
Concurrency.

apps/web/src/lib/markdown.ts: DOMPurify lässt jetzt <audio>/<video>/
<source> mit src/controls/preload-Attributen durch — sonst würden die
Audio-Tags aus dem Anki-Import gestrippt.

i18n-Strings (DE/EN) auf Media-Stage erweitert: stage_media,
done_media, what_works_media, dropzone_hint, preview_media.
import.what_skipped_media wird zur Bestätigung dass Media seit
Sprint 9k mit übernommen wird.

Manueller E2E-Smoke gegen lokale MinIO (cards-minio :9100):
- 1×1-PNG hochgeladen → 201 mit ID + URL
- /api/v1/media/<id> streamt 200 image/png 69 bytes (file-Identifikation
  bestätigt)
- Cross-User → 404, ohne X-User-Id → 401, text/plain → 415

53 API-Tests grün (+4 neue media-Auth-Gate-Tests), 7 Web-Tests,
51 Domain-Tests, type-check + svelte-check 0 errors.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-05-08 18:42:56 +02:00
parent e7ae93dcf9
commit c9eb0a6f80
20 changed files with 886 additions and 78 deletions

View file

@ -19,8 +19,9 @@
},
"dependencies": {
"@cards/domain": "workspace:*",
"hono": "^4.6.0",
"drizzle-orm": "0.38",
"hono": "^4.6.0",
"minio": "^8.0.7",
"postgres": "^3.4.0",
"zod": "3",
"zod-to-json-schema": "^3.23.0"

View file

@ -22,8 +22,13 @@ export type {
export { tags } from './tags.ts';
export type { TagRow, TagInsert } from './tags.ts';
export { mediaRefs } from './media.ts';
export type { MediaRefRow, MediaRefInsert } from './media.ts';
export { mediaRefs, mediaFiles } from './media.ts';
export type {
MediaRefRow,
MediaRefInsert,
MediaFileRow,
MediaFileInsert,
} from './media.ts';
export { importJobs } from './imports.ts';
export type { ImportJobRow, ImportJobInsert } from './imports.ts';

View file

@ -4,9 +4,42 @@ import { cardsSchema } from './_schema.ts';
import { cards } from './cards.ts';
/**
* Media-Verweise auf Object-IDs in mana-media. Die eigentlichen Files
* (Bilder, Audio, Video) liegen in MinIO via mana-media; diese Tabelle
* hält nur den Verweis + Sortier-Order pro Karte.
* Media-Files: Bilder, Audio, Video, die in MinIO unter dem
* `objectKey` liegen und von Karten via cards.media_refs[]
* referenziert werden.
*
* Bewusst ohne FK auf eine konkrete Karte: ein File kann von
* mehreren Karten referenziert werden (z.B. ein Bild für Front
* und Back). Lifecycle-Cleanup per Cron oder DSGVO-Delete.
*
* objectKey-Format: `<userId>/<ulid>.<ext>` UserId-Präfix
* vereinfacht den DSGVO-Delete (Bucket-Prefix-Sweep).
*/
export const mediaFiles = cardsSchema.table(
'media_files',
{
id: text('id').primaryKey(),
userId: text('user_id').notNull(),
objectKey: text('object_key').notNull(),
mimeType: text('mime_type').notNull(),
originalFilename: text('original_filename'),
sizeBytes: integer('size_bytes').notNull(),
kind: text('kind', { enum: ['image', 'audio', 'video', 'other'] }).notNull(),
createdAt: timestamp('created_at', { withTimezone: true, mode: 'date' })
.notNull()
.defaultNow(),
},
(t) => ({
userIdx: index('media_files_user_idx').on(t.userId),
})
);
export type MediaFileRow = typeof mediaFiles.$inferSelect;
export type MediaFileInsert = typeof mediaFiles.$inferInsert;
/**
* Legacy: media_refs aus Phase 1, Vor-Sprint-15. Bewusst behalten als
* Sortier-Layer-Slot für später (mana-media-Konvergenz). Aktuell leer.
*/
export const mediaRefs = cardsSchema.table(
'media_refs',

View file

@ -11,6 +11,7 @@ import { toolsRouter } from './routes/tools.ts';
import { searchRouter } from './routes/search.ts';
import { dsgvoRouter } from './routes/dsgvo.ts';
import { meRouter } from './routes/me.ts';
import { mediaRouter } from './routes/media.ts';
const app = new Hono();
@ -41,6 +42,7 @@ app.route('/api/v1/tools', toolsRouter());
app.route('/api/v1/search', searchRouter());
app.route('/api/v1/dsgvo', dsgvoRouter());
app.route('/api/v1/me', meRouter());
app.route('/api/v1/media', mediaRouter());
app.get('/', (c) =>
c.json({

View file

@ -6,12 +6,14 @@ import {
cards,
decks,
importJobs,
mediaFiles,
mediaRefs,
reviews,
studySessions,
tags,
} from '../db/schema/index.ts';
import { serviceKeyAuth } from '../middleware/service-key.ts';
import { getStorage } from '../services/storage.ts';
export type DsgvoDeps = { db?: CardsDb };
@ -21,16 +23,25 @@ export type DsgvoDeps = { db?: CardsDb };
* Export aus /api/v1/me.
*/
export async function buildUserExport(db: CardsDb, userId: string) {
const [decksRows, cardsRows, reviewsRows, sessionsRows, tagsRows, mediaRows, importsRows] =
await Promise.all([
db.select().from(decks).where(eq(decks.userId, userId)),
db.select().from(cards).where(eq(cards.userId, userId)),
db.select().from(reviews).where(eq(reviews.userId, userId)),
db.select().from(studySessions).where(eq(studySessions.userId, userId)),
db.select().from(tags).where(eq(tags.userId, userId)),
db.select().from(mediaRefs).where(eq(mediaRefs.userId, userId)),
db.select().from(importJobs).where(eq(importJobs.userId, userId)),
]);
const [
decksRows,
cardsRows,
reviewsRows,
sessionsRows,
tagsRows,
mediaRefRows,
mediaFileRows,
importsRows,
] = await Promise.all([
db.select().from(decks).where(eq(decks.userId, userId)),
db.select().from(cards).where(eq(cards.userId, userId)),
db.select().from(reviews).where(eq(reviews.userId, userId)),
db.select().from(studySessions).where(eq(studySessions.userId, userId)),
db.select().from(tags).where(eq(tags.userId, userId)),
db.select().from(mediaRefs).where(eq(mediaRefs.userId, userId)),
db.select().from(mediaFiles).where(eq(mediaFiles.userId, userId)),
db.select().from(importJobs).where(eq(importJobs.userId, userId)),
]);
return {
user_id: userId,
@ -59,10 +70,14 @@ export async function buildUserExport(db: CardsDb, userId: string) {
finishedAt: s.finishedAt ? s.finishedAt.toISOString() : null,
})),
tags: tagsRows.map((t) => ({ ...t, createdAt: t.createdAt.toISOString() })),
media_refs: mediaRows.map((m) => ({
media_refs: mediaRefRows.map((m) => ({
...m,
createdAt: m.createdAt.toISOString(),
})),
media_files: mediaFileRows.map((f) => ({
...f,
createdAt: f.createdAt.toISOString(),
})),
import_jobs: importsRows.map((j) => ({
...j,
createdAt: j.createdAt.toISOString(),
@ -104,7 +119,9 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono {
* cards card_tags
* decks tags
* decks study_sessions
* Verbleibend: import_jobs (eigene Tabelle ohne FK) wird separat gelöscht.
* Verbleibend: import_jobs + media_files (eigene Tabellen ohne FK)
* werden separat gelöscht. MinIO-Objects werden per Bucket-Prefix-
* Sweep entfernt (objectKey-Format `<userId>/<ulid>.<ext>`).
*/
r.post('/delete', async (c) => {
const body = await c.req.json().catch(() => null);
@ -112,14 +129,33 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono {
if (!userId) return c.json({ error: 'missing_user_id' }, 400);
const db = dbOf();
const [deletedDecks, deletedImports] = await db.transaction(async (tx) => {
const dd = await tx.delete(decks).where(eq(decks.userId, userId)).returning({ id: decks.id });
const di = await tx
.delete(importJobs)
.where(eq(importJobs.userId, userId))
.returning({ id: importJobs.id });
return [dd, di];
});
const [deletedDecks, deletedImports, deletedMediaFiles] = await db.transaction(
async (tx) => {
const dd = await tx
.delete(decks)
.where(eq(decks.userId, userId))
.returning({ id: decks.id });
const di = await tx
.delete(importJobs)
.where(eq(importJobs.userId, userId))
.returning({ id: importJobs.id });
const dm = await tx
.delete(mediaFiles)
.where(eq(mediaFiles.userId, userId))
.returning({ id: mediaFiles.id });
return [dd, di, dm];
}
);
// MinIO-Bucket-Sweep nach DB-Cleanup. Wenn der Storage-Sweep
// scheitert, ist das nicht-fatal — die DB ist schon konsistent
// gelöscht und Storage-Files ohne DB-Eintrag sind tote Bytes.
let storageObjectsDeleted = 0;
try {
storageObjectsDeleted = await getStorage().removeObjectsByPrefix(`${userId}/`);
} catch (err) {
console.warn('[dsgvo/delete] storage sweep failed:', err);
}
return c.json({
deleted: true,
@ -127,6 +163,8 @@ export function dsgvoRouter(deps: DsgvoDeps = {}): Hono {
counts: {
decks: deletedDecks.length,
import_jobs: deletedImports.length,
media_files: deletedMediaFiles.length,
storage_objects: storageObjectsDeleted,
},
});
});

View file

@ -2,8 +2,9 @@ import { and, eq, gte, isNotNull, lte, sql } from 'drizzle-orm';
import { Hono } from 'hono';
import { getDb, type CardsDb } from '../db/connection.ts';
import { cards, decks, importJobs, reviews } from '../db/schema/index.ts';
import { cards, decks, importJobs, mediaFiles, reviews } from '../db/schema/index.ts';
import { authMiddleware, type AuthVars } from '../middleware/auth.ts';
import { getStorage } from '../services/storage.ts';
import { buildUserExport } from './dsgvo.ts';
export type MeDeps = { db?: CardsDb };
@ -125,14 +126,30 @@ export function meRouter(deps: MeDeps = {}): Hono<{ Variables: AuthVars }> {
r.post('/delete', async (c) => {
const userId = c.get('userId');
const db = dbOf();
const [deletedDecks, deletedImports] = await db.transaction(async (tx) => {
const dd = await tx.delete(decks).where(eq(decks.userId, userId)).returning({ id: decks.id });
const di = await tx
.delete(importJobs)
.where(eq(importJobs.userId, userId))
.returning({ id: importJobs.id });
return [dd, di];
});
const [deletedDecks, deletedImports, deletedMediaFiles] = await db.transaction(
async (tx) => {
const dd = await tx
.delete(decks)
.where(eq(decks.userId, userId))
.returning({ id: decks.id });
const di = await tx
.delete(importJobs)
.where(eq(importJobs.userId, userId))
.returning({ id: importJobs.id });
const dm = await tx
.delete(mediaFiles)
.where(eq(mediaFiles.userId, userId))
.returning({ id: mediaFiles.id });
return [dd, di, dm];
}
);
let storageObjectsDeleted = 0;
try {
storageObjectsDeleted = await getStorage().removeObjectsByPrefix(`${userId}/`);
} catch (err) {
console.warn('[me/delete] storage sweep failed:', err);
}
return c.json({
deleted: true,
@ -140,6 +157,8 @@ export function meRouter(deps: MeDeps = {}): Hono<{ Variables: AuthVars }> {
counts: {
decks: deletedDecks.length,
import_jobs: deletedImports.length,
media_files: deletedMediaFiles.length,
storage_objects: storageObjectsDeleted,
},
});
});

View file

@ -0,0 +1,156 @@
import { and, eq } from 'drizzle-orm';
import { Hono } from 'hono';
import { getDb, type CardsDb } from '../db/connection.ts';
import { mediaFiles, type MediaFileRow } from '../db/schema/index.ts';
import { authMiddleware, type AuthVars } from '../middleware/auth.ts';
import { ulid } from '../lib/ulid.ts';
import { getStorage, type StorageService } from '../services/storage.ts';
export type MediaDeps = { db?: CardsDb; storage?: StorageService };
const MAX_BYTES = Number(process.env.CARDS_MEDIA_MAX_BYTES ?? 25 * 1024 * 1024); // 25 MiB
const ALLOWED_PREFIXES = ['image/', 'audio/', 'video/'];
function kindFor(mime: string): MediaFileRow['kind'] {
if (mime.startsWith('image/')) return 'image';
if (mime.startsWith('audio/')) return 'audio';
if (mime.startsWith('video/')) return 'video';
return 'other';
}
function extFor(mime: string, fallback?: string): string {
const map: Record<string, string> = {
'image/jpeg': 'jpg',
'image/png': 'png',
'image/gif': 'gif',
'image/webp': 'webp',
'image/svg+xml': 'svg',
'audio/mpeg': 'mp3',
'audio/ogg': 'ogg',
'audio/wav': 'wav',
'audio/mp4': 'm4a',
'video/mp4': 'mp4',
'video/webm': 'webm',
};
if (map[mime]) return map[mime];
if (fallback) {
const dot = fallback.lastIndexOf('.');
if (dot > 0 && dot < fallback.length - 1) return fallback.slice(dot + 1).toLowerCase();
}
return 'bin';
}
export function mediaRouter(deps: MediaDeps = {}): Hono<{ Variables: AuthVars }> {
const r = new Hono<{ Variables: AuthVars }>();
const dbOf = () => deps.db ?? getDb();
const storageOf = () => deps.storage ?? getStorage();
r.use('*', authMiddleware);
/**
* Multipart-Upload eines einzelnen Files. Bilder/Audio/Video; alles
* andere wird mit 415 abgelehnt. Limit per env (Default 25 MiB).
*
* Antwort:
* { id, url, mime_type, kind, size_bytes, original_filename }
*
* `url` ist ein relativer App-Pfad (`/api/v1/media/<id>`), den
* Frontend + Anki-Importer in `<img src=...>` einsetzen können.
* Public absolute URL kommt erst mit Phase 10 + DNS dazu.
*/
r.post('/upload', async (c) => {
const userId = c.get('userId');
const form = await c.req.formData().catch(() => null);
if (!form) return c.json({ error: 'expected_multipart' }, 400);
const file = form.get('file');
if (!(file instanceof File)) return c.json({ error: 'missing_file_field' }, 400);
const mime = file.type || 'application/octet-stream';
if (!ALLOWED_PREFIXES.some((p) => mime.startsWith(p))) {
return c.json({ error: 'unsupported_media_type', mime_type: mime }, 415);
}
if (file.size > MAX_BYTES) {
return c.json({ error: 'too_large', max_bytes: MAX_BYTES, got: file.size }, 413);
}
const id = ulid();
const ext = extFor(mime, file.name);
const objectKey = `${userId}/${id}.${ext}`;
const buf = new Uint8Array(await file.arrayBuffer());
await storageOf().putObject(objectKey, buf, mime);
const [row] = await dbOf()
.insert(mediaFiles)
.values({
id,
userId,
objectKey,
mimeType: mime,
originalFilename: file.name || null,
sizeBytes: buf.byteLength,
kind: kindFor(mime),
createdAt: new Date(),
})
.returning();
return c.json(
{
id: row.id,
url: `/api/v1/media/${row.id}`,
mime_type: row.mimeType,
kind: row.kind,
size_bytes: row.sizeBytes,
original_filename: row.originalFilename,
},
201
);
});
/**
* Streamt ein Media-File via MinIO-getObject. User-gated fremde
* Files sind 404 (nicht 403, damit IDs nicht enumerierbar sind).
*/
r.get('/:id', async (c) => {
const userId = c.get('userId');
const id = c.req.param('id');
const [row] = await dbOf()
.select()
.from(mediaFiles)
.where(and(eq(mediaFiles.id, id), eq(mediaFiles.userId, userId)))
.limit(1);
if (!row) return c.json({ error: 'not_found' }, 404);
const stream = await storageOf().getObjectStream(row.objectKey);
c.header('Content-Type', row.mimeType);
c.header('Content-Length', String(row.sizeBytes));
c.header('Cache-Control', 'private, max-age=31536000, immutable');
return new Response(stream as unknown as ReadableStream, {
status: 200,
headers: c.res.headers,
});
});
/** Listet alle Media-Files des Users — nützlich fürs UI später. */
r.get('/', async (c) => {
const userId = c.get('userId');
const rows = await dbOf().select().from(mediaFiles).where(eq(mediaFiles.userId, userId));
return c.json({
files: rows.map((r) => ({
id: r.id,
url: `/api/v1/media/${r.id}`,
mime_type: r.mimeType,
kind: r.kind,
size_bytes: r.sizeBytes,
original_filename: r.originalFilename,
created_at: r.createdAt.toISOString(),
})),
total: rows.length,
});
});
return r;
}

View file

@ -0,0 +1,92 @@
/**
* Object-Storage über MinIO (S3-API-kompatibel).
*
* Lokal: Container `cards-minio` (siehe infrastructure/docker-compose.yml)
* auf 9100/9101 Plattform-MinIO bleibt auf 9000/9001 ungestört.
*
* Produktiv (Phase 10): entweder eigener MinIO auf dem Mac Mini mit
* separatem Bucket, oder gegen das Plattform-MinIO mit eigenem Bucket
* `cards-media`. Konfiguration via env, kein Code-Pfad muss umgebogen
* werden.
*/
import * as Minio from 'minio';
let cached: StorageService | null = null;
export class StorageService {
readonly client: Minio.Client;
readonly bucket: string;
private bucketReady = false;
constructor() {
this.client = new Minio.Client({
endPoint: process.env.CARDS_S3_ENDPOINT ?? 'localhost',
port: Number(process.env.CARDS_S3_PORT ?? 9100),
useSSL: process.env.CARDS_S3_USE_SSL === 'true',
accessKey: process.env.CARDS_S3_ACCESS_KEY ?? 'cardsadmin',
secretKey: process.env.CARDS_S3_SECRET_KEY ?? 'cardsadmin',
});
this.bucket = process.env.CARDS_S3_BUCKET ?? 'cards-media';
}
/** Idempotenter Bucket-Init. Wird einmal pro Process-Lifetime gerufen. */
async ensureBucket(): Promise<void> {
if (this.bucketReady) return;
const exists = await this.client.bucketExists(this.bucket).catch(() => false);
if (!exists) {
await this.client.makeBucket(this.bucket);
}
this.bucketReady = true;
}
async putObject(
key: string,
body: Buffer | Uint8Array,
contentType: string
): Promise<void> {
await this.ensureBucket();
await this.client.putObject(this.bucket, key, Buffer.from(body), body.byteLength, {
'Content-Type': contentType,
});
}
async getObjectStream(key: string): Promise<NodeJS.ReadableStream> {
await this.ensureBucket();
return this.client.getObject(this.bucket, key);
}
async statObject(key: string): Promise<{ size: number; contentType: string }> {
await this.ensureBucket();
const stat = await this.client.statObject(this.bucket, key);
return {
size: stat.size,
contentType: stat.metaData?.['content-type'] ?? 'application/octet-stream',
};
}
async removeObject(key: string): Promise<void> {
await this.ensureBucket();
await this.client.removeObject(this.bucket, key);
}
async removeObjectsByPrefix(prefix: string): Promise<number> {
await this.ensureBucket();
const objectsStream = this.client.listObjectsV2(this.bucket, prefix, true);
const keys: string[] = [];
for await (const obj of objectsStream) {
if (obj.name) keys.push(obj.name);
}
if (keys.length > 0) await this.client.removeObjects(this.bucket, keys);
return keys.length;
}
}
export function getStorage(): StorageService {
if (!cached) cached = new StorageService();
return cached;
}
export function resetStorageForTests(): void {
cached = null;
}

View file

@ -0,0 +1,53 @@
import { describe, it, expect } from 'vitest';
import { Hono } from 'hono';
import { mediaRouter } from '../src/routes/media.ts';
import type { CardsDb } from '../src/db/connection.ts';
/**
* Auth-Gate-Tests für die Media-Routen ohne echte DB. Wir prüfen, dass
* der authMiddleware-Pfad ehrt und Validation-Errors konsistent sind.
* Ein echter MinIO-Roundtrip bleibt dem manuellen E2E-Smoke vorbehalten,
* weil sql.js + JSZip + MinIO-SDK in Vitest zu viel Mock-Overhead wäre.
*/
function buildApp() {
const app = new Hono();
const stub = {} as CardsDb;
app.route('/api/v1/media', mediaRouter({ db: stub }));
return { app };
}
describe('mediaRouter — auth-gate', () => {
it('GET ohne X-User-Id ist 401', async () => {
const { app } = buildApp();
const res = await app.request('/api/v1/media');
expect(res.status).toBe(401);
});
it('GET /:id ohne X-User-Id ist 401', async () => {
const { app } = buildApp();
const res = await app.request('/api/v1/media/abc');
expect(res.status).toBe(401);
});
it('POST /upload ohne X-User-Id ist 401', async () => {
const { app } = buildApp();
const res = await app.request('/api/v1/media/upload', {
method: 'POST',
});
expect(res.status).toBe(401);
});
});
describe('mediaRouter — Input-Validation', () => {
it('POST /upload ohne multipart-Body ist 400', async () => {
const { app } = buildApp();
const res = await app.request('/api/v1/media/upload', {
method: 'POST',
headers: { 'X-User-Id': 'u-1' },
});
expect(res.status).toBe(400);
const body = (await res.json()) as { error: string };
expect(body.error).toBe('expected_multipart');
});
});