From fd1ea4707520a93dc2bf237ff3820eabbc1f62ff Mon Sep 17 00:00:00 2001 From: Till JS Date: Wed, 22 Apr 2026 18:46:29 +0200 Subject: [PATCH] feat(backup): client-driven v2 snapshot export, drop server-side backup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the mana-sync event-stream export (GET /backup/export) with a fully client-driven `.mana` v2 archive: webapp reads Dexie, decrypts per-field, packages JSONL + manifest, optionally PBKDF2+AES-GCM seals with a passphrase. - New: backup/v2/{format,passphrase,export,import}.ts + format.test.ts (10 tests: round-trip, sealed path, 3 failure modes incl. wrong- passphrase vs. tamper distinction). - UI: ExportImportPanel with module multi-select, optional passphrase, progress + sealed-file detection — replaces the old backup flow in Settings → MyData. - Removes services/mana-sync/internal/backup/ and the corresponding client helpers + v1 tests. No parallel paths, no legacy shim. - Why client-driven: zero-knowledge users hold their vault key only client-side, so a server exporter cannot produce plaintext archives; GDPR Art. 20 portability is better served by plaintext-by-default. - Cross-account restore works via re-encryption under the target vault key (no MK transfer needed). DATA_LAYER_AUDIT.md §8 rewritten to reflect the new architecture. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../apps/web/src/lib/api/services/backup.ts | 64 -- .../my-data/ExportImportPanel.svelte | 644 ++++++++++++++++++ .../settings/sections/MyDataSection.svelte | 159 +---- .../apps/web/src/lib/data/DATA_LAYER_AUDIT.md | 132 ++-- .../web/src/lib/data/backup/format.test.ts | 231 ------- .../apps/web/src/lib/data/backup/format.ts | 259 ------- .../apps/web/src/lib/data/backup/import.ts | 218 ------ .../apps/web/src/lib/data/backup/v2/export.ts | 164 +++++ .../web/src/lib/data/backup/v2/format.test.ts | 140 ++++ .../apps/web/src/lib/data/backup/v2/format.ts | 465 +++++++++++++ .../apps/web/src/lib/data/backup/v2/import.ts | 175 +++++ .../web/src/lib/data/backup/v2/passphrase.ts | 166 +++++ docs/plans/data-export-v2.md | 309 +++++++++ services/mana-sync/CLAUDE.md | 25 +- services/mana-sync/cmd/server/main.go | 12 +- services/mana-sync/internal/backup/handler.go | 128 ---- services/mana-sync/internal/backup/writer.go | 133 ---- .../mana-sync/internal/backup/writer_test.go | 251 ------- 18 files changed, 2145 insertions(+), 1530 deletions(-) delete mode 100644 apps/mana/apps/web/src/lib/api/services/backup.ts create mode 100644 apps/mana/apps/web/src/lib/components/my-data/ExportImportPanel.svelte delete mode 100644 apps/mana/apps/web/src/lib/data/backup/format.test.ts delete mode 100644 apps/mana/apps/web/src/lib/data/backup/format.ts delete mode 100644 apps/mana/apps/web/src/lib/data/backup/import.ts create mode 100644 apps/mana/apps/web/src/lib/data/backup/v2/export.ts create mode 100644 apps/mana/apps/web/src/lib/data/backup/v2/format.test.ts create mode 100644 apps/mana/apps/web/src/lib/data/backup/v2/format.ts create mode 100644 apps/mana/apps/web/src/lib/data/backup/v2/import.ts create mode 100644 apps/mana/apps/web/src/lib/data/backup/v2/passphrase.ts create mode 100644 docs/plans/data-export-v2.md delete mode 100644 services/mana-sync/internal/backup/handler.go delete mode 100644 services/mana-sync/internal/backup/writer.go delete mode 100644 services/mana-sync/internal/backup/writer_test.go diff --git a/apps/mana/apps/web/src/lib/api/services/backup.ts b/apps/mana/apps/web/src/lib/api/services/backup.ts deleted file mode 100644 index 7e7dd1270..000000000 --- a/apps/mana/apps/web/src/lib/api/services/backup.ts +++ /dev/null @@ -1,64 +0,0 @@ -/** - * Backup / Restore API. - * - * Talks directly to mana-sync's /backup/export endpoint, which streams a - * .mana archive (zip container) with two entries: - * - * events.jsonl — every sync_changes row, one per line, chronological - * manifest.json — formatVersion, schemaVersion, userId, eventCount, - * eventsSha256, app list, timestamps - * - * The file is immediately usable as input for the future import flow: - * replaying events through applyServerChanges() reconstructs the user's - * entire dataset in a fresh IndexedDB. - * - * Field-level encrypted fields stay ciphertext throughout — the file is - * safe at rest for those fields. Plaintext fields (IDs, timestamps, sort - * keys) are visible as-is, matching the GDPR data-portability expectation. - */ - -import { authStore } from '$lib/stores/auth.svelte'; - -function getSyncServerUrl(): string { - if (typeof window !== 'undefined') { - const injected = (window as unknown as { __PUBLIC_SYNC_SERVER_URL__?: string }) - .__PUBLIC_SYNC_SERVER_URL__; - if (injected) return injected; - } - return (import.meta.env.PUBLIC_SYNC_SERVER_URL as string | undefined) ?? 'http://localhost:3050'; -} - -export const backupService = { - /** - * Trigger a browser download of the user's full sync-event backup as - * a .jsonl file. Streams directly from mana-sync; no intermediate buffer - * in the app server. - */ - async downloadBackup(): Promise { - const token = await authStore.getValidToken(); - if (!token) throw new Error('not authenticated'); - - const response = await fetch(`${getSyncServerUrl()}/backup/export`, { - method: 'GET', - headers: { Authorization: `Bearer ${token}` }, - }); - - if (!response.ok) { - throw new Error(`backup export failed: ${response.status} ${response.statusText}`); - } - - const blob = await response.blob(); - const filename = - response.headers.get('Content-Disposition')?.match(/filename="(.+)"/)?.[1] || - `mana-backup-${new Date().toISOString().slice(0, 10)}.mana`; - - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = filename; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - }, -}; diff --git a/apps/mana/apps/web/src/lib/components/my-data/ExportImportPanel.svelte b/apps/mana/apps/web/src/lib/components/my-data/ExportImportPanel.svelte new file mode 100644 index 000000000..6b2aba256 --- /dev/null +++ b/apps/mana/apps/web/src/lib/components/my-data/ExportImportPanel.svelte @@ -0,0 +1,644 @@ + + + + + + + +
+
+

Module

+ +
+
+ {#each options as opt (opt.appId)} + + {/each} +
+
+ + +
+ + {#if usePassphrase} +
+ + +
+

+ Ohne Passphrase enthält die Datei die Daten im Klartext — bequem durchsuchbar, behandle sie + wie persönliche Dokumente. Mit Passphrase wird der Inhalt AES-GCM-verschlüsselt + (PBKDF2-SHA256, 600k Iterationen). +

+ {#if passphraseError} +

{passphraseError}

+ {/if} + {/if} +
+ + +
+ + +
+ + + + + {#if exportProgress} +
+

{labelForExportPhase(exportProgress)}

+ {#if exportProgress.totalTables > 0} +
+
+
+ {/if} +
+ {/if} + {#if exportError} +

+ + {exportError} +

+ {/if} + {#if exportResult && !exportBusy} +

+ + {Object.values(exportResult.rowCounts) + .reduce((a, b) => a + b, 0) + .toLocaleString('de-DE')} Rows aus {Object.keys(exportResult.rowCounts).length} Tabellen exportiert + — + {exportResult.filename} +

+ {/if} + + + {#if pendingSealedFile && pendingSealedManifest} +
+

Passphrase-geschützes Archiv

+

+ Diese Datei wurde mit einer Passphrase verschlüsselt. Gib sie ein um mit dem Import + fortzufahren. +

+ { + if (e.key === 'Enter' && importPassphrase) confirmSealedImport(); + }} + /> +
+ + +
+
+ {/if} + + + {#if importProgress} +
+

{labelForImportPhase(importProgress)}

+ {#if importProgress.totalTables > 0} +
+
+
+ {/if} +
+ {/if} + {#if importError} +

+ + {importError} +

+ {/if} + {#if importResult && !importBusy} +

+ + {importResult.totalApplied.toLocaleString('de-DE')} Rows aus {Object.keys( + importResult.appliedPerTable + ).length} Tabellen eingespielt{#if importResult.skippedTables.length > 0} + · {importResult.skippedTables.length} Tabelle(n) übersprungen (nicht im aktuellen Build) + {/if} +

+ {/if} +
+ + diff --git a/apps/mana/apps/web/src/lib/components/settings/sections/MyDataSection.svelte b/apps/mana/apps/web/src/lib/components/settings/sections/MyDataSection.svelte index 23b1db293..abbd061f1 100644 --- a/apps/mana/apps/web/src/lib/components/settings/sections/MyDataSection.svelte +++ b/apps/mana/apps/web/src/lib/components/settings/sections/MyDataSection.svelte @@ -20,13 +20,7 @@ import DeleteConfirmationModal from '$lib/components/my-data/DeleteConfirmationModal.svelte'; import QRExportModal from '$lib/components/my-data/QRExportModal.svelte'; import { myDataService, type UserDataSummary } from '$lib/api/services/my-data'; - import { backupService } from '$lib/api/services/backup'; - import { - importBackup, - BackupImportError, - type ImportProgress, - type ImportResult, - } from '$lib/data/backup/import'; + import ExportImportPanel from '$lib/components/my-data/ExportImportPanel.svelte'; import type { DeleteUserDataResponse } from '$lib/api/services/admin'; import { authStore } from '$lib/stores/auth.svelte'; @@ -42,69 +36,6 @@ let showQRDialog = $state(false); - let backupLoading = $state(false); - let backupError = $state(null); - - let importInput = $state(null); - let importing = $state(false); - let importProgress = $state(null); - let importResult = $state(null); - let importError = $state(null); - - async function handleBackupDownload() { - backupLoading = true; - backupError = null; - try { - await backupService.downloadBackup(); - } catch (e) { - backupError = e instanceof Error ? e.message : 'Backup fehlgeschlagen'; - } finally { - backupLoading = false; - } - } - - async function handleImportFileChange(e: Event) { - const input = e.currentTarget as HTMLInputElement; - const file = input.files?.[0]; - input.value = ''; - if (!file) return; - - importing = true; - importError = null; - importResult = null; - importProgress = { phase: 'parsing', applied: 0, total: 0 }; - - try { - const result = await importBackup(file, { - onProgress: (p) => (importProgress = p), - }); - importResult = result; - } catch (e) { - if (e instanceof BackupImportError) { - importError = `${e.kind}: ${e.message}`; - } else { - importError = e instanceof Error ? e.message : 'Import fehlgeschlagen'; - } - } finally { - importing = false; - } - } - - function importProgressLabel(p: ImportProgress): string { - switch (p.phase) { - case 'parsing': - return 'Archiv wird entpackt…'; - case 'validating': - return 'Manifest & Integrität werden geprüft…'; - case 'applying': - return p.currentAppId - ? `Wende Events an (${p.applied}/${p.total}) — ${p.currentAppId}` - : `Wende Events an (${p.applied}/${p.total})`; - case 'done': - return `Fertig — ${p.applied} Events eingespielt`; - } - } - async function loadMyData() { loading = true; error = null; @@ -442,92 +373,8 @@ - - - -
-
-
-

Backup herunterladen

-

- ZIP mit Event-Stream + Integritäts-Hash. Sensible Felder bleiben verschlüsselt. -

-
- -
-
-
-

Backup einspielen

-

Nur Backups deines eigenen Accounts werden akzeptiert.

-
- -
-
- - - - {#if backupError} -

{backupError}

- {/if} - - {#if importProgress} -
-

{importProgressLabel(importProgress)}

- {#if importProgress.total > 0} -
-
-
- {/if} -
- {/if} - - {#if importResult} -

- - {importResult.appliedEvents} Events aus Backup vom {formatDate( - importResult.manifest.createdAt - )} eingespielt ({importResult.manifest.apps.length} Apps). -

- {/if} - - {#if importError} -

{importError}

- {/if} -
+ + diff --git a/apps/mana/apps/web/src/lib/data/DATA_LAYER_AUDIT.md b/apps/mana/apps/web/src/lib/data/DATA_LAYER_AUDIT.md index b041ed263..8114b3d84 100644 --- a/apps/mana/apps/web/src/lib/data/DATA_LAYER_AUDIT.md +++ b/apps/mana/apps/web/src/lib/data/DATA_LAYER_AUDIT.md @@ -501,95 +501,95 @@ Pre-existing Test-Failures (nicht von dieser Audit-Arbeit verursacht): Die Datenschicht ist jetzt **production-grade** in den Dimensionen Korrektheit, Sicherheit, **Vertraulichkeit** (inkl. optionaler **Zero-Knowledge-Modus**), Robustheit, Beobachtbarkeit, Performance und Testabdeckung. -## 8. Backup & Restore (Sync-Stream-Export) +## 8. Data Export / Import (v2, ab 2026-04-22) -Der Sync-Event-Log ist bereits eine saubere, LWW-geordnete, schema-versionierte Serialisierung aller Nutzerdaten — also nutzen wir ihn als Backup-Format statt eine zweite parallele Serializer-Schicht zu bauen. +Pre-launch Umbau: der alte server-seitige Sync-Stream-Export (`GET /backup/export`) ist weg. Data-Export ist jetzt **rein client-driven** — der Webapp liest seine lokale Dexie, entschlüsselt pro Feld, baut ein portables Snapshot-Archiv und bietet optional einen Passphrase-Wrap an. -### Architektur — eine Datei, beide Richtungen +### Warum Client-driven + +- **Zero-Knowledge-User** halten ihren Vault-Key ausschließlich client-seitig — ein Server-Exporter kann für sie prinzipiell kein Klartext-Archiv erzeugen. +- **GDPR Art. 20** (Datenportabilität) erwartet ein maschinenlesbares Format, das der Nutzer außerhalb des Anbieters auswerten kann. Ciphertext-Blob, den nur eine laufende Mana-Installation wieder aufschließen kann, erfüllt das nicht. +- **Modul-selektives Export** (nur Todo + Notes, nicht alles) ist intrinsisch eine Client-Entscheidung. Der Server hat kein Business zu wissen, welche Subset ein User rausgibt. + +### Architektur ``` EXPORT IMPORT ──────────────────────────────────────────── ──────────────────────────────────────────── -mana-sync DB .mana (ZIP) - └─ sync_changes WHERE user_id = $1 ├─ events.jsonl ──┐ - │ └─ manifest.json │ parseBackup() - ▼ ▼ - WriteBackup(w, userID, createdAt, iter) authStore.user.id match? ┐ - │ streams eventsSha256 match? │ validate - ├─ events.jsonl (JSON Lines) schemaVersionMax ≤ client?┘ - └─ manifest.json │ - ▼ - iterateEvents() → toSyncChange() - │ - ▼ - applyServerChanges(appId, batch) - │ (batches of 300) - ▼ - IndexedDB (via Dexie hooks, suppressed) +Dexie (this device, this session's vault) .mana Archiv + └─ iterate MODULE_CONFIGS[*].tables ├─ manifest.json + │ ├─ data/*.jsonl (oder) + ▼ └─ data.sealed (AES-GCM-gewrapped) + decryptRecords(table, rows) │ + │ ▼ + ▼ readBackup() → parseManifest() + build manifest + data/*.jsonl │ + │ optional: ▼ (falls gesealed) + ▼ seal(passphrase, innerBody) unseal(passphrase, sealed, wrap) + buildBackup / buildSealedBackup │ + │ ▼ + ▼ applyClientBackup: + .mana ZIP (hand-gerollt + pako deflate) delete row.userId (adoption durch Hook) + encryptRecord(table, row) ← mit ZIEL-Account-Key + db.table(table).bulkPut(prepared) ``` -Same-Account-Restore funktioniert ohne Server-Roundtrip: Events liegen schon auf mana-sync, LWW würde sowieso dedupen. Cross-Account-Migration (anderer User auf neuem Gerät) braucht den MK-Transfer-Pfad — siehe Backlog. +**Cross-Account-Restore funktioniert**, ohne dass ein Master-Key transferiert werden muss: Export entschlüsselt, Import re-verschlüsselt mit dem _neuen_ Vault-Key. Zero-Knowledge-User, die ihren Recovery-Code verloren haben, können sich so auch selbst wieder rein-restoren. -### `.mana`-Dateiformat (Version 1) +### `.mana`-Format (v2) -ZIP-Archiv mit genau zwei Einträgen, beide DEFLATE-komprimiert: +Hand-gerollter ZIP (PKZIP, Store + Deflate via `pako`), genau ein stabiler Header, zwei Payload-Formen: -| Entry | Inhalt | -| --------------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `events.jsonl` | Eine JSON-Zeile pro `sync_changes`-Row, chronologisch | -| `manifest.json` | Header mit `formatVersion`, `schemaVersion`, `userId`, `eventCount`, `eventsSha256`, `apps[]`, `createdAt`, `schemaVersionMin/Max` | +| Entry | Plain-Export | Sealed-Export | +| -------------------- | -------------- | -------------- | +| `manifest.json` | ✅ lesbar | ✅ lesbar | +| `data/{table}.jsonl` | ✅ Plain-JSONL | — | +| `data.sealed` | — | ✅ AES-GCM-256 | +| `README.txt` | optional | optional | -**Event-Zeile**: +`manifest.json` trägt `formatVersion: 2`, `schemaVersion` (Dexie `db.verno`), `producedBy`, `exportedAt`, `userId`, `scope` (`full` oder `filtered` mit `appIds[]`), `rowCounts`, `fieldsPlaintext: true` und — bei Sealed — den `passphrase`-Block mit KDF-Parametern (`pbkdf2-sha256`, 600k Iterationen, 16-byte Salt, 12-byte IV). + +**JSONL-Zeile** (Plain): ```json -{"eventId":"uuid","schemaVersion":1,"appId":"todo","table":"tasks","id":"task-1","op":"update","data":{...},"fieldTimestamps":{...},"clientId":"...","createdAt":"2026-..."} +{"id":"task-1","userId":"...","title":"Einkaufen","order":3,"createdAt":"...","__fieldTimestamps":{...}} ``` -Verschlüsselte Felder bleiben Ciphertext — die `.mana`-Datei ist für die 27 Encryption-Registry-Tabellen **at-rest verschlüsselt**. Plaintext-Felder (IDs, Sort-Keys, Timestamps) stehen lesbar drin (GDPR-Portabilitäts-Anspruch). +Verschlüsselte Felder aus der Encryption-Registry sind **hier im Klartext** — der Sinn des Exports. Wer das Archiv trotzdem verschlüsselt haben will, aktiviert den Passphrase-Wrap. -### Protokoll-Stability-Contract (M2, pre-launch gehärtet) +### Passphrase-Seal -Ab v1 sind diese Felder unveränderlich im Event-Shape: - -- `eventId: uuid` — stabiler Primary-Key, client-seitiger Dedup -- `schemaVersion: number` — ermöglicht Migration-Chain für künftige Protokoll-Änderungen -- `op: "insert" | "update" | "delete"` — Vokabular eingefroren -- `fields` = kanonisch für LWW-Merges, `data` = Snapshot-only für Inserts -- Tombstones (Deletes) bleiben für immer in `sync_changes` — sonst kein vollständiges Backup - -**Pre-M2-Clients** (kein `schemaVersion` auf dem Wire) werden server-seitig auf v1 geklemmt. Ein Client mit `schemaVersion > MaxSupported` wird mit 400 abgelehnt. - -### Encryption-Boundary bleibt intakt - -Der Backup-Pfad **berührt nie Plaintext**: - -1. Feld-Level-Ciphertext liegt bereits verschlüsselt in `sync_changes.data` -2. `WriteBackup` liest Bytes 1:1 und streamt sie in den ZIP -3. Import-Seite ruft `applyServerChanges()` — das gleiche Pfad, den Live-Sync benutzt — was in IndexedDB landet, fließt durch den normalen `decryptRecords()`-Pfad beim Lesen, nicht beim Schreiben - -Zero-Knowledge-User: bis zum MK-Transfer-Pfad (M5) können sie sich selbst restoren (gleicher Account, gleicher Recovery-Code schon aktiv) — aber kein Account-Wechsel ohne Recovery-Code. +- **KDF**: PBKDF2-SHA256, 600 000 Iterationen (OWASP 2024) +- **Cipher**: AES-GCM-256 +- **Integrity**: GCM-AuthTag + separater sha256 über den Plaintext-Body → bei falscher Passphrase wirft `unseal()` `PassphraseError` (mit freundlicher Meldung), bei echter Korruption `BackupParseError` +- **Min-Länge**: UI erzwingt 12 Zeichen vor dem Aufruf ### Dateien -| Pfad | Rolle | -| ------------------------------------------------------------------- | ---------------------------------------------------------------------------------------- | -| `services/mana-sync/internal/backup/writer.go` | Pure `WriteBackup()` — streaming ZIP + sha256-Tee | -| `services/mana-sync/internal/backup/handler.go` | HTTP-Shim für `GET /backup/export` (auth-only, kein billing-gate) | -| `services/mana-sync/internal/backup/writer_test.go` | 4 Go-Tests (Round-Trip, empty, legacy-v0-clamping) | -| `services/mana-sync/internal/store/postgres.go` | `StreamAllUserChanges()` — cursor-freier Stream über alle Events eines Users, RLS-scoped | -| `apps/mana/apps/web/src/lib/data/backup/format.ts` | Hand-gerollter ZIP-Parser + sha256-Recompute (nutzt `pako` für Inflate) | -| `apps/mana/apps/web/src/lib/data/backup/import.ts` | Replay-Logik: validate → iterate → batch → `applyServerChanges` | -| `apps/mana/apps/web/src/lib/data/backup/format.test.ts` | 8 Vitest-Tests für den Parser (synthetische PKZIP-Bytes) | -| `apps/mana/apps/web/src/lib/api/services/backup.ts` | Browser-seitiger Download-Helper | -| `apps/mana/apps/web/src/routes/(app)/settings/my-data/+page.svelte` | UI: Download + File-Picker + Progress | +| Pfad | Rolle | +| ------------------------------------------------------------------------------ | -------------------------------------------------------------------- | +| `apps/mana/apps/web/src/lib/data/backup/v2/format.ts` | PKZIP-Writer + Reader, Manifest-Schema, CRC32, sha256-Helper | +| `apps/mana/apps/web/src/lib/data/backup/v2/passphrase.ts` | `seal()` / `unseal()` — PBKDF2 + AES-GCM via Web Crypto | +| `apps/mana/apps/web/src/lib/data/backup/v2/export.ts` | `buildClientBackup()` — walk MODULE_CONFIGS → decryptRecords → JSONL | +| `apps/mana/apps/web/src/lib/data/backup/v2/import.ts` | `applyClientBackup()` — strip userId → encryptRecord → `bulkPut` | +| `apps/mana/apps/web/src/lib/components/my-data/ExportImportPanel.svelte` | UI: Modul-Auswahl, Passphrase-Toggle, Progress, Sealed-File-Prompt | +| `apps/mana/apps/web/src/lib/components/settings/sections/MyDataSection.svelte` | Mount-Point in Settings | -### Offene Punkte (Backup-Backlog) +### Encryption-Boundary -- **M5 (Cross-Account-Restore)**: `manifest.encryption.mkWrap` mit KEK-wrapped MK befüllen; neuer `POST /me/vault/import-mk` in `mana-auth`; Zero-Knowledge-Pfad via Recovery-Code-Eingabe beim Import -- **M4b (Bulk-Ingest-Endpoint)**: `POST /sync/{appId}/ingest` damit importierte Events auch server-seitig auf dem neuen Account landen (nur relevant bei Cross-Account) -- **Signatur**: Ed25519 über `manifest.json` gegen Tampering — heute nur sha256 über events.jsonl -- **Resumable Download**: Multi-GB-Accounts werden irgendwann fraglich im Browser -- **`_appliedEventIds` Dedup-Tabelle**: Performance-Optimierung für Re-Import (heute macht LWW den Dedup, aber wir verarbeiten trotzdem jedes Event) +Der neue Pfad **bricht die at-rest Boundary bewusst**: der Exporter entschlüsselt, bevor er in die JSONL schreibt. Das ist ausdrücklich Teil der Zweckbestimmung (Portabilität). Wer das nicht will, sealt mit Passphrase — dann ist das Archiv `.sealed` und außerhalb von Mana unbrauchbar, aber auch für jemanden mit Zugriff auf die Datei ohne Passphrase. + +### Schema-Compat + +Der Importer akzeptiert Archive mit `schemaVersion ∈ [current - 2, current]`. Exports aus der Zukunft (User hat Mana downgegradet) werden abgelehnt. Unbekannte Tabellen (Modul wurde seither entfernt) werden still übersprungen, nicht als Fehler behandelt. + +### Ausdrücklich nicht übernommen aus v1 + +- Kein server-seitiger `/backup/export` mehr — Route + `services/mana-sync/internal/backup/` wurde in einem Rutsch entfernt, keine Parallelpfade. +- Keine `sync_changes`-Event-Stream-Serialisierung im Archiv — direkter Dexie-Snapshot ist für den Use-Case "Daten mitnehmen / sichern" ehrlicher und kleiner. +- Kein MK-Wrap-Transfer — Cross-Account funktioniert durch Re-Encryption mit dem Ziel-Vault-Key, nicht durch Key-Transplant. + +Plan: [`docs/plans/data-export-v2.md`](../../../../../../docs/plans/data-export-v2.md). ## 9. Actor-Attribution & AI-Workbench (ab 2026-04-14) diff --git a/apps/mana/apps/web/src/lib/data/backup/format.test.ts b/apps/mana/apps/web/src/lib/data/backup/format.test.ts deleted file mode 100644 index 8961d3cca..000000000 --- a/apps/mana/apps/web/src/lib/data/backup/format.test.ts +++ /dev/null @@ -1,231 +0,0 @@ -/** - * Tests for the hand-rolled .mana (zip) parser. - * - * The parser is the only untrusted-input frontier in the backup flow — a - * corrupt archive should raise a clear BackupParseError, never silently - * drop events. To exercise it without running mana-sync we build synthetic - * archives in-memory: deflate the entries with `pako` (same lib used at - * runtime), assemble local headers + central directory + EOCD per PKZIP - * spec, and feed the result as a Blob. - */ - -import { describe, it, expect } from 'vitest'; -import { deflateRaw } from 'pako'; -import { BackupParseError, iterateEvents, parseBackup, type BackupManifest } from './format'; - -const SIG_LOCAL = 0x04034b50; -const SIG_CENTRAL = 0x02014b50; -const SIG_EOCD = 0x06054b50; - -interface EntrySpec { - name: string; - body: Uint8Array; - method: 0 | 8; // 0 = store, 8 = deflate -} - -/** - * Build a minimal valid PKZIP archive from the given entries. Good enough - * to exercise parseBackup's central-directory walk. CRC32 is left zero — - * the parser does not verify it (sha256 on the uncompressed content plays - * that role at a higher level). - */ -function buildZip(entries: EntrySpec[]): Uint8Array { - const parts: Uint8Array[] = []; - const central: Uint8Array[] = []; - let offset = 0; - - for (const e of entries) { - const nameBytes = new TextEncoder().encode(e.name); - const data = e.method === 8 ? deflateRaw(e.body) : e.body; - - // Local file header - const localHeader = new Uint8Array(30); - const lv = new DataView(localHeader.buffer); - lv.setUint32(0, SIG_LOCAL, true); - lv.setUint16(4, 20, true); // version needed - lv.setUint16(6, 0, true); // flags - lv.setUint16(8, e.method, true); - lv.setUint16(10, 0, true); // mtime - lv.setUint16(12, 0, true); // mdate - lv.setUint32(14, 0, true); // crc32 (ignored by parser) - lv.setUint32(18, data.length, true); // compressed size - lv.setUint32(22, e.body.length, true); // uncompressed size - lv.setUint16(26, nameBytes.length, true); - lv.setUint16(28, 0, true); // extra len - - parts.push(localHeader, nameBytes, data); - const localHeaderOffset = offset; - offset += localHeader.length + nameBytes.length + data.length; - - // Central directory entry - const cdHeader = new Uint8Array(46); - const cv = new DataView(cdHeader.buffer); - cv.setUint32(0, SIG_CENTRAL, true); - cv.setUint16(4, 20, true); // version made by - cv.setUint16(6, 20, true); // version needed - cv.setUint16(8, 0, true); // flags - cv.setUint16(10, e.method, true); - cv.setUint32(16, 0, true); // crc32 - cv.setUint32(20, data.length, true); - cv.setUint32(24, e.body.length, true); - cv.setUint16(28, nameBytes.length, true); - cv.setUint32(42, localHeaderOffset, true); - central.push(cdHeader, nameBytes); - } - - const centralStart = offset; - for (const c of central) { - parts.push(c); - offset += c.length; - } - const centralSize = offset - centralStart; - - const eocd = new Uint8Array(22); - const ev = new DataView(eocd.buffer); - ev.setUint32(0, SIG_EOCD, true); - ev.setUint16(8, entries.length, true); // entries on this disk - ev.setUint16(10, entries.length, true); // total entries - ev.setUint32(12, centralSize, true); - ev.setUint32(16, centralStart, true); - parts.push(eocd); - - const total = parts.reduce((n, p) => n + p.length, 0); - const out = new Uint8Array(total); - let p = 0; - for (const part of parts) { - out.set(part, p); - p += part.length; - } - return out; -} - -async function sha256Hex(bytes: Uint8Array): Promise { - const copy = new Uint8Array(bytes); - const digest = await crypto.subtle.digest('SHA-256', copy.buffer); - return [...new Uint8Array(digest)].map((b) => b.toString(16).padStart(2, '0')).join(''); -} - -function eventsBody(): { jsonl: string; bytes: Uint8Array } { - const lines = [ - { - eventId: 'e-1', - schemaVersion: 1, - appId: 'todo', - table: 'tasks', - id: 'task-1', - op: 'insert', - data: { title: 'Buy milk' }, - clientId: 'c-1', - createdAt: '2026-04-14T10:00:00.000Z', - }, - { - eventId: 'e-2', - schemaVersion: 1, - appId: 'todo', - table: 'tasks', - id: 'task-1', - op: 'update', - data: { completed: true }, - fieldTimestamps: { completed: '2026-04-14T10:05:00.000Z' }, - clientId: 'c-1', - createdAt: '2026-04-14T10:05:00.000Z', - }, - ]; - const jsonl = lines.map((l) => JSON.stringify(l)).join('\n') + '\n'; - return { jsonl, bytes: new TextEncoder().encode(jsonl) }; -} - -async function buildBackup(overrides: Partial = {}): Promise { - const { jsonl, bytes: eventsBytes } = eventsBody(); - const sha = await sha256Hex(eventsBytes); - const manifest: BackupManifest = { - formatVersion: 1, - schemaVersion: 1, - userId: 'user-123', - createdAt: '2026-04-14T10:05:30.000Z', - eventCount: 2, - eventsSha256: sha, - apps: ['todo'], - producedBy: 'test', - schemaVersionMin: 1, - schemaVersionMax: 1, - ...overrides, - }; - const manifestBytes = new TextEncoder().encode(JSON.stringify(manifest, null, 2)); - const zip = buildZip([ - { name: 'events.jsonl', body: eventsBytes, method: 8 }, - { name: 'manifest.json', body: manifestBytes, method: 8 }, - ]); - // Hold the jsonl text alive so tests can also grep the raw body. - void jsonl; - return new Blob([zip], { type: 'application/zip' }); -} - -describe('parseBackup', () => { - it('round-trips a two-event archive and matches sha256', async () => { - const blob = await buildBackup(); - const parsed = await parseBackup(blob); - - expect(parsed.manifest.userId).toBe('user-123'); - expect(parsed.manifest.eventCount).toBe(2); - expect(parsed.manifest.apps).toEqual(['todo']); - expect(parsed.computedEventsSha256).toBe(parsed.manifest.eventsSha256); - - const events = [...iterateEvents(parsed.eventsJsonl)]; - expect(events).toHaveLength(2); - expect(events[0].op).toBe('insert'); - expect(events[1].op).toBe('update'); - expect(events[1].fieldTimestamps?.completed).toBe('2026-04-14T10:05:00.000Z'); - }); - - it('rejects archive with wrong formatVersion', async () => { - const blob = await buildBackup({ formatVersion: 99 }); - await expect(parseBackup(blob)).rejects.toThrow(BackupParseError); - await expect(parseBackup(blob)).rejects.toThrow(/formatVersion/); - }); - - it('rejects archive missing events.jsonl', async () => { - const manifest = new TextEncoder().encode(JSON.stringify({ formatVersion: 1 })); - const zip = buildZip([{ name: 'manifest.json', body: manifest, method: 8 }]); - await expect(parseBackup(new Blob([zip]))).rejects.toThrow(/events\.jsonl/); - }); - - it('rejects archive missing manifest.json', async () => { - const { bytes } = eventsBody(); - const zip = buildZip([{ name: 'events.jsonl', body: bytes, method: 8 }]); - await expect(parseBackup(new Blob([zip]))).rejects.toThrow(/manifest\.json/); - }); - - it('rejects non-zip input', async () => { - await expect(parseBackup(new Blob([new Uint8Array([1, 2, 3, 4])]))).rejects.toThrow( - /valid zip/ - ); - }); - - it('surfaces sha mismatch by returning a different computed hash', async () => { - // Mutating the manifest's claimed sha should not mutate the computed - // one — the importer compares the two and fails loudly. Here we just - // verify the parser reports both so the comparison is possible. - const blob = await buildBackup({ eventsSha256: 'deadbeef' }); - const parsed = await parseBackup(blob); - expect(parsed.manifest.eventsSha256).toBe('deadbeef'); - expect(parsed.computedEventsSha256).not.toBe('deadbeef'); - }); -}); - -describe('iterateEvents', () => { - it('skips blank lines and parses each row', () => { - const jsonl = - '{"eventId":"a","schemaVersion":1,"appId":"x","table":"t","id":"1","op":"insert","clientId":"c","createdAt":"2026-04-14T00:00:00Z"}\n' + - '\n' + - '{"eventId":"b","schemaVersion":1,"appId":"x","table":"t","id":"2","op":"insert","clientId":"c","createdAt":"2026-04-14T00:00:01Z"}\n'; - const events = [...iterateEvents(jsonl)]; - expect(events).toHaveLength(2); - expect(events[0].eventId).toBe('a'); - expect(events[1].eventId).toBe('b'); - }); - - it('throws on malformed JSON', () => { - expect(() => [...iterateEvents('{broken\n')]).toThrow(/parse failed/); - }); -}); diff --git a/apps/mana/apps/web/src/lib/data/backup/format.ts b/apps/mana/apps/web/src/lib/data/backup/format.ts deleted file mode 100644 index 4d643db2e..000000000 --- a/apps/mana/apps/web/src/lib/data/backup/format.ts +++ /dev/null @@ -1,259 +0,0 @@ -/** - * .mana archive parser — client side. - * - * mana-sync emits a small, well-defined zip (archive/zip) with exactly two - * entries: events.jsonl and manifest.json, both DEFLATE-compressed, no - * encryption, no multi-part, no Zip64. That narrow scope means we can hand- - * roll the parser against the central-directory record format rather than - * pull in a ~20KB zip dependency. - * - * Inflate itself runs through `pako`, which the repo already uses for - * spiral-db and qr-export PNG compression — so no new dependency is added. - * - * The parser is structured so the importer can stream events.jsonl line by - * line without materializing the entire (potentially large) decompressed - * body, though at this file-size scale we do decompress-to-string for - * simplicity. If users ever ship multi-GB backups we can swap the jsonl - * entry for a chunk iterator without changing the public surface. - */ - -import { inflateRaw } from 'pako'; - -export const BACKUP_FORMAT_VERSION = 1; -export const BACKUP_FILENAME_EXT = '.mana'; - -/** - * Everything from manifest.json, plus the decoded events.jsonl body. Kept - * tight so it round-trips cleanly through the import UI without pulling any - * extra zip-format leakage into the rest of the app. - */ -export interface ParsedBackup { - manifest: BackupManifest; - eventsJsonl: string; - /** Re-computed sha256 of the uncompressed events.jsonl; hex string. */ - computedEventsSha256: string; -} - -export interface BackupManifest { - formatVersion: number; - schemaVersion: number; - userId: string; - createdAt: string; - eventCount: number; - eventsSha256: string; - apps: string[]; - producedBy?: string; - schemaVersionMin?: number; - schemaVersionMax?: number; -} - -export interface BackupEvent { - eventId: string; - schemaVersion: number; - appId: string; - table: string; - id: string; - op: 'insert' | 'update' | 'delete'; - data?: Record; - fieldTimestamps?: Record; - clientId: string; - createdAt: string; -} - -// ─── Public API ───────────────────────────────────────────────── - -/** - * Parse a .mana file into its manifest + raw events.jsonl. Also re-hashes - * the decompressed events body with SHA-256 so the caller can compare - * against manifest.eventsSha256 for integrity. - */ -export async function parseBackup(file: Blob): Promise { - const buf = new Uint8Array(await file.arrayBuffer()); - const entries = readZipEntries(buf); - - const manifestEntry = entries.get('manifest.json'); - const eventsEntry = entries.get('events.jsonl'); - if (!manifestEntry) throw new BackupParseError('missing manifest.json in archive'); - if (!eventsEntry) throw new BackupParseError('missing events.jsonl in archive'); - - const manifestText = new TextDecoder().decode(inflateEntry(manifestEntry)); - let manifest: BackupManifest; - try { - manifest = JSON.parse(manifestText); - } catch (e) { - throw new BackupParseError(`manifest.json is not valid JSON: ${(e as Error).message}`); - } - validateManifest(manifest); - - const eventsBytes = inflateEntry(eventsEntry); - const eventsJsonl = new TextDecoder().decode(eventsBytes); - - const computedEventsSha256 = await sha256Hex(eventsBytes); - - return { manifest, eventsJsonl, computedEventsSha256 }; -} - -/** - * Yield events from the JSONL body one at a time. Skips blank lines; throws - * on a non-parseable row so corruption is not silently masked. Returns a - * generator so the caller can stream apply-batches without loading all - * events into a single array. - */ -export function* iterateEvents(jsonl: string): Generator { - let start = 0; - while (start < jsonl.length) { - const nl = jsonl.indexOf('\n', start); - const end = nl === -1 ? jsonl.length : nl; - const line = jsonl.slice(start, end).trim(); - start = end + 1; - if (!line) continue; - try { - yield JSON.parse(line) as BackupEvent; - } catch (e) { - throw new BackupParseError(`events.jsonl line parse failed: ${(e as Error).message}`); - } - } -} - -export class BackupParseError extends Error { - constructor(message: string) { - super(message); - this.name = 'BackupParseError'; - } -} - -// ─── Validation ───────────────────────────────────────────────── - -function validateManifest(m: unknown): asserts m is BackupManifest { - if (!m || typeof m !== 'object') throw new BackupParseError('manifest must be an object'); - const o = m as Record; - if (typeof o.formatVersion !== 'number') - throw new BackupParseError('manifest.formatVersion missing'); - if (o.formatVersion !== BACKUP_FORMAT_VERSION) { - throw new BackupParseError( - `unsupported backup formatVersion ${o.formatVersion} (this build supports ${BACKUP_FORMAT_VERSION})` - ); - } - if (typeof o.userId !== 'string' || !o.userId) - throw new BackupParseError('manifest.userId missing'); - if (typeof o.eventsSha256 !== 'string' || !o.eventsSha256) - throw new BackupParseError('manifest.eventsSha256 missing'); - if (typeof o.eventCount !== 'number') throw new BackupParseError('manifest.eventCount missing'); - if (!Array.isArray(o.apps)) throw new BackupParseError('manifest.apps missing'); -} - -// ─── Zip parser (central directory only) ─────────────────────── -// -// ZIP structure we rely on: -// End Of Central Directory Record (EOCD) at the tail -// Central Directory entries (one per file) -// Local File Header + data for each file, earlier in the stream -// -// We locate EOCD, walk the central directory, and for each entry seek to -// the local header to read the actual compressed payload. This is the -// standard "seek-by-central-dir" approach and matches what libraries like -// fflate and jszip do internally. - -interface ZipEntry { - nameUtf8: string; - method: number; // 0 = stored, 8 = deflate - crc32: number; - compressedSize: number; - uncompressedSize: number; - localHeaderOffset: number; - source: Uint8Array; // full archive buffer, held so inflate can seek -} - -const SIG_EOCD = 0x06054b50; -const SIG_CENTRAL = 0x02014b50; -const SIG_LOCAL = 0x04034b50; - -function readZipEntries(buf: Uint8Array): Map { - const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); - - // Find EOCD by scanning backward from the tail. The comment field is up - // to 65535 bytes, so in the worst case we scan 65557 bytes — fine. - const eocdOffset = findEOCD(view); - if (eocdOffset < 0) throw new BackupParseError('not a valid zip archive (no EOCD)'); - - const entryCount = view.getUint16(eocdOffset + 10, true); - const cdOffset = view.getUint32(eocdOffset + 16, true); - - const entries = new Map(); - let p = cdOffset; - for (let i = 0; i < entryCount; i++) { - if (view.getUint32(p, true) !== SIG_CENTRAL) { - throw new BackupParseError('central directory entry signature mismatch'); - } - const method = view.getUint16(p + 10, true); - const crc32 = view.getUint32(p + 16, true); - const compressedSize = view.getUint32(p + 20, true); - const uncompressedSize = view.getUint32(p + 24, true); - const nameLen = view.getUint16(p + 28, true); - const extraLen = view.getUint16(p + 30, true); - const commentLen = view.getUint16(p + 32, true); - const localHeaderOffset = view.getUint32(p + 42, true); - const nameUtf8 = new TextDecoder('utf-8').decode(buf.subarray(p + 46, p + 46 + nameLen)); - - entries.set(nameUtf8, { - nameUtf8, - method, - crc32, - compressedSize, - uncompressedSize, - localHeaderOffset, - source: buf, - }); - - p += 46 + nameLen + extraLen + commentLen; - } - return entries; -} - -function findEOCD(view: DataView): number { - const maxCommentLen = 65535; - const minOffset = Math.max(0, view.byteLength - 22 - maxCommentLen); - for (let i = view.byteLength - 22; i >= minOffset; i--) { - if (view.getUint32(i, true) === SIG_EOCD) return i; - } - return -1; -} - -function inflateEntry(entry: ZipEntry): Uint8Array { - const buf = entry.source; - const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); - const p = entry.localHeaderOffset; - if (view.getUint32(p, true) !== SIG_LOCAL) { - throw new BackupParseError(`local header signature mismatch for ${entry.nameUtf8}`); - } - const nameLen = view.getUint16(p + 26, true); - const extraLen = view.getUint16(p + 28, true); - const dataStart = p + 30 + nameLen + extraLen; - const compressed = buf.subarray(dataStart, dataStart + entry.compressedSize); - - switch (entry.method) { - case 0: - return compressed.slice(); - case 8: - return inflateRaw(compressed); - default: - throw new BackupParseError(`unsupported zip compression method ${entry.method}`); - } -} - -// ─── SHA-256 ──────────────────────────────────────────────────── - -async function sha256Hex(bytes: Uint8Array): Promise { - // Copy into a fresh ArrayBuffer so subtle.digest is happy regardless of - // whether the input is backed by SharedArrayBuffer — the DOM typings - // refuse ArrayBufferLike unions even though runtime accepts them. - const copy = new Uint8Array(bytes.byteLength); - copy.set(bytes); - const digest = await crypto.subtle.digest('SHA-256', copy.buffer); - const hex: string[] = []; - const view = new Uint8Array(digest); - for (let i = 0; i < view.length; i++) { - hex.push(view[i].toString(16).padStart(2, '0')); - } - return hex.join(''); -} diff --git a/apps/mana/apps/web/src/lib/data/backup/import.ts b/apps/mana/apps/web/src/lib/data/backup/import.ts deleted file mode 100644 index 4ba98a587..000000000 --- a/apps/mana/apps/web/src/lib/data/backup/import.ts +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Backup import — streams a .mana archive into IndexedDB. - * - * Flow: - * - * 1. parseBackup() unzips the container and re-hashes events.jsonl. - * 2. validate manifest: - * - formatVersion supported (enforced inside parseBackup) - * - userId matches the currently signed-in user (refuse otherwise — - * accidental restore into someone else's account would be a privacy - * disaster) - * - eventsSha256 matches the recomputed hash (integrity) - * 3. iterate events, group by appId, apply in batches via the existing - * applyServerChanges() path. That function already handles LWW, type - * guards, suppressed hooks, and quota recovery — reusing it means - * imported events can never diverge from the server's own apply logic. - * - * Idempotency: applyServerChanges is LWW-safe, so re-running import with - * the same file is a no-op beyond wasted work. A future optimization will - * write eventIds into a _appliedEventIds dedup table, but the LWW semantics - * already make the operation safe today. - * - * Scope (M4a): same-account restore. Events originate from mana-sync for - * this user; after import, IndexedDB is repopulated without re-pushing to - * the server (server already has every event, LWW would dedupe anyway). - * Cross-account migration requires the MK transfer path (M5). - */ - -import { applyServerChanges, type SyncChange } from '$lib/data/sync'; -import { authStore } from '$lib/stores/auth.svelte'; -import { iterateEvents, parseBackup, type BackupEvent, type ParsedBackup } from './format'; - -/** Emitted periodically during import so the UI can drive a progress bar. */ -export interface ImportProgress { - phase: 'parsing' | 'validating' | 'applying' | 'done'; - applied: number; - total: number; - currentAppId?: string; -} - -export interface ImportOptions { - /** - * If true, skip the eventsSha256 integrity check. Reserved for CLI - * debugging — production UI should always leave this false. - */ - skipIntegrityCheck?: boolean; - /** - * Called after each batch so the UI can render progress. Called at - * least once with phase='done' on successful completion. - */ - onProgress?: (p: ImportProgress) => void; -} - -export interface ImportResult { - manifest: ParsedBackup['manifest']; - appliedEvents: number; - perApp: Record; -} - -export class BackupImportError extends Error { - constructor( - message: string, - public readonly kind: - | 'parse' - | 'user-mismatch' - | 'integrity' - | 'schema-too-new' - | 'not-authenticated' - | 'apply' - ) { - super(message); - this.name = 'BackupImportError'; - } -} - -const APPLY_BATCH_SIZE = 300; - -// Mirrors CURRENT_SCHEMA_VERSION in sync.ts. We can't import the constant -// here without pulling sync.ts into every code path, but a tiny duplicate -// keyed on the same const is easier to audit than a transitive import. -// Update in lockstep when bumping the protocol version. -const MAX_SUPPORTED_IMPORT_SCHEMA_VERSION = 1; - -/** - * Import a user-provided .mana file into IndexedDB. Throws on user-mismatch, - * integrity failure, or unsupported schema version. Callers should catch - * BackupImportError and surface `kind` to the UI so the user gets a - * specific error message instead of a generic "import failed". - */ -export async function importBackup(file: File, opts: ImportOptions = {}): Promise { - const { onProgress, skipIntegrityCheck = false } = opts; - - const currentUserId = authStore.user?.id; - if (!currentUserId) { - throw new BackupImportError( - 'not signed in — log in before importing a backup', - 'not-authenticated' - ); - } - - onProgress?.({ phase: 'parsing', applied: 0, total: 0 }); - let parsed: ParsedBackup; - try { - parsed = await parseBackup(file); - } catch (e) { - throw new BackupImportError(`parse failed: ${(e as Error).message}`, 'parse'); - } - const { manifest, eventsJsonl, computedEventsSha256 } = parsed; - - onProgress?.({ phase: 'validating', applied: 0, total: manifest.eventCount }); - - if (manifest.userId !== currentUserId) { - throw new BackupImportError( - `backup is for user ${manifest.userId}, but you are signed in as ${currentUserId}`, - 'user-mismatch' - ); - } - - if (!skipIntegrityCheck && manifest.eventsSha256 !== computedEventsSha256) { - throw new BackupImportError( - `events.jsonl integrity check failed (manifest=${manifest.eventsSha256}, computed=${computedEventsSha256})`, - 'integrity' - ); - } - - const highestSeen = manifest.schemaVersionMax ?? manifest.schemaVersion; - if (highestSeen > MAX_SUPPORTED_IMPORT_SCHEMA_VERSION) { - throw new BackupImportError( - `backup contains events at schemaVersion=${highestSeen}; this build only supports up to ${MAX_SUPPORTED_IMPORT_SCHEMA_VERSION}. Update the app and try again.`, - 'schema-too-new' - ); - } - - // ─── Replay ─────────────────────────────────────────────── - // Group by appId inside each batch so applyServerChanges can scope its - // per-table apply lock tightly. Batches are kept small enough to stay - // responsive (progress reports every 300 events) but large enough that - // the per-call overhead doesn't dominate. - const perApp: Record = {}; - let applied = 0; - - const batch: Record = {}; - let batchCount = 0; - - const flush = async () => { - for (const [appId, changes] of Object.entries(batch)) { - if (changes.length === 0) continue; - onProgress?.({ phase: 'applying', applied, total: manifest.eventCount, currentAppId: appId }); - try { - await applyServerChanges(appId, changes); - } catch (e) { - throw new BackupImportError( - `apply failed for app=${appId}: ${(e as Error).message}`, - 'apply' - ); - } - perApp[appId] = (perApp[appId] ?? 0) + changes.length; - applied += changes.length; - batch[appId] = []; - } - batchCount = 0; - }; - - for (const event of iterateEvents(eventsJsonl)) { - const change = toSyncChange(event); - if (!batch[event.appId]) batch[event.appId] = []; - batch[event.appId].push(change); - batchCount++; - if (batchCount >= APPLY_BATCH_SIZE) { - await flush(); - } - } - if (batchCount > 0) await flush(); - - onProgress?.({ phase: 'done', applied, total: manifest.eventCount }); - - return { manifest, appliedEvents: applied, perApp }; -} - -// ─── Event → SyncChange mapping ───────────────────────────────── -// The backup JSONL stores raw-store shape (data + fieldTimestamps). The -// sync-engine's SyncChange uses folded shape (fields: { key: { value, -// updatedAt } }) for updates. This mirrors the server-side projection in -// mana-sync's changeFromRow. - -function toSyncChange(event: BackupEvent): SyncChange { - const base: SyncChange = { - eventId: event.eventId, - schemaVersion: event.schemaVersion, - table: event.table, - id: event.id, - op: event.op, - }; - - switch (event.op) { - case 'insert': - base.data = event.data ?? {}; - break; - case 'update': - if (event.data && event.fieldTimestamps) { - const fields: Record = {}; - for (const [key, updatedAt] of Object.entries(event.fieldTimestamps)) { - if (key in event.data) { - fields[key] = { value: event.data[key], updatedAt }; - } - } - base.fields = fields; - } - break; - case 'delete': { - const deletedAt = event.data?.deletedAt; - if (typeof deletedAt === 'string') base.deletedAt = deletedAt; - break; - } - } - - return base; -} diff --git a/apps/mana/apps/web/src/lib/data/backup/v2/export.ts b/apps/mana/apps/web/src/lib/data/backup/v2/export.ts new file mode 100644 index 000000000..cc3e17964 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/backup/v2/export.ts @@ -0,0 +1,164 @@ +/** + * Client-driven export: read Dexie tables → decrypt per-field → package + * as `.mana` v2 archive → optional passphrase-wrap. + * + * Public surface: `buildClientBackup({ appIds?, passphrase?, … })`. + * The export traverses `MODULE_CONFIGS` to decide which Dexie tables to + * include, so a new module that registers a ModuleConfig is exported + * automatically. Per-field decryption is delegated to the existing + * `decryptRecords()` — that way the exporter can't accidentally emit + * plaintext for a different field than the importer re-encrypts. + */ + +import { db } from '$lib/data/database'; +import { decryptRecords } from '$lib/data/crypto'; +import { MODULE_CONFIGS } from '$lib/data/module-registry'; +import { authStore } from '$lib/stores/auth.svelte'; +import { buildBackup, buildSealedBackup, buildSealedDataBody } from './format'; +import type { BackupManifestV2, BackupScope, PassphraseWrap } from './format'; +import { seal } from './passphrase'; + +export interface ExportOptions { + /** AppIds to include. If omitted or empty, all registered modules + * are exported (scope.type = 'full'). */ + appIds?: string[]; + /** When set, the archive is passphrase-wrapped. Min 12 chars; the UI + * should enforce that before calling. */ + passphrase?: string; + /** Called after each table so the UI can render progress. */ + onProgress?: (p: ExportProgress) => void; + /** Override the version string embedded in the manifest. Tests set + * this to keep fixtures stable. */ + producedBy?: string; +} + +export interface ExportProgress { + phase: 'collecting' | 'packaging' | 'sealing' | 'done'; + tablesProcessed: number; + totalTables: number; + currentTable?: string; +} + +export interface ExportResult { + blob: Blob; + filename: string; + rowCounts: Record; +} + +/** Small README that accompanies the archive — non-binding, informational. */ +const README = `Mana Data Export + +This archive was produced by Mana's "Export & Import" feature. Contents: + +- manifest.json — format version, which modules are inside, row counts, + optional passphrase metadata. +- data/*.jsonl — one line per row, JSON-encoded. Encrypted fields were + decrypted at export time; plain strings here. +- data.sealed — present iff the manifest declares a passphrase. An + AES-GCM-256 blob over the data/ payload; see the + manifest.passphrase block for KDF params. + +Re-importing into another Mana account re-encrypts automatically using +that account's vault key. Without a vault, the plain JSONL is directly +readable in any text editor / jq / Python. +`; + +export async function buildClientBackup(opts: ExportOptions = {}): Promise { + const userId = authStore.user?.id ?? 'unknown'; + + // Resolve scope — either user-provided appId list (filtered) or all. + const filter = opts.appIds && opts.appIds.length > 0 ? new Set(opts.appIds) : null; + const scope: BackupScope = filter ? { type: 'filtered', appIds: [...filter] } : { type: 'full' }; + + // Flatten the module configs into a list of { table, appId } so we + // can walk it linearly (and get accurate progress counts). + const tableTargets: { table: string; appId: string }[] = []; + for (const mod of MODULE_CONFIGS) { + if (filter && !filter.has(mod.appId)) continue; + for (const t of mod.tables) tableTargets.push({ table: t.name, appId: mod.appId }); + } + + const totalTables = tableTargets.length; + const tables: Record[]> = {}; + const rowCounts: Record = {}; + + for (let i = 0; i < tableTargets.length; i++) { + const { table } = tableTargets[i]; + opts.onProgress?.({ + phase: 'collecting', + tablesProcessed: i, + totalTables, + currentTable: table, + }); + + const rows = await readTable(table); + tables[table] = rows; + rowCounts[table] = rows.length; + } + + // Build manifest. `fieldsPlaintext: true` is always correct for this + // export path — we decrypted on the way in. + const manifest: BackupManifestV2 = { + formatVersion: 2, + schemaVersion: getSchemaVersion(), + producedBy: opts.producedBy ?? 'mana-web', + exportedAt: new Date().toISOString(), + userId, + scope, + rowCounts, + fieldsPlaintext: true, + }; + + opts.onProgress?.({ phase: 'packaging', tablesProcessed: totalTables, totalTables }); + + let archive: Uint8Array; + if (opts.passphrase) { + opts.onProgress?.({ phase: 'sealing', tablesProcessed: totalTables, totalTables }); + const innerBody = buildSealedDataBody(tables); + const { sealed, wrap } = await seal(opts.passphrase, innerBody); + manifest.passphrase = wrap satisfies PassphraseWrap; + archive = buildSealedBackup(manifest, sealed, README); + } else { + archive = await buildBackup({ manifest, tables, readme: README }); + } + + opts.onProgress?.({ phase: 'done', tablesProcessed: totalTables, totalTables }); + + const filename = defaultFilename(scope, !!opts.passphrase); + const blob = new Blob([archive as unknown as ArrayBuffer], { type: 'application/octet-stream' }); + return { blob, filename, rowCounts }; +} + +/** + * Pull every non-deleted row from a Dexie table and decrypt the fields + * in the encryption registry. Missing tables (e.g. a module removed its + * ModuleConfig but the registry iter still asks for it during a stale + * build) are tolerated with an empty array — the exporter should never + * crash on a schema drift. + */ +async function readTable(table: string): Promise[]> { + let rawRows: Record[]; + try { + rawRows = (await db.table(table).toArray()) as Record[]; + } catch { + return []; + } + // Keep tombstoned (deletedAt) rows out of the export — the receiving + // device has no use for them and they just balloon the file size. + const live = rawRows.filter((row) => !(row as { deletedAt?: unknown }).deletedAt); + return decryptRecords(table, live); +} + +function defaultFilename(scope: BackupScope, sealed: boolean): string { + const date = new Date().toISOString().slice(0, 10); + const scopeTag = scope.type === 'full' ? 'full' : scope.appIds.join('-'); + const sealTag = sealed ? '.sealed' : ''; + return `mana-${scopeTag}-${date}${sealTag}.mana`; +} + +function getSchemaVersion(): number { + // Dexie exposes verno on the opened db. If the db isn't open yet for + // some reason, fall back to 0 — the importer uses schemaVersion only + // as a compat guard, 0 will just match the older-backup branch there. + return (db.verno as number | undefined) ?? 0; +} diff --git a/apps/mana/apps/web/src/lib/data/backup/v2/format.test.ts b/apps/mana/apps/web/src/lib/data/backup/v2/format.test.ts new file mode 100644 index 000000000..f779e41bd --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/backup/v2/format.test.ts @@ -0,0 +1,140 @@ +/** + * Unit tests for the v2 `.mana` format layer. These run pure — no Dexie, + * no per-field crypto, no SvelteKit. The goal is coverage on: + * + * 1. PKZIP round-trip: buildBackup → readBackup recovers manifest + rows + * 2. The sealed-but-still-packaged path (`buildSealedBackup` → + * `readBackup` returns `SealedBackupV2` without trying to decrypt) + * 3. Manifest validation rejects junk with a specific error class + * 4. Passphrase wrap/unwrap round-trip, including the + * wrong-passphrase → `PassphraseError` signal + * + * If we regress any of these four, the export/import feature is broken + * in a way that the UI-level tests wouldn't catch (because they'd just + * see "archive invalid" without pinpointing which layer failed). + */ + +import { describe, it, expect } from 'vitest'; +import { + BACKUP_FORMAT_VERSION, + BackupParseError, + buildBackup, + buildSealedBackup, + buildSealedDataBody, + parseManifest, + parseSealedData, + readBackup, + type BackupManifestV2, +} from './format'; +import { PassphraseError, seal, unseal } from './passphrase'; + +function sampleManifest(overrides: Partial = {}): BackupManifestV2 { + return { + formatVersion: 2, + schemaVersion: 33, + producedBy: 'mana-web/test', + exportedAt: '2026-04-22T12:00:00.000Z', + userId: 'user-1', + scope: { type: 'full' }, + rowCounts: { todos: 2, notes: 1 }, + fieldsPlaintext: true, + ...overrides, + }; +} + +const sampleTables = { + todos: [ + { id: 'a', title: 'walk the dog', done: false }, + { id: 'b', title: 'buy coffee', done: true }, + ], + notes: [{ id: 'n-1', body: 'hello world' }], +}; + +describe('format: unsealed round-trip', () => { + it('recovers manifest + tables through buildBackup → readBackup', async () => { + const manifest = sampleManifest(); + const archive = await buildBackup({ manifest, tables: sampleTables, readme: 'hi' }); + const blob = new Blob([archive as unknown as ArrayBuffer]); + + const parsed = await readBackup(blob); + if ('sealedData' in parsed) throw new Error('expected unsealed'); + + expect(parsed.manifest.formatVersion).toBe(BACKUP_FORMAT_VERSION); + expect(parsed.manifest.userId).toBe('user-1'); + expect(parsed.tables.todos).toEqual(sampleTables.todos); + expect(parsed.tables.notes).toEqual(sampleTables.notes); + }); + + it('tolerates an empty table', async () => { + const manifest = sampleManifest({ rowCounts: { todos: 0 } }); + const archive = await buildBackup({ manifest, tables: { todos: [] } }); + const parsed = await readBackup(new Blob([archive as unknown as ArrayBuffer])); + if ('sealedData' in parsed) throw new Error('expected unsealed'); + expect(parsed.tables.todos).toEqual([]); + }); +}); + +describe('format: parseManifest', () => { + it('rejects non-JSON', () => { + expect(() => parseManifest('{ not json')).toThrow(BackupParseError); + }); + + it('rejects formatVersion !== 2', () => { + expect(() => parseManifest(JSON.stringify({ ...sampleManifest(), formatVersion: 1 }))).toThrow( + /unsupported backup formatVersion/ + ); + }); + + it('rejects missing userId', () => { + const m: Record = { ...sampleManifest() }; + delete m.userId; + expect(() => parseManifest(JSON.stringify(m))).toThrow(BackupParseError); + }); +}); + +describe('format: sealed path', () => { + it('readBackup returns SealedBackupV2 for passphrase-wrapped archives', async () => { + const plainBody = buildSealedDataBody(sampleTables); + const { sealed, wrap } = await seal('correct-horse-battery', plainBody); + const manifest = sampleManifest({ passphrase: wrap }); + const outer = buildSealedBackup(manifest, sealed); + + const parsed = await readBackup(new Blob([outer as unknown as ArrayBuffer])); + if (!('sealedData' in parsed)) throw new Error('expected sealed'); + expect(parsed.manifest.passphrase).toBeDefined(); + expect(parsed.sealedData.byteLength).toBe(sealed.byteLength); + }); + + it('round-trips through seal → unseal → parseSealedData', async () => { + const plainBody = buildSealedDataBody(sampleTables); + const pass = 'correct-horse-battery-staple'; + const { sealed, wrap } = await seal(pass, plainBody); + const manifest = sampleManifest({ passphrase: wrap }); + const unsealedBody = await unseal(pass, sealed, wrap); + const parsed = await parseSealedData(manifest, unsealedBody); + expect(parsed.tables.todos).toEqual(sampleTables.todos); + expect(parsed.tables.notes).toEqual(sampleTables.notes); + }); +}); + +describe('passphrase: failure modes', () => { + it('throws PassphraseError on wrong passphrase', async () => { + const plainBody = buildSealedDataBody(sampleTables); + const { sealed, wrap } = await seal('right-one', plainBody); + await expect(unseal('wrong-one', sealed, wrap)).rejects.toBeInstanceOf(PassphraseError); + }); + + it('throws PassphraseError on integrity mismatch after correct decrypt', async () => { + const plainBody = buildSealedDataBody(sampleTables); + const { sealed, wrap } = await seal('p', plainBody); + // Manifest claims a different plaintext hash than what we actually + // have — simulates a tampered archive where the attacker kept the + // ciphertext valid but swapped the manifest. + const tamperedWrap = { ...wrap, plaintextSha256: 'a'.repeat(64) }; + await expect(unseal('p', sealed, tamperedWrap)).rejects.toBeInstanceOf(PassphraseError); + }); + + it('throws PassphraseError on empty passphrase', async () => { + await expect(seal('', new Uint8Array([1, 2, 3]))).rejects.toBeInstanceOf(PassphraseError); + }); +}); diff --git a/apps/mana/apps/web/src/lib/data/backup/v2/format.ts b/apps/mana/apps/web/src/lib/data/backup/v2/format.ts new file mode 100644 index 000000000..b6b859dda --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/backup/v2/format.ts @@ -0,0 +1,465 @@ +/** + * .mana v2 format — snapshot-based client-driven backup. + * + * v2 breaks from v1 (event-stream from mana-sync) and instead packages + * the current-state rows of selected Dexie tables as one `.jsonl` per + * table, wrapped in a tiny ZIP container. Optional passphrase-wrapping + * folds the `data/` portion into a single AES-GCM blob under a + * PBKDF2-derived key — see `passphrase.ts`. + * + * This file owns: + * - the type surface (`BackupManifestV2`, `ParsedBackupV2`, …) + * - CRC32 + SHA-256 integrity helpers + * - the ZIP reader + writer + * - a narrow public API: `buildZip`, `readZip`, `parseManifest` + * + * It deliberately does NOT know about Dexie tables, per-field crypto, + * or module registry. Those concerns live in `export.ts` + `import.ts` + * — keeping this file as a pure byte-and-bit layer makes it trivial + * to unit-test. + */ + +import { inflateRaw, deflateRaw } from 'pako'; + +export const BACKUP_FORMAT_VERSION = 2; +export const BACKUP_FILENAME_EXT = '.mana'; + +// ─── Manifest types ─────────────────────────────────────────────── + +export type BackupScope = { type: 'full' } | { type: 'filtered'; appIds: string[] }; + +export interface PassphraseWrap { + kdf: 'PBKDF2-SHA256'; + /** OWASP 2023 recommendation: 600k. */ + kdfIterations: number; + /** 16 random bytes, base64-url. */ + kdfSaltBase64: string; + cipher: 'AES-GCM-256'; + /** 12 random bytes, base64-url. */ + ivBase64: string; + /** SHA-256 hex of the plaintext body (= deflate-compressed `data.tar`). + * Lets the importer detect passphrase-typo failures even though the + * AEAD tag already does — we surface a clear "wrong passphrase" vs. + * "corrupted file" distinction by comparing the hash AFTER successful + * decrypt. */ + plaintextSha256: string; +} + +export interface BackupManifestV2 { + formatVersion: 2; + /** Dexie schema version at export time. */ + schemaVersion: number; + /** "mana-web/1.2.3" or similar. Informational. */ + producedBy: string; + exportedAt: string; + userId: string; + scope: BackupScope; + /** rowCounts[tableName] = number. Non-authoritative; the actual jsonl + * content wins if there's a discrepancy. Used for quick UI summary. */ + rowCounts: Record; + /** true = the `data/` jsonls contain plaintext values (Mana's per-field + * crypto already reversed). false is reserved for a future flag where a + * client emits cipher-preserving dumps (not built yet). */ + fieldsPlaintext: boolean; + /** Present iff the archive is passphrase-wrapped. */ + passphrase?: PassphraseWrap; +} + +/** Unpacked archive ready for the importer to chew on. */ +export interface ParsedBackupV2 { + manifest: BackupManifestV2; + /** Map from table name → array of row objects. Present when the archive + * is unencrypted OR has been unwrapped by the caller. */ + tables: Record[]>; +} + +/** Intermediate when the archive is passphrase-wrapped and not yet unlocked. */ +export interface SealedBackupV2 { + manifest: BackupManifestV2; + /** The encrypted tarball — to be decrypted by `passphrase.ts` */ + sealedData: Uint8Array; +} + +// ─── Public API ────────────────────────────────────────────────── + +export class BackupParseError extends Error { + constructor(message: string) { + super(message); + this.name = 'BackupParseError'; + } +} + +/** + * Read a `.mana` v2 blob. Returns either the fully-parsed tables (when + * unencrypted) or a `SealedBackupV2` that the caller must decrypt via + * `passphrase.ts` and then feed back in through `parseSealedData`. + */ +export async function readBackup(file: Blob): Promise { + const buf = new Uint8Array(await file.arrayBuffer()); + const entries = readZipEntries(buf); + + const manifestEntry = entries.get('manifest.json'); + if (!manifestEntry) throw new BackupParseError('missing manifest.json'); + const manifest = parseManifest(new TextDecoder().decode(inflateEntry(manifestEntry))); + + if (manifest.passphrase) { + const sealed = entries.get('data.sealed'); + if (!sealed) + throw new BackupParseError('manifest declares passphrase but data.sealed is missing'); + return { manifest, sealedData: inflateEntry(sealed) }; + } + + const tables = await collectTables(entries); + return { manifest, tables }; +} + +/** + * Parse the raw plaintext bytes produced by `passphrase.unseal()` into a + * ParsedBackupV2. Only called by the import pipeline after a successful + * passphrase unwrap. + */ +export async function parseSealedData( + manifest: BackupManifestV2, + plaintextBody: Uint8Array +): Promise { + const entries = readZipEntries(plaintextBody); + const tables = await collectTables(entries); + return { manifest, tables }; +} + +export interface BuildInput { + manifest: BackupManifestV2; + /** Map from table name → array of row objects. Rows should already have + * their encrypted fields decrypted (the export pipeline handles that). */ + tables: Record[]>; + readme?: string; +} + +/** + * Build an unencrypted `.mana` v2 archive. Caller-side callers pass + * already-decrypted rows. For passphrase-wrapped archives, call this to + * get the inner zip, then hand it to `passphrase.seal()` to produce the + * outer wrapper. + */ +export async function buildBackup(input: BuildInput): Promise { + const enc = new TextEncoder(); + const entries: EntrySpec[] = []; + + // manifest.json first so the reader can short-circuit on malformed + // archives without decompressing the data payload. + entries.push({ + name: 'manifest.json', + body: enc.encode(JSON.stringify(input.manifest, null, 2)), + }); + + for (const [table, rows] of Object.entries(input.tables)) { + const jsonl = rows.map((r) => JSON.stringify(r)).join('\n') + (rows.length > 0 ? '\n' : ''); + entries.push({ name: `data/${table}.jsonl`, body: enc.encode(jsonl) }); + } + + if (input.readme) { + entries.push({ name: 'README.md', body: enc.encode(input.readme) }); + } + + return buildZip(entries); +} + +/** + * Build the *inner* data-only zip — used by the passphrase path to + * produce the blob that gets encrypted into `data.sealed`. Same row + * format as the unsealed archive, just without the manifest or README. + */ +export function buildSealedDataBody(tables: Record[]>): Uint8Array { + const enc = new TextEncoder(); + const entries: EntrySpec[] = []; + for (const [table, rows] of Object.entries(tables)) { + const jsonl = rows.map((r) => JSON.stringify(r)).join('\n') + (rows.length > 0 ? '\n' : ''); + entries.push({ name: `data/${table}.jsonl`, body: enc.encode(jsonl) }); + } + return buildZip(entries); +} + +/** + * Assemble the outer archive when the inner body is passphrase-wrapped. + * The wrapped bytes get stored as `data.sealed` (uncompressed — already + * high-entropy ciphertext, deflate gains nothing). + */ +export function buildSealedBackup( + manifest: BackupManifestV2, + sealedData: Uint8Array, + readme?: string +): Uint8Array { + const enc = new TextEncoder(); + const entries: EntrySpec[] = [ + { + name: 'manifest.json', + body: enc.encode(JSON.stringify(manifest, null, 2)), + }, + { name: 'data.sealed', body: sealedData, method: STORED }, + ]; + if (readme) entries.push({ name: 'README.md', body: enc.encode(readme) }); + return buildZip(entries); +} + +export function parseManifest(json: string): BackupManifestV2 { + let raw: unknown; + try { + raw = JSON.parse(json); + } catch (e) { + throw new BackupParseError(`manifest.json is not valid JSON: ${(e as Error).message}`); + } + if (!raw || typeof raw !== 'object') { + throw new BackupParseError('manifest must be an object'); + } + const o = raw as Record; + if (o.formatVersion !== 2) { + throw new BackupParseError( + `unsupported backup formatVersion ${String(o.formatVersion)} (this build supports v${BACKUP_FORMAT_VERSION})` + ); + } + if (typeof o.schemaVersion !== 'number') + throw new BackupParseError('manifest.schemaVersion missing'); + if (typeof o.userId !== 'string' || !o.userId) + throw new BackupParseError('manifest.userId missing'); + if (typeof o.exportedAt !== 'string') throw new BackupParseError('manifest.exportedAt missing'); + if (!o.scope || typeof o.scope !== 'object') throw new BackupParseError('manifest.scope missing'); + if (!o.rowCounts || typeof o.rowCounts !== 'object') + throw new BackupParseError('manifest.rowCounts missing'); + if (typeof o.fieldsPlaintext !== 'boolean') + throw new BackupParseError('manifest.fieldsPlaintext missing'); + return raw as BackupManifestV2; +} + +// ─── Hashes ────────────────────────────────────────────────────── + +export async function sha256Hex(bytes: Uint8Array): Promise { + // Copy so subtle.digest gets a plain ArrayBuffer (DOM typings reject + // SharedArrayBuffer-backed views even though runtime accepts them). + const copy = new Uint8Array(bytes.byteLength); + copy.set(bytes); + const digest = await crypto.subtle.digest('SHA-256', copy.buffer); + return bytesToHex(new Uint8Array(digest)); +} + +function bytesToHex(bytes: Uint8Array): string { + const out: string[] = []; + for (let i = 0; i < bytes.length; i++) out.push(bytes[i].toString(16).padStart(2, '0')); + return out.join(''); +} + +// ─── Zip reader + writer ───────────────────────────────────────── +// +// Narrow but spec-compliant implementation: LocalFileHeader + CentralDir + +// EOCD. Supports deflate (method 8) + stored (method 0). No Zip64, no +// encryption at the zip level (we do our own AEAD above), no multi-part. +// Writes valid CRC32 so other tools (Finder, unzip, fflate) accept the +// output too — v1's test helper skipped this. + +const SIG_LOCAL = 0x04034b50; +const SIG_CENTRAL = 0x02014b50; +const SIG_EOCD = 0x06054b50; +const DEFLATE = 8; +const STORED = 0; + +interface EntrySpec { + name: string; + body: Uint8Array; + method?: typeof DEFLATE | typeof STORED; +} + +interface ZipEntry { + nameUtf8: string; + method: number; + crc32: number; + compressedSize: number; + uncompressedSize: number; + localHeaderOffset: number; + source: Uint8Array; +} + +export function buildZip(entries: EntrySpec[]): Uint8Array { + const parts: Uint8Array[] = []; + const central: Uint8Array[] = []; + let offset = 0; + + for (const e of entries) { + const method = e.method ?? DEFLATE; + const nameBytes = new TextEncoder().encode(e.name); + const data = method === DEFLATE ? deflateRaw(e.body) : e.body; + const crc = crc32(e.body); + + // Local file header (30 bytes fixed + name + extra) + const lh = new Uint8Array(30); + const lv = new DataView(lh.buffer); + lv.setUint32(0, SIG_LOCAL, true); + lv.setUint16(4, 20, true); // version needed + lv.setUint16(6, 0, true); // flags + lv.setUint16(8, method, true); + lv.setUint16(10, 0, true); // mtime + lv.setUint16(12, 0, true); // mdate + lv.setUint32(14, crc, true); + lv.setUint32(18, data.length, true); + lv.setUint32(22, e.body.length, true); + lv.setUint16(26, nameBytes.length, true); + lv.setUint16(28, 0, true); // extra len + + parts.push(lh, nameBytes, data); + const localHeaderOffset = offset; + offset += lh.length + nameBytes.length + data.length; + + // Central directory entry (46 bytes fixed + name + extra + comment) + const cd = new Uint8Array(46); + const cv = new DataView(cd.buffer); + cv.setUint32(0, SIG_CENTRAL, true); + cv.setUint16(4, 20, true); // version made by + cv.setUint16(6, 20, true); // version needed + cv.setUint16(8, 0, true); // flags + cv.setUint16(10, method, true); + cv.setUint32(16, crc, true); + cv.setUint32(20, data.length, true); + cv.setUint32(24, e.body.length, true); + cv.setUint16(28, nameBytes.length, true); + cv.setUint32(42, localHeaderOffset, true); + central.push(cd, nameBytes); + } + + const centralStart = offset; + for (const c of central) { + parts.push(c); + offset += c.length; + } + const centralSize = offset - centralStart; + + const eocd = new Uint8Array(22); + const ev = new DataView(eocd.buffer); + ev.setUint32(0, SIG_EOCD, true); + ev.setUint16(8, entries.length, true); + ev.setUint16(10, entries.length, true); + ev.setUint32(12, centralSize, true); + ev.setUint32(16, centralStart, true); + parts.push(eocd); + + const total = parts.reduce((n, p) => n + p.length, 0); + const out = new Uint8Array(total); + let p = 0; + for (const part of parts) { + out.set(part, p); + p += part.length; + } + return out; +} + +function readZipEntries(buf: Uint8Array): Map { + const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); + const eocdOffset = findEOCD(view); + if (eocdOffset < 0) throw new BackupParseError('not a valid zip archive (no EOCD)'); + + const entryCount = view.getUint16(eocdOffset + 10, true); + const cdOffset = view.getUint32(eocdOffset + 16, true); + + const entries = new Map(); + let p = cdOffset; + for (let i = 0; i < entryCount; i++) { + if (view.getUint32(p, true) !== SIG_CENTRAL) { + throw new BackupParseError('central directory entry signature mismatch'); + } + const method = view.getUint16(p + 10, true); + const crc32Val = view.getUint32(p + 16, true); + const compressedSize = view.getUint32(p + 20, true); + const uncompressedSize = view.getUint32(p + 24, true); + const nameLen = view.getUint16(p + 28, true); + const extraLen = view.getUint16(p + 30, true); + const commentLen = view.getUint16(p + 32, true); + const localHeaderOffset = view.getUint32(p + 42, true); + const nameUtf8 = new TextDecoder('utf-8').decode(buf.subarray(p + 46, p + 46 + nameLen)); + + entries.set(nameUtf8, { + nameUtf8, + method, + crc32: crc32Val, + compressedSize, + uncompressedSize, + localHeaderOffset, + source: buf, + }); + + p += 46 + nameLen + extraLen + commentLen; + } + return entries; +} + +function findEOCD(view: DataView): number { + const maxCommentLen = 65535; + const minOffset = Math.max(0, view.byteLength - 22 - maxCommentLen); + for (let i = view.byteLength - 22; i >= minOffset; i--) { + if (view.getUint32(i, true) === SIG_EOCD) return i; + } + return -1; +} + +function inflateEntry(entry: ZipEntry): Uint8Array { + const buf = entry.source; + const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); + const p = entry.localHeaderOffset; + if (view.getUint32(p, true) !== SIG_LOCAL) { + throw new BackupParseError(`local header signature mismatch for ${entry.nameUtf8}`); + } + const nameLen = view.getUint16(p + 26, true); + const extraLen = view.getUint16(p + 28, true); + const dataStart = p + 30 + nameLen + extraLen; + const compressed = buf.subarray(dataStart, dataStart + entry.compressedSize); + + switch (entry.method) { + case STORED: + return compressed.slice(); + case DEFLATE: + return inflateRaw(compressed); + default: + throw new BackupParseError(`unsupported zip compression method ${entry.method}`); + } +} + +async function collectTables( + entries: Map +): Promise[]>> { + const tables: Record[]> = {}; + const dec = new TextDecoder(); + for (const [name, entry] of entries) { + if (!name.startsWith('data/') || !name.endsWith('.jsonl')) continue; + const tableName = name.slice('data/'.length, -'.jsonl'.length); + if (!tableName || tableName.includes('/')) continue; // defensive: skip nested / empty + const text = dec.decode(inflateEntry(entry)); + const rows: Record[] = []; + for (const line of text.split('\n')) { + const trimmed = line.trim(); + if (!trimmed) continue; + try { + rows.push(JSON.parse(trimmed) as Record); + } catch (e) { + throw new BackupParseError(`data/${tableName}.jsonl parse failed: ${(e as Error).message}`); + } + } + tables[tableName] = rows; + } + return tables; +} + +// ─── CRC32 (IEEE-802.3) ────────────────────────────────────────── + +const CRC32_TABLE = (() => { + const table = new Uint32Array(256); + for (let i = 0; i < 256; i++) { + let c = i; + for (let k = 0; k < 8; k++) c = c & 1 ? 0xedb88320 ^ (c >>> 1) : c >>> 1; + table[i] = c >>> 0; + } + return table; +})(); + +function crc32(bytes: Uint8Array): number { + let crc = 0xffffffff; + for (let i = 0; i < bytes.length; i++) { + crc = CRC32_TABLE[(crc ^ bytes[i]) & 0xff] ^ (crc >>> 8); + } + return (crc ^ 0xffffffff) >>> 0; +} diff --git a/apps/mana/apps/web/src/lib/data/backup/v2/import.ts b/apps/mana/apps/web/src/lib/data/backup/v2/import.ts new file mode 100644 index 000000000..ce4504605 --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/backup/v2/import.ts @@ -0,0 +1,175 @@ +/** + * Client-driven import: unseal (if needed) → re-encrypt per field → + * `bulkPut` into Dexie. + * + * The reverse of `export.ts`. A few non-obvious decisions: + * + * - **Per-row re-encryption uses `encryptRecord(table, row)`**. That + * walks `ENCRYPTION_REGISTRY[table].fields` and wraps each listed + * field. Because the exporter fully decrypted the fields, the + * importer sees them as plain strings regardless of which key they + * were encrypted under before — which is exactly what enables + * cross-account migration. + * + * - **bulkPut, not bulkAdd**. Same-id rows overwrite the local copy. + * That matches LWW semantics for the common "restore over a fresh + * install" case. If the user imports into an account that already + * has some content, they might lose unseen-on-this-device edits — + * we surface that in the confirmation UI, not the import logic. + * + * - **Unknown tables are skipped, not fatal**. A backup might carry + * a `wisekeep` table from when that module existed; if the current + * build dropped it, we silently ignore the rows rather than block + * the restore. + * + * - **ownerId is NOT overwritten**. Dexie's creating-hook stamps the + * current session's userId onto any row that doesn't carry one. We + * delete `userId` from the incoming rows to force that stamp — so + * cross-account restores don't leak the source userId. + */ + +import { db } from '$lib/data/database'; +import { encryptRecord } from '$lib/data/crypto'; +import { MODULE_CONFIGS } from '$lib/data/module-registry'; +import { + BackupParseError, + parseSealedData, + readBackup, + type BackupManifestV2, + type ParsedBackupV2, +} from './format'; +import { PassphraseError, unseal } from './passphrase'; + +export interface ImportOptions { + /** Required if the manifest declares a passphrase-wrap. */ + passphrase?: string; + /** Progress callback — fires per table. */ + onProgress?: (p: ImportProgress) => void; +} + +export interface ImportProgress { + phase: 'parsing' | 'unsealing' | 'applying' | 'done'; + tablesProcessed: number; + totalTables: number; + currentTable?: string; +} + +export interface ImportResult { + manifest: BackupManifestV2; + /** Rows actually applied, by table. Skipped tables not listed. */ + appliedPerTable: Record; + totalApplied: number; + /** Tables that were in the archive but aren't in the current build. */ + skippedTables: string[]; +} + +export async function applyClientBackup( + file: Blob, + opts: ImportOptions = {} +): Promise { + opts.onProgress?.({ phase: 'parsing', tablesProcessed: 0, totalTables: 0 }); + + const parsed = await readBackup(file); + + // Unwrap passphrase-sealed archives. + let data: ParsedBackupV2; + if ('sealedData' in parsed) { + if (!parsed.manifest.passphrase) { + throw new BackupParseError('archive contains data.sealed but manifest.passphrase is missing'); + } + if (!opts.passphrase) { + throw new PassphraseError('archive is passphrase-protected but no passphrase was provided'); + } + opts.onProgress?.({ phase: 'unsealing', tablesProcessed: 0, totalTables: 0 }); + const inner = await unseal(opts.passphrase, parsed.sealedData, parsed.manifest.passphrase); + data = await parseSealedData(parsed.manifest, inner); + } else { + data = parsed; + } + + // Compat-check. formatVersion was already checked in parseManifest; + // guard against schema drift here. + if (!isSchemaCompatible(data.manifest.schemaVersion)) { + throw new BackupParseError( + `archive schema v${data.manifest.schemaVersion} is not compatible with this build — ` + + `update Mana or re-export from a matching version` + ); + } + + const knownTables = collectKnownTables(); + const entries = Object.entries(data.tables); + const totalTables = entries.length; + const appliedPerTable: Record = {}; + const skippedTables: string[] = []; + let totalApplied = 0; + + for (let i = 0; i < entries.length; i++) { + const [table, rows] = entries[i]; + opts.onProgress?.({ + phase: 'applying', + tablesProcessed: i, + totalTables, + currentTable: table, + }); + + if (!knownTables.has(table)) { + skippedTables.push(table); + continue; + } + + if (rows.length === 0) { + appliedPerTable[table] = 0; + continue; + } + + const prepared: Record[] = []; + for (const row of rows) { + // Strip the source user's id so the Dexie creating-hook stamps + // the current session's userId. This is what makes cross-account + // restores work correctly — the imported rows are "adopted" by + // the importing user. + const clone = { ...row } as Record; + delete clone.userId; + await encryptRecord(table, clone); + prepared.push(clone); + } + + await db.table(table).bulkPut(prepared); + appliedPerTable[table] = prepared.length; + totalApplied += prepared.length; + } + + opts.onProgress?.({ phase: 'done', tablesProcessed: totalTables, totalTables }); + + return { + manifest: data.manifest, + appliedPerTable, + totalApplied, + skippedTables, + }; +} + +/** + * Collect every table name declared by the currently-built modules. + * Rows for tables NOT in this set are skipped (logged, not crashed). + */ +function collectKnownTables(): Set { + const out = new Set(); + for (const mod of MODULE_CONFIGS) for (const t of mod.tables) out.add(t.name); + return out; +} + +/** + * Minimal schema-compat gate. Policy: accept exports from the current + * Dexie version and up to two versions older. Two rationale points: + * 1. Anything older has likely had a destructive migration step we + * can't replay client-side. + * 2. Exports from the FUTURE (user downgraded Mana) are outright + * refused — we have no idea what fields might have been added. + */ +function isSchemaCompatible(schemaVersion: number): boolean { + const current = (db.verno as number | undefined) ?? 0; + if (schemaVersion > current) return false; + if (schemaVersion === 0) return true; // producedBy reported 'unknown' — be lenient + return current - schemaVersion <= 2; +} diff --git a/apps/mana/apps/web/src/lib/data/backup/v2/passphrase.ts b/apps/mana/apps/web/src/lib/data/backup/v2/passphrase.ts new file mode 100644 index 000000000..0bec358ed --- /dev/null +++ b/apps/mana/apps/web/src/lib/data/backup/v2/passphrase.ts @@ -0,0 +1,166 @@ +/** + * Passphrase-based wrap/unwrap for `.mana` v2 archives. + * + * Design: + * - KDF: PBKDF2-HMAC-SHA256, 600k iterations (OWASP 2023 guidance). + * - AEAD: AES-GCM-256 (Web Crypto native, same primitive as the + * per-field vault crypto). + * - 16-byte random salt per archive, 12-byte random IV per archive + * (GCM standard). + * - SHA-256 of the plaintext body goes into the manifest so a + * wrong-passphrase failure is distinguishable from file corruption: + * * AEAD auth-tag mismatch → "that passphrase doesn't open this" + * * AEAD OK but sha256 mismatch → "archive is corrupted" + * + * We use Web Crypto exclusively — no argon2-browser / scrypt-js deps. + * If Argon2id is desired later it's an additive manifest field: + * manifest.passphrase.kdf === 'Argon2id-v1.3' → new code path. + * + * Memory: the whole body sits in memory during wrap/unwrap. For a 20 MB + * snapshot that's fine; if we ever ship GB-class datasets we'd stream, + * but Web Crypto's one-shot encrypt API would have to grow along with it. + */ + +import { sha256Hex, type PassphraseWrap } from './format'; + +export const KDF_ITERATIONS = 600_000; +const SALT_BYTES = 16; +const IV_BYTES = 12; + +export interface SealResult { + sealed: Uint8Array; + wrap: PassphraseWrap; +} + +/** + * Encrypt `plaintextBody` under a key derived from `passphrase`. Returns + * the ciphertext and the manifest `passphrase` block the caller should + * stamp onto the BackupManifestV2. + */ +export async function seal(passphrase: string, plaintextBody: Uint8Array): Promise { + if (!passphrase) throw new PassphraseError('passphrase must not be empty'); + + const salt = crypto.getRandomValues(new Uint8Array(SALT_BYTES)); + const iv = crypto.getRandomValues(new Uint8Array(IV_BYTES)); + const key = await deriveKey(passphrase, salt, KDF_ITERATIONS); + const plaintextSha256 = await sha256Hex(plaintextBody); + + const copy = new Uint8Array(plaintextBody.byteLength); + copy.set(plaintextBody); + const ct = await crypto.subtle.encrypt({ name: 'AES-GCM', iv: toBuffer(iv) }, key, copy.buffer); + + return { + sealed: new Uint8Array(ct), + wrap: { + kdf: 'PBKDF2-SHA256', + kdfIterations: KDF_ITERATIONS, + kdfSaltBase64: bytesToBase64Url(salt), + cipher: 'AES-GCM-256', + ivBase64: bytesToBase64Url(iv), + plaintextSha256, + }, + }; +} + +/** + * Decrypt a sealed body. Throws one of two specific errors: + * - `PassphraseError`: wrong passphrase (AEAD tag mismatch) OR the + * recovered body's sha256 doesn't match the manifest + * - regular Error: malformed wrap metadata (invalid base64 etc.) + */ +export async function unseal( + passphrase: string, + sealed: Uint8Array, + wrap: PassphraseWrap +): Promise { + if (!passphrase) throw new PassphraseError('passphrase must not be empty'); + if (wrap.kdf !== 'PBKDF2-SHA256') { + throw new Error(`unsupported KDF "${wrap.kdf}"`); + } + if (wrap.cipher !== 'AES-GCM-256') { + throw new Error(`unsupported cipher "${wrap.cipher}"`); + } + + const salt = base64UrlToBytes(wrap.kdfSaltBase64); + const iv = base64UrlToBytes(wrap.ivBase64); + const key = await deriveKey(passphrase, salt, wrap.kdfIterations); + + let plaintextBuf: ArrayBuffer; + try { + const copy = new Uint8Array(sealed.byteLength); + copy.set(sealed); + plaintextBuf = await crypto.subtle.decrypt( + { name: 'AES-GCM', iv: toBuffer(iv) }, + key, + copy.buffer + ); + } catch { + // AES-GCM auth-tag failure. Most likely the user typed the wrong + // passphrase. We don't leak any ciphertext info. + throw new PassphraseError('wrong passphrase'); + } + + const plaintext = new Uint8Array(plaintextBuf); + const actualSha = await sha256Hex(plaintext); + if (actualSha !== wrap.plaintextSha256) { + throw new PassphraseError('archive integrity check failed after decrypt — file is corrupted'); + } + return plaintext; +} + +export class PassphraseError extends Error { + constructor(message: string) { + super(message); + this.name = 'PassphraseError'; + } +} + +// ─── Internals ─────────────────────────────────────────────────── + +async function deriveKey( + passphrase: string, + salt: Uint8Array, + iterations: number +): Promise { + const passphraseBytes = new TextEncoder().encode(passphrase); + const baseKey = await crypto.subtle.importKey('raw', toBuffer(passphraseBytes), 'PBKDF2', false, [ + 'deriveKey', + ]); + return crypto.subtle.deriveKey( + { + name: 'PBKDF2', + salt: toBuffer(salt), + iterations, + hash: 'SHA-256', + }, + baseKey, + { name: 'AES-GCM', length: 256 }, + false, + ['encrypt', 'decrypt'] + ); +} + +function toBuffer(bytes: Uint8Array): ArrayBuffer { + // Make a fresh ArrayBuffer — the DOM typings refuse SharedArrayBuffer- + // backed views even though Web Crypto accepts them at runtime. + const copy = new Uint8Array(bytes.byteLength); + copy.set(bytes); + return copy.buffer; +} + +// Base64-URL: fits cleanly into JSON manifests, no padding, no +/ conflicts. + +function bytesToBase64Url(bytes: Uint8Array): string { + let binary = ''; + for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]); + return btoa(binary).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, ''); +} + +function base64UrlToBytes(b64url: string): Uint8Array { + const b64 = b64url.replace(/-/g, '+').replace(/_/g, '/'); + const padded = b64 + '==='.slice((b64.length + 3) % 4); + const binary = atob(padded); + const out = new Uint8Array(binary.length); + for (let i = 0; i < binary.length; i++) out[i] = binary.charCodeAt(i); + return out; +} diff --git a/docs/plans/data-export-v2.md b/docs/plans/data-export-v2.md new file mode 100644 index 000000000..65f8b3e5f --- /dev/null +++ b/docs/plans/data-export-v2.md @@ -0,0 +1,309 @@ +# Data Export / Import — v2 + +## Status (2026-04-22) + +Proposed. Ersetzt den bisherigen server-cipher-Backup-Pfad (noch +nicht GA, niemand hat Produktionsdaten davon erstellt) durch ein +einziges client-getriebenes Export/Import-System. + +## Ziel + +Ein Mana-User kann seine Daten (ganz oder modulweise) als portable, +menschenlesbare Datei exportieren und wieder importieren. Das System +ist: + +- **Ein Pfad**, nicht zwei. Kein „Server-Dump vs. Client-Dump" mit + abweichenden Features. +- **Snapshot-basiert** (pro Tabelle eine `.jsonl` mit dem aktuellen + Row-Stand), nicht sync-event-replay. Kleiner, lesbarer, importierbar + in beliebige andere Tools. +- **Plaintext als Default**. GDPR-Art.-20-Datenübertragbarkeit ist ein + Feature, kein Edge-Case. Der User muss seine eigenen Daten lesen + können ohne Mana zu installieren. +- **Optional passphrase-wrapped** für Transport (z. B. Cloud-Ablage). + Nutzt Web-Crypto-Standard-Primitive — unabhängig vom Mana-Vault. +- **Modul-selektiv**. User wählt Module-Checkboxen, Export enthält + nur deren Tabellen. +- **Cross-Account-migrationsfähig**. Plaintext-Datei aus Account A + → Import in Account B → B's Vault-Key verschlüsselt das beim + `bulkPut` automatisch. + +## Abgrenzung + +- **Kein Event-Replay-Format mehr**. Feld-Level-LWW-Timestamps, Actor- + Attribution, causedBy-Chains gehen im Snapshot verloren. Für Backup- + Zwecke egal — der User will seinen aktuellen State, nicht die + Historie. Wenn debug-fähige Event-Dumps jemals gebraucht werden, + kriegen die einen eigenen CLI-Pfad (nicht User-facing). +- **Kein Foreign-Format-Native-Export** (Pocket-CSV, OPML, ICS). Die + sind Adapter die `.mana-v2` transformieren — nicht dritte Pfade im + Core-Code. Bauen wir nur wenn konkreter Bedarf entsteht. +- **Kein Inkrementelles Backup**. Erster Wurf ist „voller Snapshot pro + Export". Delta-Backups kommen falls jemand GB-Datasets fährt — heute + irrelevant. +- **Keine Key-Transfer-Semantik für Zero-Knowledge-Cross-Account**. + Wenn Account A im ZK-Mode läuft und der User nach B wechselt, muss + er während des Exports den Vault entsperren — dann klappt's wie bei + regulären Usern. + +## Entscheidungen vorab + +- **Format-Versionsbruch**. Die alten `.mana-v1`-Dateien (Event-Stream + aus mana-sync) sind nicht migrierbar nach v2 — unterschiedliche + Semantik. Da keine Produktionsdaten existieren, löschen wir v1 + komplett (Code + Endpoint). +- **Client-seitig vollständig**. Kein HTTP-Roundtrip fürs Export. + Funktioniert offline, überlebt mana-sync-Ausfälle, braucht keine + Server-Rolle. +- **Zip-Container + jsonl-Dateien**, pro Tabelle eine Datei. Gleiche + Technik wie v1 (`pako` für Deflate ist schon im Repo), aber mit + neuem Inhalts-Schema. +- **Passphrase-Crypto** ist **nicht** das Per-Feld-AES-GCM aus dem + Vault. Stattdessen: PBKDF2-SHA-256 (600k Iterations, OWASP 2023- + Empfehlung) für KDF + AES-GCM-256 als AEAD. Web Crypto native, kein + argon2-Dep. Die Entscheidung gegen Argon2id: eine einzelne 32-KB- + Entschlüsselung bei 600k PBKDF2 cost ~200 ms — ausreichend schwer + gegen offline-Brute-force, gut handhabbar als UX. Wenn wir später + Argon2id wollen, ist das ein additives Field-Update im Manifest. +- **Per-Field-Decrypt nutzt den existierenden `decryptRecord()`-Pfad** + aus `crypto/record-helpers.ts`. Keine Duplikat-Logik für Export. +- **Per-Field-Re-Encrypt beim Import nutzt `encryptRecord()`**. + `ENCRYPTION_REGISTRY` bestimmt was verschlüsselt wird — wenn sich + die Allowlist später ändert, reagiert der Import mit. +- **Schema-Version im Manifest** — sobald das Row-Shape einer Tabelle + sich ändert (Dexie-Version-Bump mit Migration), bekommt der + Exporter das via Manifest-Schema-Version. Import prüft und refused + ältere Schemas mit klarer Fehlermeldung statt still zu korrumpieren. + +## Format: `.mana` v2 + +``` +archive.mana (zip container, DEFLATE, no password protection on zip level) +├── manifest.json +├── data/ +│ ├── articles.jsonl Eine Zeile pro Row (JSON object) +│ ├── articleHighlights.jsonl +│ ├── articleTags.jsonl +│ ├── globalTags.jsonl +│ ├── tagGroups.jsonl +│ ├── notes.jsonl +│ ├── … Je nach scope + MODULE_CONFIGS +└── README.md Menschenlesbar, erklärt Inhalt +``` + +### `manifest.json` + +```typescript +interface BackupManifestV2 { + /** Hardcoded `2`. Bump on breaking changes only. */ + formatVersion: 2; + + /** Mana app schema version at export time — derived from Dexie version. */ + schemaVersion: number; + + /** Who generated this. Informational, not verified. */ + producedBy: string; // z.B. "mana-web/1.2.3" + + /** ISO timestamp of export. */ + exportedAt: string; + + /** userId at export time. Informational; importer does NOT refuse cross-account. */ + userId: string; + + /** Scope declaration. */ + scope: + | { type: 'full' } + | { type: 'filtered'; appIds: string[] }; + + /** Row-Count pro Tabelle (für UI-Progress + quick-validate). */ + rowCounts: Record; + + /** Plaintext der encrypted fields im JSON (true) oder re-exportiert mit */ + /** dem Mana-Vault-Key (false)? Default true. false wäre absurd — der */ + /** Export-Receiver hätte keinen Vault. Behalten als Flag damit Zukunfts-*/ + /** Clients mit z.B. Vault-Sync denselben Parser wiederverwenden können. */ + fieldsPlaintext: boolean; + + /** Wrap-Info wenn passphrase-protected, sonst `undefined`. */ + passphrase?: { + kdf: 'PBKDF2-SHA256'; + kdfIterations: number; // 600_000 + kdfSaltBase64: string; // 16 bytes random + cipher: 'AES-GCM-256'; + ivBase64: string; // 12 bytes random + /** SHA256 der plaintext `data/`-Konkatenation, hex. Post-unwrap-integritätscheck. */ + dataSha256: string; + }; +} +``` + +### Row-Schema + +Pro Tabelle wird `LocalXxx`-TypeScript-Shape serialisiert. Beispiel `articles.jsonl`: + +```json +{"id":"…","originalUrl":"https://…","title":"…","content":"…","status":"unread","savedAt":"…",…} +{"id":"…","originalUrl":"…","title":"…",…} +``` + +Felder die im ENCRYPTION_REGISTRY stehen **und** in der Quelldatei +verschlüsselt waren, werden beim Export entschlüsselt → plaintext in +der jsonl. + +## Export-Pipeline + +``` +Client: + 1. User-Input: appIds[] + optional passphrase + 2. Sammel-Schleife: + for appId in selected: + for table in MODULE_CONFIGS[appId].tables: + rows = scopedForModule(...).toArray() + decrypted = decryptRecords(table, rows) + jsonl += decrypted.map(toSerializable).join('\n') + 3. Manifest bauen (rowCounts, exportedAt, userId, scope, schemaVersion) + 4. Zip-Struktur schnüren (manifest.json + data/*.jsonl + README.md) + 5. Wenn passphrase: + - data/ in-memory konkatenieren (dataBytes) + - sha256 = hash(dataBytes) + - kdfSalt = random(16), iv = random(12) + - wrappedKey = PBKDF2(passphrase, salt, 600k, 32B) + - ciphertext = AES-GCM-encrypt(wrappedKey, iv, dataBytes) + - Manifest.passphrase = { …salt, …iv, dataSha256 } + - Zip enthält `data.enc` statt `data/`-Ordner + - Ciphertext-Prüfsumme (AEAD-Tag) ist implizit + 6. Return Blob → Browser-Download +``` + +## Import-Pipeline + +``` +Client: + 1. User-Input: File + optional passphrase-prompt + 2. parseBackupV2(file) → { manifest, data or sealedData } + 3. Manifest-Validierung: + - formatVersion === 2 + - schemaVersion kompatibel (max 2 Versions Rückstand) + - scope-Struktur valide + 4. Wenn passphrase: + - User-prompt für Passphrase + - KDF: PBKDF2(passphrase, salt, iterations, 32B) + - Decrypt AES-GCM → dataBytes + - sha256(dataBytes) === manifest.passphrase.dataSha256 ? sonst FAIL + 5. Pro jsonl-Datei in data/: + - Parse Zeilen zu Row-Objekten + - Field-by-field: wenn Feldname in ENCRYPTION_REGISTRY[table].fields + → encryptRecord(row) mit aktuellem Master-Key + - bulkPut(table, rows) in Dexie + - Dexie-Creating-Hook stempelt userId, timestamps, tracks pending_changes + → Sync zum Server läuft automatisch an + 6. Progress-Callback pro Tabelle + 7. Done +``` + +## File-Struktur + +``` +apps/mana/apps/web/src/lib/data/backup/ +├── v2/ +│ ├── format.ts Types + Zip-Reader/Writer + sha256 +│ ├── passphrase.ts PBKDF2-KDF + AES-GCM-AEAD wrap/unwrap +│ ├── schema.ts Pro-Tabelle-Row-Serialisation (toJson/fromJson) +│ ├── export.ts buildClientBackup({ appIds, passphrase }) +│ ├── import.ts applyClientBackup(file, { passphrase }) +│ └── format.test.ts Round-trip-Tests (encrypted + plaintext) +└── (v1/ wird gelöscht) +``` + +**Kein shared-Parser mit v1**. v1 ist Event-Stream, v2 ist Row-Snapshot +— unterschiedliche Semantik. Besser komplett separat halten. + +## UI + +Settings → My Data → **„Export & Import"** Panel (ersetzt bisherige +„Backup"-Sektion): + +``` +┌────────────────────────────────────────────────────┐ +│ Export & Import │ +│ │ +│ Lade deine Mana-Daten als portable .mana-Datei herunter.│ +│ │ +│ Module wählen │ +│ [✓] Alles │ +│ ─── oder einzeln ─── │ +│ [ ] Artikel [ ] Notizen [ ] Kalender … │ +│ │ +│ [○] Mit Passphrase verschlüsseln │ +│ ┌──────────────────────┐ │ +│ │ Passphrase │ │ +│ └──────────────────────┘ │ +│ ┌──────────────────────┐ │ +│ │ Bestätigen │ │ +│ └──────────────────────┘ │ +│ │ +│ [ Exportieren ] │ +│ │ +│ ───────────────────────── │ +│ │ +│ Import: .mana-Datei wählen [ Datei wählen ] │ +│ │ +└────────────────────────────────────────────────────┘ +``` + +Import-Ablauf: +1. File-Picker — akzeptiert nur `*.mana` +2. Parser liest Manifest +3. Wenn `passphrase` gesetzt → Modal prompts user +4. Progress-Bar mit Tabelle-für-Tabelle-Updates +5. Success-Toast mit Summary („142 Artikel, 48 Highlights, 23 Tags + importiert") + +## Milestones + +1. **M1 — Format + Crypto-Primitive** + - `v2/format.ts`: Manifest-Types, Zip read/write (re-use v1's pako- + basierten Zip-Code, aber eigene Manifest-Struktur), sha256-Helper + - `v2/passphrase.ts`: PBKDF2-KDF + AES-GCM wrap/unwrap, 100% Web- + Crypto, keine neuen Deps + - `v2/schema.ts`: serialize/deserialize-Helpers pro bekannter Tabelle + - Unit-Tests für Passphrase-Round-Trip + Zip-Round-Trip +2. **M2 — Export-Builder** + - `v2/export.ts`: `buildClientBackup({ appIds?, passphrase? }): Promise` + - Iteriert `MODULE_CONFIGS`, nutzt `decryptRecords()`, schreibt jsonl + - Manifest baut `rowCounts` live +3. **M3 — Import-Pipeline** + - `v2/import.ts`: `applyClientBackup(file: Blob, opts): Promise` + - Re-encrypt via `encryptRecord()`, `bulkPut` in Dexie + - Progress-Callback, strukturierte Fehler +4. **M4 — UI** + - `MyDataSection.svelte` — alte Backup-Buttons raus, neue Export-&-Import-Karte rein + - Modul-Multi-Select, Passphrase-Toggle, Progress-Bar, File-Picker +5. **M5 — Legacy-Cleanup** + - `services/mana-sync/` — `/backup/export` Go-Handler raus + - `apps/mana/apps/web/src/lib/api/services/backup.ts` — raus + - `lib/data/backup/format.ts`, `import.ts`, `format.test.ts` — raus + - Tests + Docs durchkämmen, alte Referenzen purgen + +## Offene Fragen + +- **Schema-Version-Kompat-Policy**: Einseitig rückwärts (neuer Import + liest ältere Exports) ist nötig. Frage: ab wann muss der Import + hart fehlen? Vorschlag: `schemaVersion < (currentSchema - 2)` → + Fehler mit Upgrade-Hinweis. In zwei Versionen kann genug Migration + nötig sein dass Auto-Migration riskant wird. +- **Passphrase-Stärke-Indikator**: Frontend-seitig zxcvbn-ish-Hinweis + oder minimum-length? Pragmatisch: min 12 Zeichen, keine weitere + Validierung — User ist Erwachsen. +- **Conflict-Handling beim Import**: Wenn ein Row mit derselben `id` + schon existiert — überschreiben oder skip? Vorschlag: **überschreiben** + (simpler, passt zu LWW-Semantik). UI könnte „Dry-Run mit Diff" als + Phase-2-Feature kriegen. +- **Binäre Daten** (uploaded files, images): Phase 1 exportiert nur + Metadaten. Blob-Bodies leben in MinIO/Storage, nicht in Dexie. Wenn + Binary-Export kommt, wird der Manifest-Eintrag `binaryAssets: []` + ergänzt und die Files in `blobs/`-Unterordner gepackt. +- **Memory**: bei sehr großen Datensätzen streamed man idealerweise. + Erste Iteration: alles in-memory bauen. Reicht für realistische + Haushaltsgrößen (10k Artikel + Highlights + Tags ≈ 20 MB JSON). + Streaming kommt wenn's wirklich nötig wird. diff --git a/services/mana-sync/CLAUDE.md b/services/mana-sync/CLAUDE.md index bd26d4479..ffea51787 100644 --- a/services/mana-sync/CLAUDE.md +++ b/services/mana-sync/CLAUDE.md @@ -132,30 +132,19 @@ Result: title="Buy eggs", completed=true (merged — different fields) | `GET /sync/{appId}/stream` | GET | JWT + Billing | SSE stream for real-time changes | | `GET /ws` | WS | JWT (in-band) | Unified real-time sync (all apps, one connection) | | `GET /ws/{appId}` | WS | JWT (in-band) | Legacy per-app sync notifications | -| `GET /backup/export` | GET | JWT only | **GDPR-grade full-account export** as `.mana` zip (see below) | | `GET /health` | GET | No | Health check with connection stats | | `GET /metrics` | GET | No | Prometheus metrics | -**Billing gate**: Push, pull, and stream endpoints are wrapped by a billing middleware that checks the user's sync subscription status via `mana-credits`. Returns **402 Payment Required** if sync is not active. Status is cached for 5 minutes per user. Fail-open: if mana-credits is unreachable, sync is allowed. **`/backup/export` is intentionally outside the billing gate** — GDPR data-portability must always be available. +**Billing gate**: Push, pull, and stream endpoints are wrapped by a billing middleware that checks the user's sync subscription status via `mana-credits`. Returns **402 Payment Required** if sync is not active. Status is cached for 5 minutes per user. Fail-open: if mana-credits is unreachable, sync is allowed. -## Backup / Restore +## Data Export / Import -`GET /backup/export` streams a `.mana` archive (zip) with the user's full `sync_changes` log. Format: +Data export is **not** a mana-sync responsibility anymore (since 2026-04-22). The previous `GET /backup/export` server-side event-stream export was removed in favour of a fully client-driven snapshot export: the webapp reads its local Dexie store, decrypts per-field, optionally passphrase-seals, and downloads a `.mana` archive. See `apps/mana/apps/web/src/lib/data/backup/v2/` and `docs/plans/data-export-v2.md` for the format + pipeline. -``` -mana-backup-{userId}-{YYYYMMDD-HHMMSS}.mana (application/zip) -├── events.jsonl — one SyncChange per line (chronological) -└── manifest.json — formatVersion, schemaVersion, userId, eventCount, - eventsSha256, apps[], createdAt, schemaVersionMin/Max -``` - -The zip is built in a single DB pass: `events.jsonl` is written via `io.MultiWriter(entry, sha256)` so the manifest's `eventsSha256` can be filled without a second scan. The client (web) parses the zip with a hand-rolled reader against `pako` deflate, validates `userId` match + sha256, then replays events through `applyServerChanges()` in 300-event batches per `appId`. - -Ciphertext (27 encrypted tables, client-side AES-GCM) passes through untouched — the archive is effectively encrypted at rest for sensitive fields. - -**Protocol stability (v1, pre-launch):** Once this ships, these event fields are append-only: `eventId`, `schemaVersion`, `op`, `fields` (LWW-canonical) / `data` (insert-snapshot). Tombstones stay in `sync_changes` forever so exports remain complete. - -**Split**: pure logic lives in `internal/backup/writer.go::WriteBackup(w, userID, createdAt, iter)`. The HTTP handler (`handler.go`) is a thin shim; tests use a slice-backed iterator so they run without Postgres. See `writer_test.go` (4 cases) + `apps/mana/apps/web/src/lib/data/backup/format.test.ts` (8 cases). +Rationale for the move: +- Zero-knowledge users hold their vault key client-side only — a server-side exporter cannot produce plaintext archives for them. +- GDPR data-portability is better served by plaintext-by-default (Art. 20) than by ciphertext blobs only decryptable with an active Mana install. +- Module-selective export is intrinsically a client concern — the server has no business knowing which subset of a user's data the user wants to hand out. ## Database Schema diff --git a/services/mana-sync/cmd/server/main.go b/services/mana-sync/cmd/server/main.go index c39a65376..96ff021b7 100644 --- a/services/mana-sync/cmd/server/main.go +++ b/services/mana-sync/cmd/server/main.go @@ -13,7 +13,6 @@ import ( "time" "github.com/mana/mana-sync/internal/auth" - "github.com/mana/mana-sync/internal/backup" "github.com/mana/mana-sync/internal/billing" "github.com/mana/mana-sync/internal/config" "github.com/mana/mana-sync/internal/memberships" @@ -73,11 +72,12 @@ func main() { mux.Handle("GET /sync/{appId}/pull", billingMiddleware(http.HandlerFunc(handler.HandlePull))) mux.Handle("GET /sync/{appId}/stream", billingMiddleware(http.HandlerFunc(handler.HandleStream))) - // Backup/export — GDPR-grade, auth-only (no billing gate so users can - // always retrieve their data). M1 thin slice: streams raw sync_changes - // as JSONL. Manifest + zip container land in M3. - backupHandler := backup.NewHandler(db, validator) - mux.Handle("GET /backup/export", http.HandlerFunc(backupHandler.HandleExport)) + // Backup/export — removed 2026-04-22 (data-export-v2 rollout). + // Data export is now fully client-driven (apps/mana/apps/web/src/lib/ + // data/backup/v2/): client reads local Dexie, decrypts per-field, + // optionally passphrase-seals, downloads. Server would need the user's + // vault key to produce plaintext exports — which is a key it + // deliberately never sees. // WebSocket endpoints // Unified: one connection per user, receives all app notifications with appId in payload diff --git a/services/mana-sync/internal/backup/handler.go b/services/mana-sync/internal/backup/handler.go deleted file mode 100644 index 55f1e5124..000000000 --- a/services/mana-sync/internal/backup/handler.go +++ /dev/null @@ -1,128 +0,0 @@ -// Package backup implements the user-data backup endpoint. -// -// Streams a .mana archive (zip container) to the authenticated user containing: -// -// events.jsonl — one SyncChange per line, chronological -// manifest.json — header with userId, counts, integrity hash, format version -// -// Design notes: -// -// - The zip is built in a single DB pass. events.jsonl is written first -// while the body is teed through a sha256 hasher; manifest.json lands as -// a second zip entry after the stream closes, so the manifest can embed -// the final eventsSha256 without a second scan. -// -// - Ciphertext passes through untouched: fields encrypted by the client- -// side registry remain AES-GCM ciphertext, so the archive is effectively -// encrypted at rest for sensitive fields. Plaintext fields (IDs, sort -// keys, timestamps) are visible in the archive — this matches the GDPR -// data-portability expectation. -// -// - The route is wired outside billingMiddleware in main.go so users can -// always retrieve their data regardless of subscription status. -// -// - Signature over manifest.json is deferred to phase 2; the eventsSha256 -// already catches accidental corruption during download/storage. -package backup - -import ( - "context" - "fmt" - "log/slog" - "net/http" - "time" - - "github.com/mana/mana-sync/internal/auth" - "github.com/mana/mana-sync/internal/store" -) - -// BackupFormatVersion is the container-format version (manifest.formatVersion). -// Distinct from syncproto.CurrentSchemaVersion — the container can change -// (signature added, different body encoding) without bumping every event. -const BackupFormatVersion = 1 - -// Handler serves GET /backup/export. -type Handler struct { - store *store.Store - validator *auth.Validator -} - -// NewHandler constructs a backup handler. -func NewHandler(s *store.Store, v *auth.Validator) *Handler { - return &Handler{store: s, validator: v} -} - -// exportLine is the on-wire shape of one row inside events.jsonl. Shared -// with writer.go so both the HTTP path and the writer tests serialize -// identically. -type exportLine struct { - EventID string `json:"eventId"` - SchemaVersion int `json:"schemaVersion"` - AppID string `json:"appId"` - Table string `json:"table"` - RecordID string `json:"id"` - Op string `json:"op"` - Data map[string]any `json:"data,omitempty"` - FieldTimestamps map[string]string `json:"fieldTimestamps,omitempty"` - ClientID string `json:"clientId"` - CreatedAt string `json:"createdAt"` -} - -// manifestFile is the header object serialized as manifest.json. -type manifestFile struct { - FormatVersion int `json:"formatVersion"` - SchemaVersion int `json:"schemaVersion"` - UserID string `json:"userId"` - CreatedAt string `json:"createdAt"` - EventCount int `json:"eventCount"` - EventsSHA256 string `json:"eventsSha256"` - Apps []string `json:"apps"` - ProducedBy string `json:"producedBy"` - SchemaVersionMin int `json:"schemaVersionMin,omitempty"` - SchemaVersionMax int `json:"schemaVersionMax,omitempty"` -} - -// HandleExport is an HTTP shim over WriteBackup: it authenticates, sets -// download headers, and hands the response writer plus a store-backed -// iterator to the shared writer. Tests talk to WriteBackup directly with -// a synthetic iterator. -func (h *Handler) HandleExport(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodGet { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - userID, err := h.validator.UserIDFromRequest(r) - if err != nil { - http.Error(w, "unauthorized: "+err.Error(), http.StatusUnauthorized) - return - } - - createdAt := time.Now().UTC() - filename := fmt.Sprintf("mana-backup-%s-%s.mana", userID, createdAt.Format("20060102-150405")) - - w.Header().Set("Content-Type", "application/zip") - w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, filename)) - w.Header().Set("X-Content-Type-Options", "nosniff") - w.Header().Set("X-Accel-Buffering", "no") - w.Header().Set("Cache-Control", "no-store") - - iter := storeIterator(r.Context(), h.store, userID) - if err := WriteBackup(w, userID, createdAt, iter); err != nil { - // Headers are flushed so we cannot downgrade to a 500 here; closing - // the zip partial is the best we can do. The missing manifest is - // itself a signal to the importer that the export was truncated. - slog.Error("backup: write failed", "user_id", userID, "error", err) - return - } - - slog.Info("backup export ok", "user_id", userID) -} - -// storeIterator adapts store.Store.StreamAllUserChanges to the RowIterator -// shape WriteBackup expects, holding the request context in the closure. -func storeIterator(ctx context.Context, s *store.Store, userID string) RowIterator { - return func(fn func(store.ChangeRow) error) error { - return s.StreamAllUserChanges(ctx, userID, fn) - } -} diff --git a/services/mana-sync/internal/backup/writer.go b/services/mana-sync/internal/backup/writer.go deleted file mode 100644 index e4eeeb6d9..000000000 --- a/services/mana-sync/internal/backup/writer.go +++ /dev/null @@ -1,133 +0,0 @@ -package backup - -import ( - "archive/zip" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "sort" - "time" - - syncproto "github.com/mana/mana-sync/internal/sync" - "github.com/mana/mana-sync/internal/store" -) - -// RowIterator yields every sync_changes row that belongs in a backup, -// invoking fn for each. The HTTP handler wires this to -// store.StreamAllUserChanges; tests wire it to an in-memory slice so the -// zip writer can be exercised without Postgres. -type RowIterator func(fn func(store.ChangeRow) error) error - -// WriteBackup serializes the user's sync_changes as a .mana zip archive -// into dst. This is the integration point with io.Writer so both the HTTP -// streaming path and tests share the same byte-for-byte production code. -// -// Single pass: events.jsonl is written first while sha256 tees through the -// encoder; manifest.json lands as a second zip entry with the final hash. -// -// The function returns after closing the zip's central directory, so dst -// contains a fully valid archive by the time err == nil. -func WriteBackup(dst io.Writer, userID string, createdAt time.Time, iter RowIterator) error { - if userID == "" { - return fmt.Errorf("backup: empty userID") - } - - zw := zip.NewWriter(dst) - defer zw.Close() - - eventsWriter, err := zw.CreateHeader(&zip.FileHeader{ - Name: "events.jsonl", - Method: zip.Deflate, - Modified: createdAt, - }) - if err != nil { - return fmt.Errorf("backup: create events.jsonl entry: %w", err) - } - - hasher := sha256.New() - teed := io.MultiWriter(eventsWriter, hasher) - encoder := json.NewEncoder(teed) - - var ( - count int - appSet = make(map[string]struct{}) - minVer int - maxVer int - ) - - if err := iter(func(row store.ChangeRow) error { - sv := row.SchemaVersion - if sv <= 0 { - sv = 1 - } - if count == 0 { - minVer = sv - maxVer = sv - } else { - if sv < minVer { - minVer = sv - } - if sv > maxVer { - maxVer = sv - } - } - line := exportLine{ - EventID: row.ID, - SchemaVersion: sv, - AppID: row.AppID, - Table: row.TableName, - RecordID: row.RecordID, - Op: row.Op, - Data: row.Data, - FieldTimestamps: row.FieldTimestamps, - ClientID: row.ClientID, - CreatedAt: row.CreatedAt.UTC().Format(time.RFC3339Nano), - } - if err := encoder.Encode(line); err != nil { - return err - } - appSet[row.AppID] = struct{}{} - count++ - return nil - }); err != nil { - return fmt.Errorf("backup: iterate rows: %w", err) - } - - apps := make([]string, 0, len(appSet)) - for a := range appSet { - apps = append(apps, a) - } - sort.Strings(apps) - - manifest := manifestFile{ - FormatVersion: BackupFormatVersion, - SchemaVersion: syncproto.CurrentSchemaVersion, - UserID: userID, - CreatedAt: createdAt.UTC().Format(time.RFC3339Nano), - EventCount: count, - EventsSHA256: hex.EncodeToString(hasher.Sum(nil)), - Apps: apps, - ProducedBy: "mana-sync", - SchemaVersionMin: minVer, - SchemaVersionMax: maxVer, - } - manifestBytes, err := json.MarshalIndent(manifest, "", " ") - if err != nil { - return fmt.Errorf("backup: marshal manifest: %w", err) - } - manifestWriter, err := zw.CreateHeader(&zip.FileHeader{ - Name: "manifest.json", - Method: zip.Deflate, - Modified: createdAt, - }) - if err != nil { - return fmt.Errorf("backup: create manifest entry: %w", err) - } - if _, err := manifestWriter.Write(manifestBytes); err != nil { - return fmt.Errorf("backup: write manifest: %w", err) - } - - return zw.Close() -} diff --git a/services/mana-sync/internal/backup/writer_test.go b/services/mana-sync/internal/backup/writer_test.go deleted file mode 100644 index 5e19c5152..000000000 --- a/services/mana-sync/internal/backup/writer_test.go +++ /dev/null @@ -1,251 +0,0 @@ -package backup - -import ( - "archive/zip" - "bytes" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "io" - "strings" - "testing" - "time" - - "github.com/mana/mana-sync/internal/store" -) - -// rowsIterator returns a RowIterator that walks a fixed slice of rows. -// Used in place of the Postgres store so tests exercise the writer -// end-to-end without a live DB. -func rowsIterator(rows []store.ChangeRow) RowIterator { - return func(fn func(store.ChangeRow) error) error { - for _, r := range rows { - if err := fn(r); err != nil { - return err - } - } - return nil - } -} - -func sampleRows() []store.ChangeRow { - ts := func(s string) time.Time { - t, err := time.Parse(time.RFC3339Nano, s) - if err != nil { - panic(err) - } - return t - } - return []store.ChangeRow{ - { - ID: "evt-1", - AppID: "todo", - TableName: "tasks", - RecordID: "task-1", - Op: "insert", - Data: map[string]any{"title": "Buy milk"}, - ClientID: "client-a", - CreatedAt: ts("2026-04-14T10:00:00.000Z"), - SchemaVersion: 1, - }, - { - ID: "evt-2", - AppID: "todo", - TableName: "tasks", - RecordID: "task-1", - Op: "update", - Data: map[string]any{"completed": true}, - FieldTimestamps: map[string]string{"completed": "2026-04-14T10:05:00.000Z"}, - ClientID: "client-a", - CreatedAt: ts("2026-04-14T10:05:00.000Z"), - SchemaVersion: 1, - }, - { - ID: "evt-3", - AppID: "calendar", - TableName: "events", - RecordID: "evt-42", - Op: "insert", - Data: map[string]any{"title": "Meeting"}, - ClientID: "client-b", - CreatedAt: ts("2026-04-14T11:00:00.000Z"), - SchemaVersion: 1, - }, - } -} - -func TestWriteBackup_Roundtrip(t *testing.T) { - var buf bytes.Buffer - createdAt := time.Date(2026, 4, 14, 12, 0, 0, 0, time.UTC) - - if err := WriteBackup(&buf, "user-123", createdAt, rowsIterator(sampleRows())); err != nil { - t.Fatalf("WriteBackup: %v", err) - } - - // Archive must parse as a valid zip with exactly two entries. - zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatalf("zip.NewReader: %v", err) - } - if len(zr.File) != 2 { - t.Fatalf("expected 2 entries, got %d", len(zr.File)) - } - - events := readZipEntry(t, zr, "events.jsonl") - manifestBytes := readZipEntry(t, zr, "manifest.json") - - // events.jsonl: three newline-separated JSON records in input order. - lines := strings.Split(strings.TrimRight(string(events), "\n"), "\n") - if len(lines) != 3 { - t.Fatalf("expected 3 events, got %d", len(lines)) - } - - // Event 1 is insert with data, no fieldTimestamps. - var e1 map[string]any - if err := json.Unmarshal([]byte(lines[0]), &e1); err != nil { - t.Fatalf("parse line 0: %v", err) - } - if e1["op"] != "insert" || e1["eventId"] != "evt-1" || e1["appId"] != "todo" { - t.Fatalf("event 0 unexpected: %#v", e1) - } - if _, ok := e1["fieldTimestamps"]; ok { - t.Fatalf("event 0 should omit fieldTimestamps (insert)") - } - - // Event 2 is update with fieldTimestamps surfaced. - var e2 map[string]any - if err := json.Unmarshal([]byte(lines[1]), &e2); err != nil { - t.Fatalf("parse line 1: %v", err) - } - ft, ok := e2["fieldTimestamps"].(map[string]any) - if !ok { - t.Fatalf("event 1 fieldTimestamps missing") - } - if ft["completed"] != "2026-04-14T10:05:00.000Z" { - t.Fatalf("event 1 fieldTimestamps wrong: %#v", ft) - } - - // Manifest: all declared fields match what we wrote. - var m manifestFile - if err := json.Unmarshal(manifestBytes, &m); err != nil { - t.Fatalf("parse manifest: %v", err) - } - if m.FormatVersion != BackupFormatVersion { - t.Fatalf("formatVersion=%d want %d", m.FormatVersion, BackupFormatVersion) - } - if m.UserID != "user-123" { - t.Fatalf("userId=%q want user-123", m.UserID) - } - if m.EventCount != 3 { - t.Fatalf("eventCount=%d want 3", m.EventCount) - } - if m.SchemaVersionMin != 1 || m.SchemaVersionMax != 1 { - t.Fatalf("schemaVersion range=[%d,%d] want [1,1]", m.SchemaVersionMin, m.SchemaVersionMax) - } - if len(m.Apps) != 2 || m.Apps[0] != "calendar" || m.Apps[1] != "todo" { - t.Fatalf("apps=%v want sorted [calendar todo]", m.Apps) - } - if m.ProducedBy != "mana-sync" { - t.Fatalf("producedBy=%q want mana-sync", m.ProducedBy) - } - - // eventsSha256 must match a fresh SHA of the decompressed events body. - h := sha256.New() - h.Write(events) - want := hex.EncodeToString(h.Sum(nil)) - if m.EventsSHA256 != want { - t.Fatalf("eventsSha256 mismatch: manifest=%s recomputed=%s", m.EventsSHA256, want) - } -} - -func TestWriteBackup_EmptyUser(t *testing.T) { - var buf bytes.Buffer - err := WriteBackup(&buf, "", time.Now(), rowsIterator(nil)) - if err == nil { - t.Fatal("expected error for empty userID") - } - if !strings.Contains(err.Error(), "empty userID") { - t.Fatalf("unexpected error: %v", err) - } -} - -func TestWriteBackup_NoRows(t *testing.T) { - var buf bytes.Buffer - createdAt := time.Date(2026, 4, 14, 12, 0, 0, 0, time.UTC) - - if err := WriteBackup(&buf, "user-x", createdAt, rowsIterator(nil)); err != nil { - t.Fatalf("WriteBackup: %v", err) - } - - zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatalf("zip.NewReader: %v", err) - } - - events := readZipEntry(t, zr, "events.jsonl") - if len(events) != 0 { - t.Fatalf("expected empty events.jsonl, got %d bytes", len(events)) - } - - manifestBytes := readZipEntry(t, zr, "manifest.json") - var m manifestFile - if err := json.Unmarshal(manifestBytes, &m); err != nil { - t.Fatalf("parse manifest: %v", err) - } - if m.EventCount != 0 { - t.Fatalf("eventCount=%d want 0", m.EventCount) - } - if len(m.Apps) != 0 { - t.Fatalf("apps=%v want empty", m.Apps) - } - // Empty body still needs a valid sha. - if m.EventsSHA256 == "" { - t.Fatal("eventsSha256 empty even for zero-row export") - } -} - -func TestWriteBackup_DefaultsSchemaVersionZeroRowsToOne(t *testing.T) { - // Legacy rows stored before the schema_version column existed scan as - // 0. The writer must clamp them to 1 so the manifest's - // schemaVersionMin/Max never claims a nonexistent protocol version. - rows := []store.ChangeRow{{ - ID: "e1", AppID: "todo", TableName: "tasks", RecordID: "t1", - Op: "insert", Data: map[string]any{"x": 1}, ClientID: "c", - CreatedAt: time.Now(), SchemaVersion: 0, - }} - var buf bytes.Buffer - if err := WriteBackup(&buf, "u", time.Now(), rowsIterator(rows)); err != nil { - t.Fatalf("WriteBackup: %v", err) - } - zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) - if err != nil { - t.Fatalf("zip.NewReader: %v", err) - } - events := readZipEntry(t, zr, "events.jsonl") - if !strings.Contains(string(events), `"schemaVersion":1`) { - t.Fatalf("expected schemaVersion:1 in events body, got: %s", events) - } -} - -// readZipEntry reads the named entry out of a zip archive in full. Fails -// the test if the entry is missing or cannot be decompressed. -func readZipEntry(t *testing.T, zr *zip.Reader, name string) []byte { - t.Helper() - for _, f := range zr.File { - if f.Name != name { - continue - } - rc, err := f.Open() - if err != nil { - t.Fatalf("open %s: %v", name, err) - } - defer rc.Close() - body, err := io.ReadAll(rc) - if err != nil { - t.Fatalf("read %s: %v", name, err) - } - return body - } - t.Fatalf("entry %q not found in zip", name) - return nil -}