mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
feat(backup): client-driven v2 snapshot export, drop server-side backup
Replaces the mana-sync event-stream export (GET /backup/export) with a
fully client-driven `.mana` v2 archive: webapp reads Dexie, decrypts
per-field, packages JSONL + manifest, optionally PBKDF2+AES-GCM seals
with a passphrase.
- New: backup/v2/{format,passphrase,export,import}.ts + format.test.ts
(10 tests: round-trip, sealed path, 3 failure modes incl. wrong-
passphrase vs. tamper distinction).
- UI: ExportImportPanel with module multi-select, optional passphrase,
progress + sealed-file detection — replaces the old backup flow in
Settings → MyData.
- Removes services/mana-sync/internal/backup/ and the corresponding
client helpers + v1 tests. No parallel paths, no legacy shim.
- Why client-driven: zero-knowledge users hold their vault key only
client-side, so a server exporter cannot produce plaintext archives;
GDPR Art. 20 portability is better served by plaintext-by-default.
- Cross-account restore works via re-encryption under the target
  vault key (no master-key (MK) transfer needed).
DATA_LAYER_AUDIT.md §8 rewritten to reflect the new architecture.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3a7bc7f1c3
commit
fd1ea47075
18 changed files with 2145 additions and 1530 deletions
|
|
@ -1,128 +0,0 @@
|
|||
// Package backup implements the user-data backup endpoint.
|
||||
//
|
||||
// Streams a .mana archive (zip container) to the authenticated user containing:
|
||||
//
|
||||
// events.jsonl — one SyncChange per line, chronological
|
||||
// manifest.json — header with userId, counts, integrity hash, format version
|
||||
//
|
||||
// Design notes:
|
||||
//
|
||||
// - The zip is built in a single DB pass. events.jsonl is written first
|
||||
// while the body is teed through a sha256 hasher; manifest.json lands as
|
||||
// a second zip entry after the stream closes, so the manifest can embed
|
||||
// the final eventsSha256 without a second scan.
|
||||
//
|
||||
// - Ciphertext passes through untouched: fields encrypted by the client-
|
||||
// side registry remain AES-GCM ciphertext, so the archive is effectively
|
||||
// encrypted at rest for sensitive fields. Plaintext fields (IDs, sort
|
||||
// keys, timestamps) are visible in the archive — this matches the GDPR
|
||||
// data-portability expectation.
|
||||
//
|
||||
// - The route is wired outside billingMiddleware in main.go so users can
|
||||
// always retrieve their data regardless of subscription status.
|
||||
//
|
||||
// - Signature over manifest.json is deferred to phase 2; the eventsSha256
|
||||
// already catches accidental corruption during download/storage.
|
||||
package backup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/mana/mana-sync/internal/auth"
|
||||
"github.com/mana/mana-sync/internal/store"
|
||||
)
|
||||
|
||||
// BackupFormatVersion is the container-format version (manifest.formatVersion).
// Distinct from syncproto.CurrentSchemaVersion — the container can change
// (signature added, different body encoding) without bumping every event.
// Bump this only when the archive layout itself changes.
const BackupFormatVersion = 1
|
||||
|
||||
// Handler serves GET /backup/export.
type Handler struct {
	store     *store.Store    // source of the user's sync_changes rows
	validator *auth.Validator // resolves the authenticated user ID from a request
}
|
||||
|
||||
// NewHandler constructs a backup handler.
|
||||
func NewHandler(s *store.Store, v *auth.Validator) *Handler {
|
||||
return &Handler{store: s, validator: v}
|
||||
}
|
||||
|
||||
// exportLine is the on-wire shape of one row inside events.jsonl. Shared
// with writer.go so both the HTTP path and the writer tests serialize
// identically.
//
// NOTE: json.Marshal emits fields in declaration order and the manifest's
// eventsSha256 is computed over exactly those bytes — do not reorder fields.
type exportLine struct {
	EventID         string            `json:"eventId"`
	SchemaVersion   int               `json:"schemaVersion"` // clamped to >= 1 by the writer
	AppID           string            `json:"appId"`
	Table           string            `json:"table"`
	RecordID        string            `json:"id"` // JSON key is "id", not "recordId"
	Op              string            `json:"op"`
	Data            map[string]any    `json:"data,omitempty"`
	FieldTimestamps map[string]string `json:"fieldTimestamps,omitempty"`
	ClientID        string            `json:"clientId"`
	CreatedAt       string            `json:"createdAt"` // RFC3339Nano, UTC
}
|
||||
|
||||
// manifestFile is the header object serialized as manifest.json. It is
// written as the archive's second zip entry, after events.jsonl, so it can
// embed the final body hash without a second scan.
type manifestFile struct {
	FormatVersion    int      `json:"formatVersion"` // container version (BackupFormatVersion)
	SchemaVersion    int      `json:"schemaVersion"` // protocol version current at export time
	UserID           string   `json:"userId"`
	CreatedAt        string   `json:"createdAt"`    // RFC3339Nano, UTC
	EventCount       int      `json:"eventCount"`   // lines in events.jsonl
	EventsSHA256     string   `json:"eventsSha256"` // hex sha256 of the uncompressed events.jsonl body
	Apps             []string `json:"apps"`         // sorted, deduplicated app IDs present in the export
	ProducedBy       string   `json:"producedBy"`
	SchemaVersionMin int      `json:"schemaVersionMin,omitempty"` // omitted (0) when the export has no rows
	SchemaVersionMax int      `json:"schemaVersionMax,omitempty"`
}
|
||||
|
||||
// HandleExport is an HTTP shim over WriteBackup: it authenticates, sets
|
||||
// download headers, and hands the response writer plus a store-backed
|
||||
// iterator to the shared writer. Tests talk to WriteBackup directly with
|
||||
// a synthetic iterator.
|
||||
func (h *Handler) HandleExport(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
userID, err := h.validator.UserIDFromRequest(r)
|
||||
if err != nil {
|
||||
http.Error(w, "unauthorized: "+err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
createdAt := time.Now().UTC()
|
||||
filename := fmt.Sprintf("mana-backup-%s-%s.mana", userID, createdAt.Format("20060102-150405"))
|
||||
|
||||
w.Header().Set("Content-Type", "application/zip")
|
||||
w.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, filename))
|
||||
w.Header().Set("X-Content-Type-Options", "nosniff")
|
||||
w.Header().Set("X-Accel-Buffering", "no")
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
|
||||
iter := storeIterator(r.Context(), h.store, userID)
|
||||
if err := WriteBackup(w, userID, createdAt, iter); err != nil {
|
||||
// Headers are flushed so we cannot downgrade to a 500 here; closing
|
||||
// the zip partial is the best we can do. The missing manifest is
|
||||
// itself a signal to the importer that the export was truncated.
|
||||
slog.Error("backup: write failed", "user_id", userID, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
slog.Info("backup export ok", "user_id", userID)
|
||||
}
|
||||
|
||||
// storeIterator adapts store.Store.StreamAllUserChanges to the RowIterator
|
||||
// shape WriteBackup expects, holding the request context in the closure.
|
||||
func storeIterator(ctx context.Context, s *store.Store, userID string) RowIterator {
|
||||
return func(fn func(store.ChangeRow) error) error {
|
||||
return s.StreamAllUserChanges(ctx, userID, fn)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
package backup
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
syncproto "github.com/mana/mana-sync/internal/sync"
|
||||
"github.com/mana/mana-sync/internal/store"
|
||||
)
|
||||
|
||||
// RowIterator yields every sync_changes row that belongs in a backup,
// invoking fn for each; implementations stop at the first error fn
// returns and propagate it. The HTTP handler wires this to
// store.StreamAllUserChanges; tests wire it to an in-memory slice so the
// zip writer can be exercised without Postgres.
type RowIterator func(fn func(store.ChangeRow) error) error
|
||||
|
||||
// WriteBackup serializes the user's sync_changes as a .mana zip archive
|
||||
// into dst. This is the integration point with io.Writer so both the HTTP
|
||||
// streaming path and tests share the same byte-for-byte production code.
|
||||
//
|
||||
// Single pass: events.jsonl is written first while sha256 tees through the
|
||||
// encoder; manifest.json lands as a second zip entry with the final hash.
|
||||
//
|
||||
// The function returns after closing the zip's central directory, so dst
|
||||
// contains a fully valid archive by the time err == nil.
|
||||
func WriteBackup(dst io.Writer, userID string, createdAt time.Time, iter RowIterator) error {
|
||||
if userID == "" {
|
||||
return fmt.Errorf("backup: empty userID")
|
||||
}
|
||||
|
||||
zw := zip.NewWriter(dst)
|
||||
defer zw.Close()
|
||||
|
||||
eventsWriter, err := zw.CreateHeader(&zip.FileHeader{
|
||||
Name: "events.jsonl",
|
||||
Method: zip.Deflate,
|
||||
Modified: createdAt,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup: create events.jsonl entry: %w", err)
|
||||
}
|
||||
|
||||
hasher := sha256.New()
|
||||
teed := io.MultiWriter(eventsWriter, hasher)
|
||||
encoder := json.NewEncoder(teed)
|
||||
|
||||
var (
|
||||
count int
|
||||
appSet = make(map[string]struct{})
|
||||
minVer int
|
||||
maxVer int
|
||||
)
|
||||
|
||||
if err := iter(func(row store.ChangeRow) error {
|
||||
sv := row.SchemaVersion
|
||||
if sv <= 0 {
|
||||
sv = 1
|
||||
}
|
||||
if count == 0 {
|
||||
minVer = sv
|
||||
maxVer = sv
|
||||
} else {
|
||||
if sv < minVer {
|
||||
minVer = sv
|
||||
}
|
||||
if sv > maxVer {
|
||||
maxVer = sv
|
||||
}
|
||||
}
|
||||
line := exportLine{
|
||||
EventID: row.ID,
|
||||
SchemaVersion: sv,
|
||||
AppID: row.AppID,
|
||||
Table: row.TableName,
|
||||
RecordID: row.RecordID,
|
||||
Op: row.Op,
|
||||
Data: row.Data,
|
||||
FieldTimestamps: row.FieldTimestamps,
|
||||
ClientID: row.ClientID,
|
||||
CreatedAt: row.CreatedAt.UTC().Format(time.RFC3339Nano),
|
||||
}
|
||||
if err := encoder.Encode(line); err != nil {
|
||||
return err
|
||||
}
|
||||
appSet[row.AppID] = struct{}{}
|
||||
count++
|
||||
return nil
|
||||
}); err != nil {
|
||||
return fmt.Errorf("backup: iterate rows: %w", err)
|
||||
}
|
||||
|
||||
apps := make([]string, 0, len(appSet))
|
||||
for a := range appSet {
|
||||
apps = append(apps, a)
|
||||
}
|
||||
sort.Strings(apps)
|
||||
|
||||
manifest := manifestFile{
|
||||
FormatVersion: BackupFormatVersion,
|
||||
SchemaVersion: syncproto.CurrentSchemaVersion,
|
||||
UserID: userID,
|
||||
CreatedAt: createdAt.UTC().Format(time.RFC3339Nano),
|
||||
EventCount: count,
|
||||
EventsSHA256: hex.EncodeToString(hasher.Sum(nil)),
|
||||
Apps: apps,
|
||||
ProducedBy: "mana-sync",
|
||||
SchemaVersionMin: minVer,
|
||||
SchemaVersionMax: maxVer,
|
||||
}
|
||||
manifestBytes, err := json.MarshalIndent(manifest, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup: marshal manifest: %w", err)
|
||||
}
|
||||
manifestWriter, err := zw.CreateHeader(&zip.FileHeader{
|
||||
Name: "manifest.json",
|
||||
Method: zip.Deflate,
|
||||
Modified: createdAt,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("backup: create manifest entry: %w", err)
|
||||
}
|
||||
if _, err := manifestWriter.Write(manifestBytes); err != nil {
|
||||
return fmt.Errorf("backup: write manifest: %w", err)
|
||||
}
|
||||
|
||||
return zw.Close()
|
||||
}
|
||||
|
|
@ -1,251 +0,0 @@
|
|||
package backup
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/mana/mana-sync/internal/store"
|
||||
)
|
||||
|
||||
// rowsIterator returns a RowIterator that walks a fixed slice of rows.
|
||||
// Used in place of the Postgres store so tests exercise the writer
|
||||
// end-to-end without a live DB.
|
||||
func rowsIterator(rows []store.ChangeRow) RowIterator {
|
||||
return func(fn func(store.ChangeRow) error) error {
|
||||
for _, r := range rows {
|
||||
if err := fn(r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func sampleRows() []store.ChangeRow {
|
||||
ts := func(s string) time.Time {
|
||||
t, err := time.Parse(time.RFC3339Nano, s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
return []store.ChangeRow{
|
||||
{
|
||||
ID: "evt-1",
|
||||
AppID: "todo",
|
||||
TableName: "tasks",
|
||||
RecordID: "task-1",
|
||||
Op: "insert",
|
||||
Data: map[string]any{"title": "Buy milk"},
|
||||
ClientID: "client-a",
|
||||
CreatedAt: ts("2026-04-14T10:00:00.000Z"),
|
||||
SchemaVersion: 1,
|
||||
},
|
||||
{
|
||||
ID: "evt-2",
|
||||
AppID: "todo",
|
||||
TableName: "tasks",
|
||||
RecordID: "task-1",
|
||||
Op: "update",
|
||||
Data: map[string]any{"completed": true},
|
||||
FieldTimestamps: map[string]string{"completed": "2026-04-14T10:05:00.000Z"},
|
||||
ClientID: "client-a",
|
||||
CreatedAt: ts("2026-04-14T10:05:00.000Z"),
|
||||
SchemaVersion: 1,
|
||||
},
|
||||
{
|
||||
ID: "evt-3",
|
||||
AppID: "calendar",
|
||||
TableName: "events",
|
||||
RecordID: "evt-42",
|
||||
Op: "insert",
|
||||
Data: map[string]any{"title": "Meeting"},
|
||||
ClientID: "client-b",
|
||||
CreatedAt: ts("2026-04-14T11:00:00.000Z"),
|
||||
SchemaVersion: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// TestWriteBackup_Roundtrip writes the three-row fixture and verifies the
// produced archive end-to-end: zip structure, per-event JSON fields,
// manifest contents, and that the embedded eventsSha256 matches a fresh
// hash of the decompressed events body.
func TestWriteBackup_Roundtrip(t *testing.T) {
	var buf bytes.Buffer
	createdAt := time.Date(2026, 4, 14, 12, 0, 0, 0, time.UTC)

	if err := WriteBackup(&buf, "user-123", createdAt, rowsIterator(sampleRows())); err != nil {
		t.Fatalf("WriteBackup: %v", err)
	}

	// Archive must parse as a valid zip with exactly two entries
	// (events.jsonl + manifest.json — nothing else).
	zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
	if err != nil {
		t.Fatalf("zip.NewReader: %v", err)
	}
	if len(zr.File) != 2 {
		t.Fatalf("expected 2 entries, got %d", len(zr.File))
	}

	events := readZipEntry(t, zr, "events.jsonl")
	manifestBytes := readZipEntry(t, zr, "manifest.json")

	// events.jsonl: three newline-separated JSON records in input order.
	lines := strings.Split(strings.TrimRight(string(events), "\n"), "\n")
	if len(lines) != 3 {
		t.Fatalf("expected 3 events, got %d", len(lines))
	}

	// Event 1 is insert with data, no fieldTimestamps (omitempty drops the
	// nil map for inserts).
	var e1 map[string]any
	if err := json.Unmarshal([]byte(lines[0]), &e1); err != nil {
		t.Fatalf("parse line 0: %v", err)
	}
	if e1["op"] != "insert" || e1["eventId"] != "evt-1" || e1["appId"] != "todo" {
		t.Fatalf("event 0 unexpected: %#v", e1)
	}
	if _, ok := e1["fieldTimestamps"]; ok {
		t.Fatalf("event 0 should omit fieldTimestamps (insert)")
	}

	// Event 2 is update with fieldTimestamps surfaced.
	var e2 map[string]any
	if err := json.Unmarshal([]byte(lines[1]), &e2); err != nil {
		t.Fatalf("parse line 1: %v", err)
	}
	ft, ok := e2["fieldTimestamps"].(map[string]any)
	if !ok {
		t.Fatalf("event 1 fieldTimestamps missing")
	}
	if ft["completed"] != "2026-04-14T10:05:00.000Z" {
		t.Fatalf("event 1 fieldTimestamps wrong: %#v", ft)
	}

	// Manifest: all declared fields match what we wrote.
	var m manifestFile
	if err := json.Unmarshal(manifestBytes, &m); err != nil {
		t.Fatalf("parse manifest: %v", err)
	}
	if m.FormatVersion != BackupFormatVersion {
		t.Fatalf("formatVersion=%d want %d", m.FormatVersion, BackupFormatVersion)
	}
	if m.UserID != "user-123" {
		t.Fatalf("userId=%q want user-123", m.UserID)
	}
	if m.EventCount != 3 {
		t.Fatalf("eventCount=%d want 3", m.EventCount)
	}
	if m.SchemaVersionMin != 1 || m.SchemaVersionMax != 1 {
		t.Fatalf("schemaVersion range=[%d,%d] want [1,1]", m.SchemaVersionMin, m.SchemaVersionMax)
	}
	// The fixture uses apps "todo" and "calendar"; the writer sorts them.
	if len(m.Apps) != 2 || m.Apps[0] != "calendar" || m.Apps[1] != "todo" {
		t.Fatalf("apps=%v want sorted [calendar todo]", m.Apps)
	}
	if m.ProducedBy != "mana-sync" {
		t.Fatalf("producedBy=%q want mana-sync", m.ProducedBy)
	}

	// eventsSha256 must match a fresh SHA of the decompressed events body.
	h := sha256.New()
	h.Write(events)
	want := hex.EncodeToString(h.Sum(nil))
	if m.EventsSHA256 != want {
		t.Fatalf("eventsSha256 mismatch: manifest=%s recomputed=%s", m.EventsSHA256, want)
	}
}
|
||||
|
||||
func TestWriteBackup_EmptyUser(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
err := WriteBackup(&buf, "", time.Now(), rowsIterator(nil))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty userID")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "empty userID") {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteBackup_NoRows(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
createdAt := time.Date(2026, 4, 14, 12, 0, 0, 0, time.UTC)
|
||||
|
||||
if err := WriteBackup(&buf, "user-x", createdAt, rowsIterator(nil)); err != nil {
|
||||
t.Fatalf("WriteBackup: %v", err)
|
||||
}
|
||||
|
||||
zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
|
||||
if err != nil {
|
||||
t.Fatalf("zip.NewReader: %v", err)
|
||||
}
|
||||
|
||||
events := readZipEntry(t, zr, "events.jsonl")
|
||||
if len(events) != 0 {
|
||||
t.Fatalf("expected empty events.jsonl, got %d bytes", len(events))
|
||||
}
|
||||
|
||||
manifestBytes := readZipEntry(t, zr, "manifest.json")
|
||||
var m manifestFile
|
||||
if err := json.Unmarshal(manifestBytes, &m); err != nil {
|
||||
t.Fatalf("parse manifest: %v", err)
|
||||
}
|
||||
if m.EventCount != 0 {
|
||||
t.Fatalf("eventCount=%d want 0", m.EventCount)
|
||||
}
|
||||
if len(m.Apps) != 0 {
|
||||
t.Fatalf("apps=%v want empty", m.Apps)
|
||||
}
|
||||
// Empty body still needs a valid sha.
|
||||
if m.EventsSHA256 == "" {
|
||||
t.Fatal("eventsSha256 empty even for zero-row export")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteBackup_DefaultsSchemaVersionZeroRowsToOne(t *testing.T) {
|
||||
// Legacy rows stored before the schema_version column existed scan as
|
||||
// 0. The writer must clamp them to 1 so the manifest's
|
||||
// schemaVersionMin/Max never claims a nonexistent protocol version.
|
||||
rows := []store.ChangeRow{{
|
||||
ID: "e1", AppID: "todo", TableName: "tasks", RecordID: "t1",
|
||||
Op: "insert", Data: map[string]any{"x": 1}, ClientID: "c",
|
||||
CreatedAt: time.Now(), SchemaVersion: 0,
|
||||
}}
|
||||
var buf bytes.Buffer
|
||||
if err := WriteBackup(&buf, "u", time.Now(), rowsIterator(rows)); err != nil {
|
||||
t.Fatalf("WriteBackup: %v", err)
|
||||
}
|
||||
zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
|
||||
if err != nil {
|
||||
t.Fatalf("zip.NewReader: %v", err)
|
||||
}
|
||||
events := readZipEntry(t, zr, "events.jsonl")
|
||||
if !strings.Contains(string(events), `"schemaVersion":1`) {
|
||||
t.Fatalf("expected schemaVersion:1 in events body, got: %s", events)
|
||||
}
|
||||
}
|
||||
|
||||
// readZipEntry reads the named entry out of a zip archive in full. Fails
|
||||
// the test if the entry is missing or cannot be decompressed.
|
||||
func readZipEntry(t *testing.T, zr *zip.Reader, name string) []byte {
|
||||
t.Helper()
|
||||
for _, f := range zr.File {
|
||||
if f.Name != name {
|
||||
continue
|
||||
}
|
||||
rc, err := f.Open()
|
||||
if err != nil {
|
||||
t.Fatalf("open %s: %v", name, err)
|
||||
}
|
||||
defer rc.Close()
|
||||
body, err := io.ReadAll(rc)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", name, err)
|
||||
}
|
||||
return body
|
||||
}
|
||||
t.Fatalf("entry %q not found in zip", name)
|
||||
return nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue