From e37c008a7aad541bfb380fc42dfcf1ee039b867e Mon Sep 17 00:00:00 2001 From: Till JS Date: Wed, 29 Apr 2026 02:42:46 +0200 Subject: [PATCH] =?UTF-8?q?chore(articles):=20polish=20pass=20=E2=80=94=20?= =?UTF-8?q?schema=20cleanup,=20MAX=20cap,=20filters,=20docs=20(#8,#9,#13,#?= =?UTF-8?q?15,#18,#20)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Polish-pass on top of the bulk-import rollout. Five contained items. #8 + #9 — Dexie v60 schema cleanup - Drop articleImportJobs.leasedBy + .leasedUntil. They were defined on the original v57 schema as a soft-lease handshake, but the worker uses pg_try_advisory_xact_lock and never wrote them. Local-* type + projection row stripped. - Drop the standalone `state` index on articleImportItems. [jobId+state] covers the worker's hot query; the state-solo index had no call site. Both changes lossless — Dexie just removes the column declarations from new rows; existing rows still carry the dead nulls (zombies) until the next full row-rewrite. Not worth a hard migration for two never-written columns. #15 — MAX_URLS_PER_JOB hard cap (200) articleImportsStore.createJob() throws if the URL list exceeds the cap. BulkImportForm surfaces the limit in the live counter chip and disables the submit when over. The worker can chew through any N, but at high counts the UI gets unwieldy (no virtualisation) and wall-clock duration climbs into multi-hour. 200 is a pragmatic ceiling — Pocket-export dumps average 50–150. #13 — Filter-Tabs in JobsList Pill-style tabs above the list: Alle / Aktiv / Fertig / Mit Fehlern, each with the row count. Disabled when the bucket is empty so the user only sees actionable filters. The "Mit Fehlern" filter (errorCount > 0) is the most valuable for triage. #18 — apps/mana/CLAUDE.md - Articles row added to the Tool Coverage table (5 propose + 1 auto, including the new auto-policy import_articles_from_urls). 
- New "Articles bulk-import" section after the AI Workbench part: pipeline diagram, table list, actor + metrics + cap pointers. #20 — ARTICLES_IMPORT_WORKER_DISABLED env var documented New row under "Mana API — Articles Bulk-Import Worker" in docs/ENVIRONMENT_VARIABLES.md. Plan: docs/plans/articles-bulk-import.md. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/modules/articles/import-projection.ts | 4 - apps/mana/CLAUDE.md | 31 +++++ apps/mana/apps/web/src/lib/data/database.ts | 28 +++++ .../articles/components/BulkImportForm.svelte | 23 +++- .../articles/components/JobsList.svelte | 118 +++++++++++++++++- .../web/src/lib/modules/articles/queries.ts | 2 - .../modules/articles/stores/imports.svelte.ts | 18 ++- .../web/src/lib/modules/articles/types.ts | 10 +- docs/ENVIRONMENT_VARIABLES.md | 6 + 9 files changed, 218 insertions(+), 22 deletions(-) diff --git a/apps/api/src/modules/articles/import-projection.ts b/apps/api/src/modules/articles/import-projection.ts index e832f83b0..51a005afa 100644 --- a/apps/api/src/modules/articles/import-projection.ts +++ b/apps/api/src/modules/articles/import-projection.ts @@ -39,8 +39,6 @@ export interface ImportJobRow { spaceId: string | null; totalUrls: number; status: 'queued' | 'running' | 'paused' | 'done' | 'cancelled'; - leasedBy: string | null; - leasedUntil: string | null; startedAt: string | null; finishedAt: string | null; savedCount: number; @@ -192,8 +190,6 @@ function projectJob(userId: string, recordId: string, merged: Row | null): Impor spaceId: optStr(merged.spaceId), totalUrls, status, - leasedBy: optStr(merged.leasedBy), - leasedUntil: optStr(merged.leasedUntil), startedAt: optStr(merged.startedAt), finishedAt: optStr(merged.finishedAt), savedCount: num(merged.savedCount) ?? 
0, diff --git a/apps/mana/CLAUDE.md b/apps/mana/CLAUDE.md index 003ea5be5..c5804bdfb 100644 --- a/apps/mana/CLAUDE.md +++ b/apps/mana/CLAUDE.md @@ -275,6 +275,7 @@ Agents interact with the app through tools — each one either auto (executes si | food | — | `nutrition_summary`, `log_meal` | | news | `save_news_article` | — | | news-research | `research_news` | — | +| articles | `save_article`, `archive_article`, `tag_article`, `add_article_highlight`, `import_articles_from_urls` (auto) | `list_articles` | | journal | `create_journal_entry` | — | | habits | `create_habit`, `log_habit` | `get_habits` | | contacts | `create_contact` | `get_contacts` | @@ -304,6 +305,36 @@ Each template bundles: optional agent + optional scene layout + optional starter Full architecture (Planner prompt + parser in `@mana/shared-ai`, server-side runner, Postgres actor column, materialized snapshots, Multi-Agent gating, server-side web-research, Prometheus metrics + status.mana.how integration): [`docs/architecture/COMPANION_BRAIN_ARCHITECTURE.md`](../../docs/architecture/COMPANION_BRAIN_ARCHITECTURE.md) §20 (AI Workbench) + §21 (Mission Grants) + §22 (Multi-Agent Workbench). +## Articles bulk-import + +Background pipeline that ingests N URLs into a user's reading list as +one Job, with the same encryption + scope semantics as a single-URL +save. Same shape as the AI mission runner: state lives in +`sync_changes`, a server-side worker projects + writes back, the +client encrypts the final article. 
+ +``` +client createJob(urls) + → bulkAdd articleImportItems(state='pending') + articleImportJobs(queued) + → sync push → mana_sync.sync_changes + → apps/api worker tick (every 2s, advisory-lock-gated) + → extractFromUrl (shared-rss / Readability) + → write articleExtractPickup row + flip item → 'extracted' + → sync pull → liveQuery + → consume-pickup encryptRecord + articleTable.add + → flip item → 'saved' (or 'duplicate' / 'consent-wall') + → delete pickup row + → server flips job → 'done', emits ArticleImportFinished +``` + +Tables: `articleImportJobs`, `articleImportItems`, `articleExtractPickup` +(all plaintext-allowlisted — see `data/crypto/plaintext-allowlist.ts`). +Actor on every server-write: `system:articles-import-worker`. Worker +metrics under `mana_api_articles_import_*`. Hard cap of 200 URLs per +job (`MAX_URLS_PER_JOB` in `modules/articles/stores/imports.svelte.ts`). + +Plan: [`docs/plans/articles-bulk-import.md`](../../docs/plans/articles-bulk-import.md). + ## Reference Documents | Path | Purpose | diff --git a/apps/mana/apps/web/src/lib/data/database.ts b/apps/mana/apps/web/src/lib/data/database.ts index 89fce5dea..95f89dae5 100644 --- a/apps/mana/apps/web/src/lib/data/database.ts +++ b/apps/mana/apps/web/src/lib/data/database.ts @@ -1465,6 +1465,34 @@ db.version(59).stores({ documentTags: null, }); +// v60 — Articles bulk-import schema cleanup. +// Two changes, both lossless: +// +// 1. articleImportJobs: drop the unused `leasedBy`/`leasedUntil` +// columns. They were on the original v57 schema as a soft-lease +// handshake, but the worker uses pg_try_advisory_xact_lock +// instead and never wrote them. Dexie's index list shrinks but +// no data is migrated — the columns simply disappear from +// future writes; existing rows still carry them as zombies (a +// one-shot row-rewrite to delete the field would be a hard- +// migration; not worth it for two never-written nulls). +// 2. articleImportItems: drop the standalone `state` index. 
+// `[jobId+state]` covers the only hot query (worker's per-job +// pending scan). The state-solo index had no call site — +// retryFailed uses [jobId+state]. Trimming the index list saves +// a bit of write amplification. +// +// Kept on the schema (not dropped here): `idx` standalone index on +// articleImportItems. It's also unused right now, but the +// JobDetailView currently sorts items in JS via .sort((a,b)=>a.idx-b.idx); +// if that view ever switches to an index-backed ordered scan in Dexie we'd want +// the index back, and re-adding indexes after the fact is more +// painful than keeping a small one around. +db.version(60).stores({ + articleImportJobs: 'id, status, [spaceId+status], _updatedAtIndex', + articleImportItems: 'id, jobId, [jobId+state], idx', +}); + // ─── Sync Routing ────────────────────────────────────────── // SYNC_APP_MAP, TABLE_TO_SYNC_NAME, TABLE_TO_APP, SYNC_NAME_TO_TABLE, // toSyncName() and fromSyncName() are now derived from per-module diff --git a/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte b/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte index ea7c72065..848b33d94 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte +++ b/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte @@ -7,13 +7,14 @@ --> -{#if jobs.length > 0} +{#if allJobs.length > 0}
-

Bisherige Imports

+
+

Bisherige Imports

+ +
+ {#if visibleJobs.length === 0} +

Keine Jobs in dieser Ansicht.

+ {/if}
    - {#each jobs as job (job.id)} + {#each visibleJobs as job (job.id)}