diff --git a/apps/api/src/modules/articles/import-projection.ts b/apps/api/src/modules/articles/import-projection.ts index e832f83b0..51a005afa 100644 --- a/apps/api/src/modules/articles/import-projection.ts +++ b/apps/api/src/modules/articles/import-projection.ts @@ -39,8 +39,6 @@ export interface ImportJobRow { spaceId: string | null; totalUrls: number; status: 'queued' | 'running' | 'paused' | 'done' | 'cancelled'; - leasedBy: string | null; - leasedUntil: string | null; startedAt: string | null; finishedAt: string | null; savedCount: number; @@ -192,8 +190,6 @@ function projectJob(userId: string, recordId: string, merged: Row | null): Impor spaceId: optStr(merged.spaceId), totalUrls, status, - leasedBy: optStr(merged.leasedBy), - leasedUntil: optStr(merged.leasedUntil), startedAt: optStr(merged.startedAt), finishedAt: optStr(merged.finishedAt), savedCount: num(merged.savedCount) ?? 0, diff --git a/apps/mana/CLAUDE.md b/apps/mana/CLAUDE.md index 003ea5be5..c5804bdfb 100644 --- a/apps/mana/CLAUDE.md +++ b/apps/mana/CLAUDE.md @@ -275,6 +275,7 @@ Agents interact with the app through tools — each one either auto (executes si | food | — | `nutrition_summary`, `log_meal` | | news | `save_news_article` | — | | news-research | `research_news` | — | +| articles | `save_article`, `archive_article`, `tag_article`, `add_article_highlight`, `import_articles_from_urls` (auto) | `list_articles` | | journal | `create_journal_entry` | — | | habits | `create_habit`, `log_habit` | `get_habits` | | contacts | `create_contact` | `get_contacts` | @@ -304,6 +305,36 @@ Each template bundles: optional agent + optional scene layout + optional starter Full architecture (Planner prompt + parser in `@mana/shared-ai`, server-side runner, Postgres actor column, materialized snapshots, Multi-Agent gating, server-side web-research, Prometheus metrics + status.mana.how integration): [`docs/architecture/COMPANION_BRAIN_ARCHITECTURE.md`](../../docs/architecture/COMPANION_BRAIN_ARCHITECTURE.md) §20 
(AI Workbench) + §21 (Mission Grants) + §22 (Multi-Agent Workbench). +## Articles bulk-import + +Background pipeline that ingests N URLs into a user's reading list as +one Job, with the same encryption + scope semantics as a single-URL +save. Same shape as the AI mission runner: state lives in +`sync_changes`, a server-side worker projects + writes back, the +client encrypts the final article. + +``` +client createJob(urls) + → bulkAdd articleImportItems(state='pending') + articleImportJobs(queued) + → sync push → mana_sync.sync_changes + → apps/api worker tick (every 2s, advisory-lock-gated) + → extractFromUrl (shared-rss / Readability) + → write articleExtractPickup row + flip item → 'extracted' + → sync pull → liveQuery + → consume-pickup encryptRecord + articleTable.add + → flip item → 'saved' (or 'duplicate' / 'consent-wall') + → delete pickup row + → server flips job → 'done', emits ArticleImportFinished +``` + +Tables: `articleImportJobs`, `articleImportItems`, `articleExtractPickup` +(all plaintext-allowlisted — see `data/crypto/plaintext-allowlist.ts`). +Actor on every server-write: `system:articles-import-worker`. Worker +metrics under `mana_api_articles_import_*`. Hard cap of 200 URLs per +job (`MAX_URLS_PER_JOB` in `modules/articles/stores/imports.svelte.ts`). + +Plan: [`docs/plans/articles-bulk-import.md`](../../docs/plans/articles-bulk-import.md). + ## Reference Documents | Path | Purpose | diff --git a/apps/mana/apps/web/src/lib/data/database.ts b/apps/mana/apps/web/src/lib/data/database.ts index 89fce5dea..95f89dae5 100644 --- a/apps/mana/apps/web/src/lib/data/database.ts +++ b/apps/mana/apps/web/src/lib/data/database.ts @@ -1465,6 +1465,34 @@ db.version(59).stores({ documentTags: null, }); +// v60 — Articles bulk-import schema cleanup. +// Two changes, both lossless: +// +// 1. articleImportJobs: drop the unused `leasedBy`/`leasedUntil` +// columns. 
They were on the original v57 schema as a soft-lease +// handshake, but the worker uses pg_try_advisory_xact_lock +// instead and never wrote them. Dexie's index list shrinks but +// no data is migrated — the columns simply disappear from +// future writes; existing rows still carry them as zombies (a +// one-shot row-rewrite to delete the field would be a hard- +// migration; not worth it for two never-written nulls). +// 2. articleImportItems: drop the standalone `state` index. +// `[jobId+state]` covers the only hot query (worker's per-job +// pending scan). The state-solo index had no call site — +// retryFailed uses [jobId+state]. Trimming the index list saves +// a bit of write amplification. +// +// Kept on the schema (not dropped here): `idx` standalone index on +// articleImportItems. It's also unused right now, but the +// JobDetailView currently sorts items in JS via .sort((a,b)=>a.idx-b.idx); +// if that view ever switches to a server-side ordered scan we'd want +// the index back, and re-adding indexes after the fact is more +// painful than keeping a small one around. 
+db.version(60).stores({ + articleImportJobs: 'id, status, [spaceId+status], _updatedAtIndex', + articleImportItems: 'id, jobId, [jobId+state], idx', +}); + // ─── Sync Routing ────────────────────────────────────────── // SYNC_APP_MAP, TABLE_TO_SYNC_NAME, TABLE_TO_APP, SYNC_NAME_TO_TABLE, // toSyncName() and fromSyncName() are now derived from per-module diff --git a/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte b/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte index ea7c72065..848b33d94 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte +++ b/apps/mana/apps/web/src/lib/modules/articles/components/BulkImportForm.svelte @@ -7,13 +7,14 @@ --> -{#if jobs.length > 0} +{#if allJobs.length > 0} - Bisherige Imports + + Bisherige Imports + + (filter = 'all')} + > + Alle ({allJobs.length}) + + (filter = 'active')} + disabled={activeCount === 0} + > + Aktiv ({activeCount}) + + (filter = 'done')} + disabled={doneCount === 0} + > + Fertig ({doneCount}) + + (filter = 'errors')} + disabled={errorCount === 0} + > + Mit Fehlern ({errorCount}) + + + + {#if visibleJobs.length === 0} + Keine Jobs in dieser Ansicht. 
+ {/if} - {#each jobs as job (job.id)} + {#each visibleJobs as job (job.id)} goto(`/articles/import/${job.id}`)}> {statusLabel(job.status)} {progress(job)} @@ -63,10 +127,54 @@ margin: 1.5rem auto 0; padding: 0 1.5rem; } + .list-header { + display: flex; + gap: 0.85rem; + align-items: baseline; + flex-wrap: wrap; + margin-bottom: 0.65rem; + } .jobs-list h2 { - margin: 0 0 0.65rem 0; + margin: 0; font-size: 1.05rem; } + .filter-tabs { + display: flex; + gap: 0.25rem; + flex-wrap: wrap; + } + .tab { + padding: 0.18rem 0.55rem; + border-radius: 999px; + border: 1px solid var(--color-border, rgba(0, 0, 0, 0.12)); + background: transparent; + color: var(--color-text-muted, #64748b); + font: inherit; + font-size: 0.78rem; + cursor: pointer; + } + .tab:hover:not(:disabled) { + border-color: color-mix(in srgb, #f97316 60%, transparent); + color: inherit; + } + .tab:disabled { + opacity: 0.4; + cursor: not-allowed; + } + .tab-active { + background: #f97316; + color: white; + border-color: #f97316; + } + .tab-active:hover:not(:disabled) { + background: #ea580c; + color: white; + } + .empty-filter { + margin: 0.5rem 0 0 0; + color: var(--color-text-muted, #64748b); + font-size: 0.85rem; + } .jobs-list ul { list-style: none; margin: 0; diff --git a/apps/mana/apps/web/src/lib/modules/articles/queries.ts b/apps/mana/apps/web/src/lib/modules/articles/queries.ts index 0819f9eae..d7560fd08 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/queries.ts +++ b/apps/mana/apps/web/src/lib/modules/articles/queries.ts @@ -285,8 +285,6 @@ export function toImportJob(local: LocalArticleImportJob): ArticleImportJob { id: local.id, totalUrls: local.totalUrls, status: local.status, - leasedBy: local.leasedBy ?? null, - leasedUntil: local.leasedUntil ?? null, startedAt: local.startedAt ?? null, finishedAt: local.finishedAt ?? null, savedCount: local.savedCount ?? 
0, diff --git a/apps/mana/apps/web/src/lib/modules/articles/stores/imports.svelte.ts b/apps/mana/apps/web/src/lib/modules/articles/stores/imports.svelte.ts index 6eaa25ce4..7f9bb11f5 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/stores/imports.svelte.ts +++ b/apps/mana/apps/web/src/lib/modules/articles/stores/imports.svelte.ts @@ -26,6 +26,17 @@ import type { // (BulkImportForm, tools.ts) keep working unchanged. export { parseUrls, type ParsedUrls }; +/** + * Hard cap on the URL count per job. The worker can chew through any + * number of items, but at very high counts the UI becomes unwieldy + * (JobDetailView is a flat list, no virtualisation yet) and the + * worst-case wall-clock duration climbs into the multi-hour range + * (50 URLs ≈ 5–10 min at concurrency 3, scales linearly). 200 is a + * pragmatic ceiling — real reading-list dumps from Pocket exports + * average 50–150 items. + */ +export const MAX_URLS_PER_JOB = 200; + export const articleImportsStore = { /** * Create a job with N items, all in state='pending'. Returns the @@ -39,14 +50,17 @@ export const articleImportsStore = { if (urls.length === 0) { throw new Error('createJob: empty url list'); } + if (urls.length > MAX_URLS_PER_JOB) { + throw new Error( + `createJob: too many URLs (${urls.length}). 
Max ${MAX_URLS_PER_JOB} pro Job — splitte den Import in mehrere Jobs.` + ); + } const jobId = crypto.randomUUID(); const job: LocalArticleImportJob = { id: jobId, totalUrls: urls.length, status: 'queued', - leasedBy: null, - leasedUntil: null, startedAt: null, finishedAt: null, savedCount: 0, diff --git a/apps/mana/apps/web/src/lib/modules/articles/types.ts b/apps/mana/apps/web/src/lib/modules/articles/types.ts index 89cab9fdc..2dc0b5010 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/types.ts +++ b/apps/mana/apps/web/src/lib/modules/articles/types.ts @@ -165,10 +165,6 @@ export type ArticleImportItemState = export interface LocalArticleImportJob extends BaseRecord { totalUrls: number; status: ArticleImportJobStatus; - /** Worker lease — workerId of the apps/api instance that claimed the job. */ - leasedBy: string | null; - /** ISO timestamp; lease is dead once `leasedUntil < now`. */ - leasedUntil: string | null; startedAt: string | null; finishedAt: string | null; /** Counters mirror the per-item terminal states. Cache for fast list @@ -178,6 +174,10 @@ export interface LocalArticleImportJob extends BaseRecord { duplicateCount: number; errorCount: number; warningCount: number; + // NOTE: `leasedBy` + `leasedUntil` were defined on the original + // schema as a soft-lease handshake but the worker uses + // pg_try_advisory_xact_lock instead, so they were never written. + // Removed in Dexie v60 — see database.ts. 
} export interface LocalArticleImportItem extends BaseRecord { @@ -227,8 +227,6 @@ export interface ArticleImportJob { id: string; totalUrls: number; status: ArticleImportJobStatus; - leasedBy: string | null; - leasedUntil: string | null; startedAt: string | null; finishedAt: string | null; savedCount: number; diff --git a/docs/ENVIRONMENT_VARIABLES.md b/docs/ENVIRONMENT_VARIABLES.md index 7a4843a2a..70cc6dbb5 100644 --- a/docs/ENVIRONMENT_VARIABLES.md +++ b/docs/ENVIRONMENT_VARIABLES.md @@ -113,6 +113,12 @@ The generator then creates app-specific `.env` files with the correct prefixes f | `MANA_SUPABASE_URL` | Supabase project URL | | `MANA_SUPABASE_ANON_KEY` | Supabase anonymous key | +### Mana API — Articles Bulk-Import Worker + +| Variable | Description | Default | +|----------|-------------|---------| +| `ARTICLES_IMPORT_WORKER_DISABLED` | Set to `true` to skip starting the bulk-import worker on this apps/api instance. Useful for tests, or when running multiple apps/api replicas and you want to designate a specific one as the worker. The worker uses `pg_try_advisory_xact_lock` so multiple instances are safe by default — this env-var is the explicit opt-out. | `false` | + ### Cards Project | Variable | Description | Default |
Keine Jobs in dieser Ansicht.