/**
 * Articles Bulk-Import — sync_changes → live record projection.
 *
 * Mirror of `services/mana-ai/src/db/missions-projection.ts` and
 * `apps/api/src/mcp/sync-db.ts:readLatestRecords()`, specialised for the
 * two tables the import-worker tick reads each cycle:
 *
 *   articleImportJobs  — to find running jobs whose lease is free
 *   articleImportItems — to find pending items inside those jobs
 *
 * No materialized snapshots yet — this is the simple "replay every row
 * for these tables" path. The total volume is small (a few hundred rows
 * per active job, all import history per user) and the worker tick is
 * the only consumer. If the table grows we can plug in the same
 * `mission_snapshots` pattern mana-ai uses; the projection API stays
 * the same.
 *
 * Plan: docs/plans/articles-bulk-import.md.
 */

import { getSyncConnection } from '../../mcp/sync-db';

/** A merged record: field name → last-written value. Values are validated at projection time. */
type Row = Record<string, unknown>;

/** One row of `sync_changes`, restricted to the columns selected in this module. */
interface ChangeRow {
  user_id: string;
  record_id: string;
  /** Change operation; only `'delete'` is treated specially by the merge. */
  op: string;
  /** Field values carried by this change, or null when the change has no payload. */
  data: Row | null;
  /** Per-field write timestamps (compared as strings), keyed by field name. */
  field_meta: Record<string, string> | null;
  created_at: Date;
}

/** Live projection of an `articleImportJobs` record. */
export interface ImportJobRow {
  id: string;
  userId: string;
  spaceId: string | null;
  totalUrls: number;
  status: 'queued' | 'running' | 'paused' | 'done' | 'cancelled';
  leasedBy: string | null;
  leasedUntil: string | null;
  startedAt: string | null;
  finishedAt: string | null;
  savedCount: number;
  duplicateCount: number;
  errorCount: number;
  warningCount: number;
}

/** Every state an import item can be in. */
export type ImportItemState =
  | 'pending'
  | 'extracting'
  | 'extracted'
  | 'saved'
  | 'duplicate'
  | 'consent-wall'
  | 'error'
  | 'cancelled';

/** Live projection of an `articleImportItems` record. */
export interface ImportItemRow {
  id: string;
  userId: string;
  spaceId: string | null;
  jobId: string;
  idx: number;
  url: string;
  state: ImportItemState;
  articleId: string | null;
  warning: 'probable_consent_wall' | null;
  error: string | null;
  attempts: number;
  lastAttemptAt: string | null;
}

/**
 * Cross-user scan: which jobs need attention this tick. RLS is
 * intentionally NOT applied — the worker is a privileged consumer that
 * needs to see all users' running jobs in one pass. Per-user RLS
 * scoping is applied on the write-back path in import-extractor.ts.
 *
 * Only jobs whose merged status is `'running'` or `'queued'` are
 * returned. The lease fields (`leasedBy`, `leasedUntil`) are projected
 * but NOT filtered here; deciding whether a lease is free is left to
 * the caller.
 *
 * @returns Every non-deleted, well-formed job in `queued` or `running` state.
 */
export async function listClaimableJobs(): Promise<ImportJobRow[]> {
  const sql = getSyncConnection();
  // The ORDER BY is load-bearing: mergeByUserAndRecord expects each
  // (user_id, record_id) group to be contiguous and oldest-first.
  // NOTE(review): the `<ChangeRow[]>` annotation assumes getSyncConnection()
  // returns a postgres.js `Sql` instance — confirm against sync-db.ts.
  const rows = await sql<ChangeRow[]>`
    SELECT user_id, record_id, op, data, field_meta, created_at
    FROM sync_changes
    WHERE app_id = 'articles'
      AND table_name = 'articleImportJobs'
    ORDER BY user_id, record_id, created_at ASC
  `;

  const out: ImportJobRow[] = [];
  for (const m of mergeByUserAndRecord(rows).values()) {
    const job = projectJob(m.userId, m.recordId, m.merged);
    if (!job) continue;
    if (job.status !== 'running' && job.status !== 'queued') continue;
    out.push(job);
  }
  return out;
}

/**
 * Per-job item scan. Returns ALL items so the worker can compute
 * job-completion + counter deltas in one pass.
 *
 * The query loads every `articleImportItems` change for the user; the
 * `jobId` filter is applied after the merge, on the projected record.
 *
 * @param userId Owner of the job; restricts the scan to this user's changes.
 * @param jobId  Job whose items should be returned.
 * @returns The job's non-deleted, well-formed items, sorted ascending by `idx`.
 */
export async function listItemsForJob(userId: string, jobId: string): Promise<ImportItemRow[]> {
  const sql = getSyncConnection();
  // user_id is fixed by the WHERE clause, so ordering by record_id then
  // created_at is enough to keep each record's changes contiguous.
  const rows = await sql<ChangeRow[]>`
    SELECT user_id, record_id, op, data, field_meta, created_at
    FROM sync_changes
    WHERE app_id = 'articles'
      AND table_name = 'articleImportItems'
      AND user_id = ${userId}
    ORDER BY record_id, created_at ASC
  `;

  const out: ImportItemRow[] = [];
  for (const m of mergeByUserAndRecord(rows).values()) {
    const item = projectItem(m.userId, m.recordId, m.merged);
    if (!item || item.jobId !== jobId) continue;
    out.push(item);
  }
  out.sort((a, b) => a.idx - b.idx);
  return out;
}

// ─── Internal: LWW merge per (userId, recordId) ──────────────

/** Result of replaying all changes of one record. `merged` is null when the record ends up deleted or never had data. */
interface MergedEntry {
  userId: string;
  recordId: string;
  merged: Row | null;
}

/**
 * Replays change rows into one merged record per (user_id, record_id),
 * using field-level last-write-wins.
 *
 * Precondition: `rows` must be sorted so that all rows of one
 * (user_id, record_id) pair are contiguous and in ascending write order.
 * A group is flushed as soon as the key changes, so a key that
 * reappears later would overwrite its earlier entry in the result map.
 *
 * Merge rules, per row:
 *  - `op === 'delete'` clears the record; a later row with data
 *    recreates it from scratch.
 *  - A row without `data` is ignored.
 *  - The first data row of a (re)created record seeds it as
 *    `{ id: record_id, ...data }` and adopts that row's `field_meta`.
 *  - For each field of a later row, the write timestamp is
 *    `field_meta[field]`, falling back to the row's `created_at` as an
 *    ISO string. The field is overwritten when that timestamp is `>=`
 *    the stored one (string comparison), so ties go to the later row.
 *
 * @param rows Change rows in the order described above.
 * @returns Map keyed by `"<user_id>:<record_id>"`.
 */
function mergeByUserAndRecord(rows: readonly ChangeRow[]): Map<string, MergedEntry> {
  const out = new Map<string, MergedEntry>();

  type Cur = {
    key: string;
    userId: string;
    recordId: string;
    record: Row | null;
    /** Field name → timestamp of the write currently held in `record`. */
    fm: Record<string, string>;
  };
  let current: Cur | null = null;

  const flush = (c: Cur) => {
    out.set(c.key, { userId: c.userId, recordId: c.recordId, merged: c.record });
  };

  for (const r of rows) {
    const key = `${r.user_id}:${r.record_id}`;
    if (!current || current.key !== key) {
      // Key changed: the previous record is complete, start a new one.
      if (current) flush(current);
      current = { key, userId: r.user_id, recordId: r.record_id, record: null, fm: {} };
    }

    if (r.op === 'delete') {
      current.record = null;
      continue;
    }
    if (!r.data) continue;

    if (!current.record) {
      // First data row (or first after a delete): seed record and timestamps.
      current.record = { id: r.record_id, ...r.data };
      current.fm = { ...(r.field_meta ?? {}) };
      continue;
    }

    const rowFM = r.field_meta ?? {};
    for (const [k, v] of Object.entries(r.data)) {
      const serverTime = rowFM[k] ?? r.created_at.toISOString();
      // A field with no recorded timestamp compares as '' and always loses.
      const localTime = current.fm[k] ?? '';
      if (serverTime >= localTime) {
        current.record[k] = v;
        current.fm[k] = serverTime;
      }
    }
  }
  if (current) flush(current);
  return out;
}

/**
 * Validates a merged record and shapes it into an {@link ImportJobRow}.
 *
 * @returns null when the record is missing, has a truthy `deletedAt`,
 *   lacks a finite numeric `totalUrls`, or has an unknown `status`.
 *   Missing counters default to 0; non-string or empty optional strings
 *   become null.
 */
function projectJob(userId: string, recordId: string, merged: Row | null): ImportJobRow | null {
  if (!merged || merged.deletedAt) return null;
  const totalUrls = num(merged.totalUrls);
  const status = str(merged.status);
  if (totalUrls == null || !isJobStatus(status)) return null;
  return {
    id: recordId,
    userId,
    spaceId: optStr(merged.spaceId),
    totalUrls,
    status,
    leasedBy: optStr(merged.leasedBy),
    leasedUntil: optStr(merged.leasedUntil),
    startedAt: optStr(merged.startedAt),
    finishedAt: optStr(merged.finishedAt),
    savedCount: num(merged.savedCount) ?? 0,
    duplicateCount: num(merged.duplicateCount) ?? 0,
    errorCount: num(merged.errorCount) ?? 0,
    warningCount: num(merged.warningCount) ?? 0,
  };
}

/**
 * Validates a merged record and shapes it into an {@link ImportItemRow}.
 *
 * @returns null when the record is missing, has a truthy `deletedAt`,
 *   has an empty or non-string `jobId` or `url`, an unknown `state`, or
 *   a non-numeric `idx`. Any `warning` other than
 *   `'probable_consent_wall'` is projected as null; a missing
 *   `attempts` defaults to 0.
 */
function projectItem(userId: string, recordId: string, merged: Row | null): ImportItemRow | null {
  if (!merged || merged.deletedAt) return null;
  const jobId = str(merged.jobId);
  const url = str(merged.url);
  const state = str(merged.state);
  const idx = num(merged.idx);
  if (!jobId || !url || !isItemState(state) || idx == null) return null;
  return {
    id: recordId,
    userId,
    spaceId: optStr(merged.spaceId),
    jobId,
    idx,
    url,
    state,
    articleId: optStr(merged.articleId),
    warning: merged.warning === 'probable_consent_wall' ? 'probable_consent_wall' : null,
    error: optStr(merged.error),
    attempts: num(merged.attempts) ?? 0,
    lastAttemptAt: optStr(merged.lastAttemptAt),
  };
}

/** Type guard: `s` is one of the known job statuses. */
function isJobStatus(s: string): s is ImportJobRow['status'] {
  return s === 'queued' || s === 'running' || s === 'paused' || s === 'done' || s === 'cancelled';
}

/** Type guard: `s` is one of the known item states. */
function isItemState(s: string): s is ImportItemState {
  return (
    s === 'pending' ||
    s === 'extracting' ||
    s === 'extracted' ||
    s === 'saved' ||
    s === 'duplicate' ||
    s === 'consent-wall' ||
    s === 'error' ||
    s === 'cancelled'
  );
}

/** Returns `v` when it is a finite number, otherwise null. */
function num(v: unknown): number | null {
  return typeof v === 'number' && Number.isFinite(v) ? v : null;
}

/** Returns `v` when it is a string, otherwise the empty string. */
function str(v: unknown): string {
  return typeof v === 'string' ? v : '';
}

/** Returns `v` when it is a non-empty string, otherwise null. */
function optStr(v: unknown): string | null {
  return typeof v === 'string' && v ? v : null;
}