chore(articles): hygiene pass — shared-ai actor + lib/sync-db + metrics (#5,#7,#11)

#5 — SYSTEM_ARTICLES_IMPORT_WORKER hoisted into @mana/shared-ai
   The worker built its actor inline, bypassing the SystemSource union
   that's the blessed list for system-write principals. Now uses
   makeSystemActor(SYSTEM_ARTICLES_IMPORT_WORKER) like every other
   server-side system writer (mission-runner, projection, …).

#7 — sync-db helper hoisted out of mcp/ into lib/
   Implementation moved to apps/api/src/lib/sync-db.ts; mcp/sync-db.ts
   is a re-export shim so existing MCP imports keep working. Articles
   bulk-import + future modules import from lib/ directly — no more
   "articles depending on mcp" layering smell.

#11 — Prometheus metrics for the worker
   New counters + histogram in lib/metrics.ts under
   mana_api_articles_import_*:
     - ticks_total{result=processed|skipped|error}
     - items_total{result=extracted|error|consent_wall|cancelled}
     - extract_duration_seconds (histogram, 0.25–30s buckets)
     - jobs_completed_total{result=done}
     - pickup_gc_rows_total
   Worker tick + extractor instrumented at the right transition points.
   A pickup_gc_rows_total that keeps increasing in steady state signals
   a stuck consumer somewhere — a useful operator alert.

Plan: docs/plans/articles-bulk-import.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-29 01:06:15 +02:00
parent 93545f8516
commit 59373c0d57
8 changed files with 245 additions and 122 deletions

View file

@ -25,24 +25,27 @@
*/
import { extractFromUrl } from '@mana/shared-rss';
import { makeFieldMeta, type Actor, type FieldOrigin } from '@mana/shared-ai';
import { getSyncConnection } from '../../mcp/sync-db';
import {
makeFieldMeta,
makeSystemActor,
originFromActor,
SYSTEM_ARTICLES_IMPORT_WORKER,
type Actor,
type FieldOrigin,
} from '@mana/shared-ai';
import { getSyncConnection } from '../../lib/sync-db';
import { articlesImportExtractDuration, articlesImportItemsTotal } from '../../lib/metrics';
import { looksLikeConsentWall } from './consent-wall';
import type { ImportItemRow } from './import-projection';
const MAX_ATTEMPTS = 3;
const CLIENT_ID = 'articles-import-worker';
/** System-actor blob stamped on every worker write. Built inline because
* the underlying SystemSource union in @mana/shared-ai isn't extended
* here — both fields are runtime values, not type discriminators, so
* this composes cleanly without a shared-ai change. */
const WORKER_ACTOR: Actor = Object.freeze({
kind: 'system' as const,
principalId: 'system:articles-import-worker',
displayName: 'Artikel-Import',
});
const WORKER_ORIGIN: FieldOrigin = 'system';
/** System-actor blob stamped on every worker write sourced from the
* blessed SystemSource union in @mana/shared-ai so the actor.ts audit
* + Workbench filters know about it. */
const WORKER_ACTOR: Actor = makeSystemActor(SYSTEM_ARTICLES_IMPORT_WORKER);
const WORKER_ORIGIN: FieldOrigin = originFromActor(WORKER_ACTOR);
export interface ExtractStats {
itemId: string;
@ -73,7 +76,9 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
// Step 2 — fetch + parse. Hard-failure path returns null; we treat
// that as a single failed attempt and recycle.
const extractStart = Date.now();
const extracted = await extractFromUrl(item.url);
articlesImportExtractDuration.observe((Date.now() - extractStart) / 1000);
const nowDone = new Date().toISOString();
if (!extracted) {
@ -84,6 +89,9 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
error: nextState === 'error' ? 'Extraktion fehlgeschlagen nach mehreren Versuchen.' : null,
lastAttemptAt: nowDone,
});
if (nextState === 'error') {
articlesImportItemsTotal.inc({ result: 'error' });
}
return { itemId: item.id, terminal: nextState === 'error' ? 'error' : 'pending' };
}
@ -122,6 +130,7 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
lastAttemptAt: nowDone,
});
articlesImportItemsTotal.inc({ result: warning ? 'consent_wall' : 'extracted' });
return { itemId: item.id, terminal: 'extracted' };
}

View file

@ -2,7 +2,7 @@
* Articles Bulk-Import sync_changes live record projection.
*
* Mirror of `services/mana-ai/src/db/missions-projection.ts` and
* `apps/api/src/mcp/sync-db.ts:readLatestRecords()`, specialised for the
* `apps/api/src/lib/sync-db.ts:readLatestRecords()`, specialised for the
* two tables the import-worker tick reads each cycle:
*
* articleImportJobs to find running jobs whose lease is free
@ -18,7 +18,7 @@
* Plan: docs/plans/articles-bulk-import.md.
*/
import { getSyncConnection } from '../../mcp/sync-db';
import { getSyncConnection } from '../../lib/sync-db';
import { fieldMetaTime } from './field-meta';
type Row = Record<string, unknown>;

View file

@ -22,7 +22,12 @@
* Plan: docs/plans/articles-bulk-import.md.
*/
import { getSyncConnection } from '../../mcp/sync-db';
import { getSyncConnection } from '../../lib/sync-db';
import {
articlesImportJobsCompletedTotal,
articlesImportPickupGcRows,
articlesImportTicksTotal,
} from '../../lib/metrics';
import {
listClaimableJobs,
listItemsForJob,
@ -94,8 +99,13 @@ async function runTickGuarded(): Promise<void> {
if (running) return;
running = true;
try {
await runTickOnce();
const result = await runTickOnce();
articlesImportTicksTotal.inc({ result: result.skipped ? 'skipped' : 'processed' });
if (typeof result.pickupGcRows === 'number' && result.pickupGcRows > 0) {
articlesImportPickupGcRows.inc(result.pickupGcRows);
}
} catch (err) {
articlesImportTicksTotal.inc({ result: 'error' });
console.error('[articles-import] tick error:', err);
} finally {
running = false;
@ -229,6 +239,7 @@ async function processOneJob(job: ImportJobRow): Promise<number> {
counterPatch.status = 'done';
counterPatch.finishedAt = new Date().toISOString();
dirty = true;
articlesImportJobsCompletedTotal.inc({ result: 'done' });
}
if (dirty) {
await writeJobUpdate(job.userId, job.id, counterPatch);