mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-17 19:09:41 +02:00
chore(articles): hygiene pass — shared-ai actor + lib/sync-db + metrics (#5,#7,#11)
#5 — SYSTEM_ARTICLES_IMPORT_WORKER hoisted into @mana/shared-ai

The worker built its actor inline, bypassing the SystemSource union that's the blessed list for system-write principals. It now uses makeSystemActor(SYSTEM_ARTICLES_IMPORT_WORKER) like every other server-side system writer (mission-runner, projection, …).

#7 — sync-db helper hoisted out of mcp/ into lib/

Implementation moved to apps/api/src/lib/sync-db.ts; mcp/sync-db.ts is a re-export shim so existing MCP imports keep working. Articles bulk-import and future modules import from lib/ directly — no more "articles depending on mcp" layering smell.

#11 — Prometheus metrics for the worker

New counters + histogram in lib/metrics.ts under mana_api_articles_import_*:
- ticks_total{result=processed|skipped|error}
- items_total{result=extracted|error|consent_wall|cancelled}
- extract_duration_seconds (histogram, 0.25–30s buckets)
- jobs_completed_total{result=done}
- pickup_gc_rows_total

Worker tick + extractor instrumented at the right transition points. A steady-state pickup_gc_rows_total > 0 over time signals a stuck consumer somewhere — a useful operator alert.

Plan: docs/plans/articles-bulk-import.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
93545f8516
commit
59373c0d57
8 changed files with 245 additions and 122 deletions
|
|
@ -25,24 +25,27 @@
|
|||
*/
|
||||
|
||||
import { extractFromUrl } from '@mana/shared-rss';
|
||||
import { makeFieldMeta, type Actor, type FieldOrigin } from '@mana/shared-ai';
|
||||
import { getSyncConnection } from '../../mcp/sync-db';
|
||||
import {
|
||||
makeFieldMeta,
|
||||
makeSystemActor,
|
||||
originFromActor,
|
||||
SYSTEM_ARTICLES_IMPORT_WORKER,
|
||||
type Actor,
|
||||
type FieldOrigin,
|
||||
} from '@mana/shared-ai';
|
||||
import { getSyncConnection } from '../../lib/sync-db';
|
||||
import { articlesImportExtractDuration, articlesImportItemsTotal } from '../../lib/metrics';
|
||||
import { looksLikeConsentWall } from './consent-wall';
|
||||
import type { ImportItemRow } from './import-projection';
|
||||
|
||||
const MAX_ATTEMPTS = 3;
|
||||
const CLIENT_ID = 'articles-import-worker';
|
||||
|
||||
/** System-actor blob stamped on every worker write. Built inline because
|
||||
* the underlying SystemSource union in @mana/shared-ai isn't extended
|
||||
* here — both fields are runtime values, not type discriminators, so
|
||||
* this composes cleanly without a shared-ai change. */
|
||||
const WORKER_ACTOR: Actor = Object.freeze({
|
||||
kind: 'system' as const,
|
||||
principalId: 'system:articles-import-worker',
|
||||
displayName: 'Artikel-Import',
|
||||
});
|
||||
const WORKER_ORIGIN: FieldOrigin = 'system';
|
||||
/** System-actor blob stamped on every worker write — sourced from the
|
||||
* blessed SystemSource union in @mana/shared-ai so the actor.ts audit
|
||||
* + Workbench filters know about it. */
|
||||
const WORKER_ACTOR: Actor = makeSystemActor(SYSTEM_ARTICLES_IMPORT_WORKER);
|
||||
const WORKER_ORIGIN: FieldOrigin = originFromActor(WORKER_ACTOR);
|
||||
|
||||
export interface ExtractStats {
|
||||
itemId: string;
|
||||
|
|
@ -73,7 +76,9 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
|
|||
|
||||
// Step 2 — fetch + parse. Hard-failure path returns null; we treat
|
||||
// that as a single failed attempt and recycle.
|
||||
const extractStart = Date.now();
|
||||
const extracted = await extractFromUrl(item.url);
|
||||
articlesImportExtractDuration.observe((Date.now() - extractStart) / 1000);
|
||||
const nowDone = new Date().toISOString();
|
||||
|
||||
if (!extracted) {
|
||||
|
|
@ -84,6 +89,9 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
|
|||
error: nextState === 'error' ? 'Extraktion fehlgeschlagen nach mehreren Versuchen.' : null,
|
||||
lastAttemptAt: nowDone,
|
||||
});
|
||||
if (nextState === 'error') {
|
||||
articlesImportItemsTotal.inc({ result: 'error' });
|
||||
}
|
||||
return { itemId: item.id, terminal: nextState === 'error' ? 'error' : 'pending' };
|
||||
}
|
||||
|
||||
|
|
@ -122,6 +130,7 @@ export async function extractOneItem(item: ImportItemRow): Promise<ExtractStats>
|
|||
lastAttemptAt: nowDone,
|
||||
});
|
||||
|
||||
articlesImportItemsTotal.inc({ result: warning ? 'consent_wall' : 'extracted' });
|
||||
return { itemId: item.id, terminal: 'extracted' };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
* Articles Bulk-Import — sync_changes → live record projection.
|
||||
*
|
||||
* Mirror of `services/mana-ai/src/db/missions-projection.ts` and
|
||||
* `apps/api/src/mcp/sync-db.ts:readLatestRecords()`, specialised for the
|
||||
* `apps/api/src/lib/sync-db.ts:readLatestRecords()`, specialised for the
|
||||
* two tables the import-worker tick reads each cycle:
|
||||
*
|
||||
* articleImportJobs — to find running jobs whose lease is free
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
* Plan: docs/plans/articles-bulk-import.md.
|
||||
*/
|
||||
|
||||
import { getSyncConnection } from '../../mcp/sync-db';
|
||||
import { getSyncConnection } from '../../lib/sync-db';
|
||||
import { fieldMetaTime } from './field-meta';
|
||||
|
||||
type Row = Record<string, unknown>;
|
||||
|
|
|
|||
|
|
@ -22,7 +22,12 @@
|
|||
* Plan: docs/plans/articles-bulk-import.md.
|
||||
*/
|
||||
|
||||
import { getSyncConnection } from '../../mcp/sync-db';
|
||||
import { getSyncConnection } from '../../lib/sync-db';
|
||||
import {
|
||||
articlesImportJobsCompletedTotal,
|
||||
articlesImportPickupGcRows,
|
||||
articlesImportTicksTotal,
|
||||
} from '../../lib/metrics';
|
||||
import {
|
||||
listClaimableJobs,
|
||||
listItemsForJob,
|
||||
|
|
@ -94,8 +99,13 @@ async function runTickGuarded(): Promise<void> {
|
|||
if (running) return;
|
||||
running = true;
|
||||
try {
|
||||
await runTickOnce();
|
||||
const result = await runTickOnce();
|
||||
articlesImportTicksTotal.inc({ result: result.skipped ? 'skipped' : 'processed' });
|
||||
if (typeof result.pickupGcRows === 'number' && result.pickupGcRows > 0) {
|
||||
articlesImportPickupGcRows.inc(result.pickupGcRows);
|
||||
}
|
||||
} catch (err) {
|
||||
articlesImportTicksTotal.inc({ result: 'error' });
|
||||
console.error('[articles-import] tick error:', err);
|
||||
} finally {
|
||||
running = false;
|
||||
|
|
@ -229,6 +239,7 @@ async function processOneJob(job: ImportJobRow): Promise<number> {
|
|||
counterPatch.status = 'done';
|
||||
counterPatch.finishedAt = new Date().toISOString();
|
||||
dirty = true;
|
||||
articlesImportJobsCompletedTotal.inc({ result: 'done' });
|
||||
}
|
||||
if (dirty) {
|
||||
await writeJobUpdate(job.userId, job.id, counterPatch);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue