mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 18:01:09 +02:00
fix(articles, mana-ai): rollout-block hardening for sync_changes projections
Four cross-cutting fixes that make the bulk-import worker safe to run
under real production load. All four were called out as live-rollout
risks in the post-ship review of docs/plans/articles-bulk-import.md.
#1 — Same fieldMetaTime bug fixed in mana-ai
The articles fix in 054b9e5be hoists the helper to its own file
`apps/api/src/modules/articles/field-meta.ts`. The same naive
`rowFM[k] >= localTime` LWW comparison existed in three more
projections under services/mana-ai (missions-projection,
snapshot-refresh, agents-projection). Once any F3 stamp lands
beside a legacy-string stamp, the comparison evaluates
`'ISO-…' >= '[object Object]'` (false) and the older value wins.
New `services/mana-ai/src/db/field-meta.ts` — same helper,
deliberately duplicated (each service treats sync_changes as a
read-only event log; sharing infra across services is out of
scope here). All 61 mana-ai bun tests still pass.
#2 — Stale 'extracting' items recycle
If the worker dies mid-fetch (OOM, pod restart), items stay in
state='extracting' forever and the job never completes. New sweep
at the start of `processOneJob`: items whose lastAttemptAt is
older than 5 minutes get bounced back to 'pending' so the next
tick re-claims them. STALE_EXTRACTING_MS tuned for the 15s
shared-rss fetch + JSDOM-parse worst case.
#3 — Pickup-row GC
Every 30 ticks (~once per minute) the worker hard-deletes
articleExtractPickup rows older than 24h. Without this a stuck
pickup-consumer (all tabs closed, Web-Lock mismatch) would let
sync_changes accumulate without bound. Logs the row count when
non-zero so we can spot stuck consumers in the wild.
#4 — DRY consent-wall heuristic
Identical CONSENT_KEYWORDS + threshold lived in routes.ts AND
import-extractor.ts. Hoisted to
`apps/api/src/modules/articles/consent-wall.ts`; both call sites
now share one heuristic.
Plan: docs/plans/articles-bulk-import.md.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e99fea1938
commit
b297f68ee4
10 changed files with 223 additions and 69 deletions
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
import type { Sql } from './connection';
|
||||
import { withUser } from './connection';
|
||||
import { fieldMetaTime } from './field-meta';
|
||||
import type { AiPolicy } from '@mana/shared-ai';
|
||||
|
||||
export interface ServerAgent {
|
||||
|
|
@ -54,7 +55,9 @@ interface ChangeRow {
|
|||
record_id: string;
|
||||
op: string;
|
||||
data: Record<string, unknown> | null;
|
||||
field_meta: Record<string, string> | null;
|
||||
/** See `./field-meta.ts` — wire shape is two-tone (legacy ISO string
|
||||
* vs. F3 `{at, actor, origin}` object). */
|
||||
field_meta: Record<string, unknown> | null;
|
||||
created_at: Date;
|
||||
}
|
||||
|
||||
|
|
@ -180,15 +183,20 @@ export function mergeRaw(rows: readonly ChangeRow[]): Record<string, unknown> |
|
|||
|
||||
for (const row of rows) {
|
||||
if (row.op === 'delete') return null;
|
||||
const rowCreatedAt = row.created_at.toISOString();
|
||||
if (!record) {
|
||||
record = row.data ? { id: row.record_id, ...row.data } : { id: row.record_id };
|
||||
fm = { ...(row.field_meta ?? {}) };
|
||||
const initFM = row.field_meta ?? {};
|
||||
fm = {};
|
||||
for (const k of Object.keys(initFM)) {
|
||||
fm[k] = fieldMetaTime(initFM[k]) || rowCreatedAt;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!row.data) continue;
|
||||
const rowFM = row.field_meta ?? {};
|
||||
for (const [k, v] of Object.entries(row.data)) {
|
||||
const serverTime = rowFM[k] ?? row.created_at.toISOString();
|
||||
const serverTime = fieldMetaTime(rowFM[k]) || rowCreatedAt;
|
||||
const localTime = fm[k] ?? '';
|
||||
if (serverTime >= localTime) {
|
||||
record[k] = v;
|
||||
|
|
|
|||
39
services/mana-ai/src/db/field-meta.ts
Normal file
39
services/mana-ai/src/db/field-meta.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Wire-shape adapter for `sync_changes.field_meta`.
|
||||
*
|
||||
* Two shapes coexist on the wire today:
|
||||
*
|
||||
* - Legacy plaintext writes: { state: 'ISO-8601' }
|
||||
* - Field-meta-overhaul (F3): { state: { at, actor, origin } }
|
||||
*
|
||||
* Every projection / snapshot-refresh in this service performs LWW
|
||||
* merges by string-comparing the per-field timestamp. A naive
|
||||
* `rowFM[k] >= localTime` works for the all-legacy case but silently
|
||||
* collapses the moment one side is an F3 object — the comparison
|
||||
* becomes `'ISO-…' >= '[object Object]'` (false), the older value
|
||||
* wins and the projection lies.
|
||||
*
|
||||
* This single helper folds both shapes into a comparable ISO string.
|
||||
* Any consumer that reads `field_meta` for LWW MUST go through it.
|
||||
*
|
||||
* Same helper exists in `apps/api/src/modules/articles/field-meta.ts`
|
||||
* (kept duplicated for now — both services treat sync_changes as a
|
||||
* read-only event log; sharing infrastructure code across services
|
||||
* is out of scope here).
|
||||
*/
|
||||
|
||||
/**
 * Extracts the comparable timestamp string from a single `field_meta[k]` slot.
 *
 * Two wire shapes are accepted:
 *  - a legacy plain string, which is returned unchanged;
 *  - an F3-style object, whose `at` property is returned when it is a string.
 *
 * @param meta - Raw slot value. May be a string, an object, `null`,
 *   `undefined`, or any other JSON value.
 * @returns The timestamp string, or `''` when no string timestamp is present.
 *   The empty string is falsy, so a call site can write
 *   `fieldMetaTime(x) || fallback` to substitute its own default.
 */
export function fieldMetaTime(meta: unknown): string {
  // Legacy shape: the slot value is the timestamp itself.
  if (typeof meta === 'string') return meta;

  // F3 shape: the timestamp lives under `at`. The truthiness check also
  // excludes `null`, whose `typeof` is 'object'.
  if (meta && typeof meta === 'object') {
    const at = (meta as { at?: unknown }).at;
    if (typeof at === 'string') return at;
  }

  // Nothing usable (missing slot, non-string `at`, unexpected primitive).
  return '';
}
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
import type { MissionGrant } from '@mana/shared-ai';
|
||||
import type { Sql } from './connection';
|
||||
import { fieldMetaTime } from './field-meta';
|
||||
|
||||
/**
|
||||
* Subset of the Mission shape the server needs. Matches
|
||||
|
|
@ -44,10 +45,18 @@ interface ChangeRow {
|
|||
user_id: string;
|
||||
op: string;
|
||||
data: Record<string, unknown> | null;
|
||||
field_meta: Record<string, string> | null;
|
||||
/**
|
||||
* Two-shaped on the wire:
|
||||
* - Legacy plaintext writes: { state: 'ISO-8601' }
|
||||
* - F3 field-meta-overhaul: { state: { at, actor, origin } }
|
||||
* The merge uses `fieldMetaTime` to fold both into a comparable string.
|
||||
*/
|
||||
field_meta: Record<string, unknown> | null;
|
||||
created_at: Date;
|
||||
}
|
||||
|
||||
// fieldMetaTime imported from ./field-meta — see comment in that file.
|
||||
|
||||
/**
|
||||
* Return all currently-active missions whose `nextRunAt` has passed.
|
||||
*
|
||||
|
|
@ -120,8 +129,9 @@ export function mergeAndFilter(
|
|||
const prevFM = (existing.__fieldMeta as Record<string, string> | undefined) ?? {};
|
||||
const nextFM = { ...prevFM };
|
||||
if (row.data) {
|
||||
const rowCreatedAt = row.created_at.toISOString();
|
||||
for (const [k, v] of Object.entries(row.data)) {
|
||||
const serverTime = row.field_meta?.[k] ?? row.created_at.toISOString();
|
||||
const serverTime = fieldMetaTime(row.field_meta?.[k]) || rowCreatedAt;
|
||||
const localTime = prevFM[k] ?? '';
|
||||
if (serverTime >= localTime) {
|
||||
existing[k] = v;
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
import type { Sql } from './connection';
|
||||
import { withUser } from './connection';
|
||||
import { fieldMetaTime } from './field-meta';
|
||||
|
||||
interface SnapshotRow {
|
||||
user_id: string;
|
||||
|
|
@ -29,7 +30,9 @@ interface ChangeRow {
|
|||
record_id: string;
|
||||
op: string;
|
||||
data: Record<string, unknown> | null;
|
||||
field_meta: Record<string, string> | null;
|
||||
/** See `./field-meta.ts` — wire shape is two-tone (legacy ISO string
|
||||
* vs. F3 `{at, actor, origin}` object). */
|
||||
field_meta: Record<string, unknown> | null;
|
||||
created_at: Date;
|
||||
}
|
||||
|
||||
|
|
@ -170,15 +173,20 @@ function mergeRaw(rows: readonly ChangeRow[]): Record<string, unknown> | null {
|
|||
|
||||
for (const row of rows) {
|
||||
if (row.op === 'delete') return null;
|
||||
const rowCreatedAt = row.created_at.toISOString();
|
||||
if (!record) {
|
||||
record = row.data ? { id: row.record_id, ...row.data } : { id: row.record_id };
|
||||
fm = { ...(row.field_meta ?? {}) };
|
||||
const initFM = row.field_meta ?? {};
|
||||
fm = {};
|
||||
for (const k of Object.keys(initFM)) {
|
||||
fm[k] = fieldMetaTime(initFM[k]) || rowCreatedAt;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!row.data) continue;
|
||||
const rowFM = row.field_meta ?? {};
|
||||
for (const [k, v] of Object.entries(row.data)) {
|
||||
const serverTime = rowFM[k] ?? row.created_at.toISOString();
|
||||
const serverTime = fieldMetaTime(rowFM[k]) || rowCreatedAt;
|
||||
const localTime = fm[k] ?? '';
|
||||
if (serverTime >= localTime) {
|
||||
record[k] = v;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue