diff --git a/apps/api/src/modules/articles/consent-wall.test.ts b/apps/api/src/modules/articles/consent-wall.test.ts new file mode 100644 index 000000000..8cf35579d --- /dev/null +++ b/apps/api/src/modules/articles/consent-wall.test.ts @@ -0,0 +1,47 @@ +import { describe, it, expect } from 'bun:test'; +import { looksLikeConsentWall } from './consent-wall'; + +describe('looksLikeConsentWall', () => { + it('flags short text containing German consent vocabulary', () => { + const text = + 'Cookies zustimmen — Wir und unsere Partner speichern Informationen auf einem Endgerät.'; + expect(looksLikeConsentWall(text, 14)).toBe(true); + }); + + it('flags short English consent dialogs', () => { + const text = 'Please accept all cookies to continue using this website.'; + expect(looksLikeConsentWall(text, 9)).toBe(true); + }); + + it('flags JavaScript-disabled walls', () => { + const text = 'JavaScript is disabled. Please enable JavaScript to continue.'; + expect(looksLikeConsentWall(text, 7)).toBe(true); + }); + + it('does NOT flag long articles even if they mention cookies', () => { + // Long-form article that happens to mention cookies in body. The + // heuristic only fires below the wordcount threshold (300) so a + // real article about cookies isn't misclassified. + const text = 'cookie consent ' + 'lorem '.repeat(400); + expect(looksLikeConsentWall(text, 800)).toBe(false); + }); + + it('does NOT flag short text without consent vocabulary', () => { + const text = 'A short blog post about hiking trails in the Black Forest.'; + expect(looksLikeConsentWall(text, 11)).toBe(false); + }); + + it('is case-insensitive', () => { + const text = 'COOKIES ZUSTIMMEN — KLICKE HIER'; + expect(looksLikeConsentWall(text, 4)).toBe(true); + }); + + it('returns false on empty content', () => { + expect(looksLikeConsentWall('', 0)).toBe(false); + }); + + it('returns false at exactly the wordcount threshold (boundary check)', () => { + const text = 'cookie consent ' + 'lorem '.repeat(300); + expect(looksLikeConsentWall(text, 300)).toBe(false); + }); +}); diff --git a/apps/api/src/modules/articles/field-meta.test.ts b/apps/api/src/modules/articles/field-meta.test.ts new file mode 100644 index 000000000..52012b2af --- /dev/null +++ b/apps/api/src/modules/articles/field-meta.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'bun:test'; +import { fieldMetaTime } from './field-meta'; + +describe('fieldMetaTime — wire-shape adapter for sync_changes.field_meta', () => { + it('passes through legacy plain ISO strings unchanged', () => { + expect(fieldMetaTime('2026-04-28T21:14:30.000Z')).toBe('2026-04-28T21:14:30.000Z'); + }); + + it('extracts the `at` field from F3 object stamps', () => { + expect( + fieldMetaTime({ + at: '2026-04-28T21:14:30.000Z', + actor: { kind: 'system', principalId: 'system:foo', displayName: 'Foo' }, + origin: 'system', + }) + ).toBe('2026-04-28T21:14:30.000Z'); + }); + + it('returns "" for undefined / null (so callers can fall back)', () => { + expect(fieldMetaTime(undefined)).toBe(''); + expect(fieldMetaTime(null)).toBe(''); + }); + + it('returns "" for malformed objects without an at-string', () => { + expect(fieldMetaTime({})).toBe(''); + expect(fieldMetaTime({ at: 12345 })).toBe(''); + expect(fieldMetaTime({ at: null })).toBe(''); + }); + + it('returns "" for non-string non-object inputs', () => { + expect(fieldMetaTime(42)).toBe(''); + expect(fieldMetaTime(true)).toBe(''); + expect(fieldMetaTime([])).toBe(''); + }); + + // Regression: this is the bug that triggered the cross-service fix. + // Before fieldMetaTime, a string >= object compare evaluated to false + // stably and the older value won. Now both shapes fold to comparable + // ISO strings. + it('makes string-vs-object comparison work correctly across both shapes', () => { + const earlierLegacy = '2026-04-28T21:00:00.000Z'; + const laterF3 = { + at: '2026-04-28T22:00:00.000Z', + actor: { kind: 'user', principalId: 'u', displayName: 'Du' }, + origin: 'user', + }; + // The F3 stamp is later in time, so its normalised form must + // compare strictly greater than the legacy stamp. + expect(fieldMetaTime(laterF3) > fieldMetaTime(earlierLegacy)).toBe(true); + }); +}); diff --git a/apps/api/src/modules/articles/import-worker.test.ts b/apps/api/src/modules/articles/import-worker.test.ts new file mode 100644 index 000000000..d9f63fc19 --- /dev/null +++ b/apps/api/src/modules/articles/import-worker.test.ts @@ -0,0 +1,80 @@ +import { describe, it, expect } from 'bun:test'; +import { countByState } from './import-worker'; +import type { ImportItemRow } from './import-projection'; + +function item(state: ImportItemRow['state'], idx = 0): ImportItemRow { + return { + id: `i-${idx}`, + userId: 'u-1', + spaceId: 'sp-1', + jobId: 'j-1', + idx, + url: `https://example.com/${idx}`, + state, + articleId: null, + warning: null, + error: null, + attempts: 0, + lastAttemptAt: null, + }; +} + +describe('countByState — worker job-counter rollup', () => { + it('returns zeros for empty input + allTerminal=false', () => { + const c = countByState([]); + expect(c).toEqual({ + saved: 0, + duplicate: 0, + error: 0, + consentWall: 0, + cancelled: 0, + allTerminal: false, + }); + }); + + it('counts each terminal state independently', () => { + const c = countByState([ + item('saved', 0), + item('saved', 1), + item('duplicate', 2), + item('error', 3), + item('cancelled', 4), + ]); + expect(c.saved).toBe(2); + expect(c.duplicate).toBe(1); + expect(c.error).toBe(1); + expect(c.cancelled).toBe(1); + expect(c.allTerminal).toBe(true); + }); + + it('treats consent-wall as semantically saved (so progress UI advances)', () => { + // One real-saved + two consent-wall = three "saved" from the + // user's perspective, but the warning counter tracks the wall hits. + const c = countByState([item('saved', 0), item('consent-wall', 1), item('consent-wall', 2)]); + expect(c.saved).toBe(3); + expect(c.consentWall).toBe(2); + expect(c.allTerminal).toBe(true); + }); + + it('does not flag allTerminal when any item is non-terminal', () => { + const states: ImportItemRow['state'][] = ['pending', 'extracting', 'extracted']; + for (const nonTerminal of states) { + const c = countByState([item('saved', 0), item(nonTerminal, 1)]); + expect(c.allTerminal).toBe(false); + } + }); + + it('preserves the saved + consent-wall sum when both are present', () => { + // Regression check: saved must include consent-wall items so the + // finished-counter UI doesn't off-by-one. + const c = countByState([ + item('saved', 0), + item('saved', 1), + item('consent-wall', 2), + item('error', 3), + ]); + expect(c.saved).toBe(3); // 2 saved + 1 consent-wall + expect(c.consentWall).toBe(1); + expect(c.error).toBe(1); + }); +}); diff --git a/apps/api/src/modules/articles/import-worker.ts b/apps/api/src/modules/articles/import-worker.ts index 828b64528..39bba2c14 100644 --- a/apps/api/src/modules/articles/import-worker.ts +++ b/apps/api/src/modules/articles/import-worker.ts @@ -272,7 +272,7 @@ async function processOneJob(job: ImportJobRow): Promise { return claimable.length; } -interface StateCounts { +export interface StateCounts { saved: number; duplicate: number; error: number; @@ -281,7 +281,7 @@ interface StateCounts { allTerminal: boolean; } -function countByState(items: readonly ImportItemRow[]): StateCounts { +export function countByState(items: readonly ImportItemRow[]): StateCounts { let saved = 0; let duplicate = 0; let error = 0; diff --git a/apps/mana/apps/web/src/lib/modules/articles/components/JobDetailView.svelte b/apps/mana/apps/web/src/lib/modules/articles/components/JobDetailView.svelte index 2123e091d..73952ded2 100644 --- a/apps/mana/apps/web/src/lib/modules/articles/components/JobDetailView.svelte +++ b/apps/mana/apps/web/src/lib/modules/articles/components/JobDetailView.svelte @@ -161,13 +161,34 @@ + {#if j.warningCount > 0} + + {/if} +