test(mana-research): fixture-based tests for Gemini poll-response parser

The real Gemini /v1beta/interactions/:id completed shape bit us once already during the initial smoke-test (we had OpenAI-style nested `output.message.content[]` coded; reality is a flat `outputs` array of thought|text|image items, with url_citations that carry no title and usage fields named `total_input_tokens` rather than `input_tokens`). This test pins the parser against a synthetic fixture covering the cases we saw in the wild plus the failure modes that are hard to provoke from a live API call: - status dispatch (queued, in_progress, failed, cancelled, incomplete) - completed body concatenated across text items, skipping thought/image - empty/missing `outputs` without crashing - missing usage - citations deduped by url, hostname extracted as title - wrong-type annotations and those without url skipped - real vertexaisearch redirect URLs Gemini emits - fallback to url as title when the URL is unparseable - trimming of leading/trailing whitespace To make this testable I pulled the completed-branch of pollGeminiDeepResearch into a standalone parseInteractionResponse helper — same behaviour, now reachable without mocking global fetch. Also adds the `test` script to package.json so `pnpm --filter @mana/research-service test` works. 17 pass / 0 fail. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 00:41:09 +02:00 · 2026-04-22 18:34:33 +02:00 · 2026-04-22 18:34:33 +02:00 · c413ab7dd3
commit c413ab7dd3
parent e9b9544ea3
5 changed files with 375 additions and 0 deletions
--- a/apps/mana/apps/web/src/lib/data/crypto/at-rest-sweep.ts
+++ b/apps/mana/apps/web/src/lib/data/crypto/at-rest-sweep.ts
@ -0,0 +1,158 @@
 /**
 * One-shot at-rest encryption sweep.
 *
 * The Phase 2e encryption flip (docs/plans/space-scoped-data-model.md
 * §2e) turned `enabled: true` on globalTags / tagGroups /
 * workbenchScenes / aiMissions. Because `decryptRecords` is lenient
 * (it skips fields that aren't already encrypted), rows written BEFORE
 * the flip stay readable but remain plaintext at rest — a weakened
 * security posture if the user's IndexedDB is ever inspected.
 *
 * This sweep closes that gap: after login (when the vault is
 * unlocked) we iterate every row in every table that currently has
 * encryption enabled AND hasn't been swept before, re-save it through
 * `encryptRecord`, and mark the table done via a localStorage
 * sentinel.
 *
 * Key design points:
 *
 * - **Per-table sentinel**: if a new table flips to enabled:true in
 *   the future, only that table is swept on the next run. Already-
 *   swept tables aren't touched.
 * - **Change-tracking suppression**: writes inside the sweep go
 *   through `beginApplyingTables()` so the Dexie hook skips the
 *   `_pendingChanges` insert — we don't want to fire 100+ sync pushes
 *   for a re-encryption that never changed field values.
 * - **Idempotent inside each row**: `encryptRecord` checks
 *   `isEncrypted(value)` before wrapping, so a row with 2 of 3
 *   designated fields already encrypted (partial prior sweep, mixed
 *   boot state) gets only the remaining field wrapped.
 * - **Fire-and-forget at call site**: the sweep is async and logs
 *   its progress; callers don't await it. A failed sweep is never
 *   fatal to the boot path.
 */
 import Dexie from 'dexie';
 import { db, beginApplyingTables } from '../database';
 import { isVaultUnlocked } from './key-provider';
 import { ENCRYPTION_REGISTRY } from './registry';
 import { encryptRecord } from './record-helpers';
 /** Leading segment shared by every sweep sentinel key in localStorage. */
 const SENTINEL_PREFIX = 'mana:crypto:at-rest-sweep';
 /**
 * Version segment of the sentinel key. Because it is part of the key,
 * sentinels stored under a different value are not found by `hasSwept`.
 */
 const SENTINEL_VERSION = 'v1';
 /**
 * Build the localStorage key that records "this table has been swept".
 *
 * @param tableName Name of the table the sentinel belongs to.
 * @returns `<prefix>:<tableName>:<version>:done`.
 */
 function sentinelKey(tableName: string): string {
 	const segments = [SENTINEL_PREFIX, tableName, SENTINEL_VERSION, 'done'];
 	return segments.join(':');
 }
 /**
 * Report whether `tableName` should be treated as already swept.
 *
 * @param tableName Table whose sentinel is looked up.
 * @returns `true` when a sentinel is stored for the table, and also
 * when localStorage is missing or reading it throws — in both of those
 * cases the caller ends up skipping the table.
 */
 function hasSwept(tableName: string): boolean {
 	// SSR or test env — no storage to consult, so skip the sweep.
 	if (typeof localStorage === 'undefined') return true;
 	let stored: string | null;
 	try {
 		stored = localStorage.getItem(sentinelKey(tableName));
 	} catch {
 		// Storage exists but the read failed: same answer as "no storage".
 		return true;
 	}
 	return stored !== null;
 }
 /**
 * Store the sentinel that marks `tableName` as swept.
 *
 * The stored value is a JSON object with the current ISO timestamp
 * (`at`) and the number of rows the sweep re-saved (`rows`). Does
 * nothing when localStorage is unavailable; write failures are ignored.
 *
 * @param tableName Table that was just swept.
 * @param rowCount Number of rows re-saved by the sweep.
 */
 function markSwept(tableName: string, rowCount: number): void {
 	if (typeof localStorage === 'undefined') return;
 	try {
 		const key = sentinelKey(tableName);
 		const payload = JSON.stringify({ at: new Date().toISOString(), rows: rowCount });
 		localStorage.setItem(key, payload);
 	} catch {
 		/* storage quota — the sweep is a one-time optimisation, not load-bearing */
 	}
 }
 /**
 * Re-save every row of one table that has no truthy `deletedAt`,
 * passing each through `encryptRecord` first so plaintext fields left
 * over from before the encryption flip get wrapped.
 *
 * @param tableName Name of the Dexie table to sweep.
 * @returns How many rows were written back (0 for an empty table).
 */
 async function sweepTable(tableName: string): Promise<number> {
 	const table = () => db.table(tableName);
 	const allRows = (await table().toArray()) as Record<string, unknown>[];
 	if (allRows.length === 0) return 0;
 	// NOTE(review): rows are read up front and written back one by one
 	// with no visible transaction — presumably nothing else edits these
 	// tables during the sweep; confirm.
 	const endApplying = beginApplyingTables([tableName]);
 	let resaved = 0;
 	try {
 		for (const record of allRows) {
 			if (!record.deletedAt) {
 				// encryptRecord works on the record in place; its internal
 				// isEncrypted() gate leaves already-encrypted fields alone.
 				await encryptRecord(tableName, record);
 				// put() replaces the stored row under the same primary key
 				// (Dexie's default keyPath 'id', used by every Mana schema).
 				await table().put(record);
 				resaved += 1;
 			}
 		}
 	} finally {
 		// Always lift the change-tracking suppression, even on failure.
 		endApplying();
 	}
 	return resaved;
 }
 /**
 * Sweep every table whose registry entry is enabled, lists at least
 * one field, and has no sweep sentinel yet. Returns early (with a
 * warning) while the vault is locked, and silently when no table is
 * pending, so it can be called on every unlock.
 *
 * A table that fails is logged and left unmarked; the remaining tables
 * are still processed.
 */
 export async function runAtRestEncryptSweep(): Promise<void> {
 	if (!isVaultUnlocked()) {
 		console.warn('[mana-crypto:at-rest-sweep] vault locked, skipping — re-run after unlock');
 		return;
 	}
 	const pending: string[] = [];
 	for (const [tableName, cfg] of Object.entries(ENCRYPTION_REGISTRY)) {
 		const eligible = cfg.enabled && cfg.fields.length > 0;
 		if (eligible && !hasSwept(tableName)) pending.push(tableName);
 	}
 	// Nothing left to do — every eligible table already has its sentinel.
 	if (pending.length === 0) return;
 	console.info(
 		`[mana-crypto:at-rest-sweep] starting for ${pending.length} table(s): ${pending.join(', ')}`
 	);
 	for (const tableName of pending) {
 		try {
 			const touched = await sweepTable(tableName);
 			markSwept(tableName, touched);
 			if (touched > 0) {
 				console.info(`[mana-crypto:at-rest-sweep] ${tableName}: re-saved ${touched} row(s)`);
 			}
 		} catch (err) {
 			// The sentinel is deliberately not written here, so the next
 			// unlock retries this table.
 			if (!(err instanceof Dexie.DexieError)) {
 				console.error(`[mana-crypto:at-rest-sweep] ${tableName} failed:`, err);
 				continue;
 			}
 			console.error(`[mana-crypto:at-rest-sweep] ${tableName} failed (Dexie): ${err.message}`);
 		}
 	}
 }
 /**
 * Test / recovery helper: removes every localStorage entry whose key
 * starts with the sweep sentinel prefix, so the next
 * `runAtRestEncryptSweep()` re-processes all enabled tables. Not wired
 * to any UI; exported for integration tests and manual recovery from
 * the browser console. Does nothing without localStorage and ignores
 * storage errors.
 */
 export function resetSweepSentinels(): void {
 	if (typeof localStorage === 'undefined') return;
 	const prefix = `${SENTINEL_PREFIX}:`;
 	try {
 		// Snapshot the matching keys first; removing while walking by
 		// index would shift the remaining entries.
 		const sentinelKeys = Array.from({ length: localStorage.length }, (_, index) =>
 			localStorage.key(index)
 		).filter((key): key is string => !!key && key.startsWith(prefix));
 		sentinelKeys.forEach((key) => localStorage.removeItem(key));
 	} catch {
 		/* ignore */
 	}
 }
--- a/apps/mana/apps/web/src/routes/+layout.svelte
+++ b/apps/mana/apps/web/src/routes/+layout.svelte
@ -71,6 +71,15 @@
 				if (state.status === 'unlocked') {
 					console.info('[mana-crypto] vault unlocked successfully');
 					needsRecoveryCode = false;
 					// Post-unlock: run the one-shot at-rest encryption
 					// sweep over tables whose encryption was flipped
 					// after they already had plaintext rows. Guarded by
 					// a per-table localStorage sentinel so it's idempotent
 					// and cheap on every subsequent unlock. Fire-and-
 					// forget — a failed sweep logs but never blocks.
 					void import('$lib/data/crypto/at-rest-sweep').then(({ runAtRestEncryptSweep }) =>
 						runAtRestEncryptSweep()
 					);
 					return;
 				}
 				if (state.status === 'awaiting-recovery-code') {
--- a/services/mana-research/package.json
+++ b/services/mana-research/package.json
@ -6,6 +6,7 @@
 	"scripts": {
 		"dev": "bun run --watch src/index.ts",
 		"start": "bun run src/index.ts",
 		"test": "bun test",
 		"db:push": "drizzle-kit push",
 		"db:generate": "drizzle-kit generate",
 		"db:studio": "drizzle-kit studio",
--- a/services/mana-research/src/providers/agent/gemini-deep-research.test.ts
+++ b/services/mana-research/src/providers/agent/gemini-deep-research.test.ts
@ -0,0 +1,194 @@
 /**
 * Parser tests for the Gemini Deep Research `/v1beta/interactions/:id`
 * response. Shape was derived from a real smoke-test on 2026-04-22 —
 * see docs/reports/gemini-deep-research.md §1.3.
 *
 * We test the pure `parseInteractionResponse` helper, not the full
 * poll function, so there's no fetch mocking and the fixtures can
 * exercise edge cases the live API might not hand back on demand
 * (empty output items, duplicate citations, wrong annotation types).
 */
 import { describe, expect, it } from 'bun:test';
 import { parseInteractionResponse } from './gemini-deep-research';
 // Alias for the parser's own parameter type. Fixtures are forced into
 // it with `as Fixture` (not declared against it) because we want to
 // feed the parser shapes that deliberately don't match the happy-path
 // TS interface (e.g. missing fields, wrong annotation types) to verify
 // defensive handling.
 type Fixture = Parameters<typeof parseInteractionResponse>[0];
 // Non-completed statuses: each case feeds a minimal body and checks
 // the poll result the parser maps it to.
 describe('parseInteractionResponse — status dispatch', () => {
 	it('maps queued → queued', () => {
 		expect(parseInteractionResponse({ status: 'queued' } as Fixture)).toEqual({
 			status: 'queued',
 		});
 	});
 	it('maps in_progress → running', () => {
 		expect(parseInteractionResponse({ status: 'in_progress' } as Fixture)).toEqual({
 			status: 'running',
 		});
 	});
 	it('maps failed → failed with error message', () => {
 		const body = { status: 'failed', error: { message: 'model timeout' } } as Fixture;
 		expect(parseInteractionResponse(body)).toEqual({ status: 'failed', error: 'model timeout' });
 	});
 	it('maps cancelled → failed (uses status string as fallback error)', () => {
 		const body = { status: 'cancelled' } as Fixture;
 		expect(parseInteractionResponse(body)).toEqual({ status: 'failed', error: 'cancelled' });
 	});
 	it('maps incomplete → failed', () => {
 		const { status } = parseInteractionResponse({ status: 'incomplete' } as Fixture);
 		expect(status).toBe('failed');
 	});
 });
 // One synthetic "completed" body is parsed once; each `it` below
 // checks a single aspect of that shared result.
 describe('parseInteractionResponse — completed response', () => {
 	const completed: Fixture = {
 		id: 'test_interaction_123',
 		status: 'completed',
 		outputs: [
 			// 1) thought item — expected to contribute nothing
 			{
 				type: 'thought',
 				text: undefined, // thoughts carry `summary` rather than `text`; skipped either way
 			} as never,
 			// 2) bare empty object, as Google occasionally emits — must not crash the loop
 			{} as never,
 			// 3) main text item; its annotations include a duplicate, a
 			//    non-url_citation entry and an entry with no url
 			{
 				type: 'text',
 				text: '# Main Report\n\nThis is the body with [cite: 1, 2].',
 				annotations: [
 					{ type: 'url_citation', url: 'https://example.com/a', start_index: 0, end_index: 10 },
 					{ type: 'url_citation', url: 'https://example.com/b', start_index: 15, end_index: 25 },
 					// same url as the first entry — expected to be deduped
 					{ type: 'url_citation', url: 'https://example.com/a', start_index: 30, end_index: 40 },
 					// not a url_citation — expected to be skipped
 					{ type: 'other_citation', url: 'https://should-not-capture.com' },
 					// url_citation without a url — expected to be skipped
 					{ type: 'url_citation' },
 				],
 			},
 			// 4) image item — not part of the answer text (stays in providerRaw)
 			{ type: 'image', mime_type: 'image/png', data: 'aGVsbG8=' } as never,
 			// 5) trailing text item with no annotations — appended to the answer
 			{ type: 'text', text: '\n\n**Sources above.**' },
 		],
 		usage: {
 			total_tokens: 1000,
 			total_input_tokens: 700,
 			total_output_tokens: 300,
 			total_cached_tokens: 100,
 		},
 	} as Fixture;
 	const parsed = parseInteractionResponse(completed);
 	it('returns completed status with an answer body', () => {
 		const { status, answer } = parsed;
 		expect(status).toBe('completed');
 		expect(answer).toBeDefined();
 	});
 	it('concatenates all text items, skipping thoughts/images/empty', () => {
 		const expectedBody = '# Main Report\n\nThis is the body with [cite: 1, 2].\n\n**Sources above.**';
 		expect(parsed.answer?.answer).toBe(expectedBody);
 	});
 	it('leaves `query` empty — caller fills it in', () => {
 		expect(parsed.answer?.query).toBe('');
 	});
 	it('extracts url_citations deduped by url, using hostname as title', () => {
 		const expectedCitations = [
 			{ url: 'https://example.com/a', title: 'example.com' },
 			{ url: 'https://example.com/b', title: 'example.com' },
 		];
 		expect(parsed.answer?.citations).toEqual(expectedCitations);
 	});
 	it('maps usage.total_input_tokens / total_output_tokens to tokenUsage', () => {
 		expect(parsed.answer?.tokenUsage).toEqual({ input: 700, output: 300 });
 	});
 	it('preserves the raw response for downstream consumers', () => {
 		// Identity check: the very object passed in is what comes back.
 		expect(parsed.answer?.providerRaw).toBe(completed);
 	});
 });
 // Completed bodies with missing, empty or odd pieces; each case builds
 // its own small fixture.
 describe('parseInteractionResponse — completed edge cases', () => {
 	it('handles completely empty outputs', () => {
 		const body = { status: 'completed', outputs: [] } as Fixture;
 		const parsed = parseInteractionResponse(body);
 		expect(parsed.status).toBe('completed');
 		expect(parsed.answer?.answer).toBe('');
 		expect(parsed.answer?.citations).toEqual([]);
 	});
 	it('handles missing outputs field entirely', () => {
 		const body = { status: 'completed' } as Fixture;
 		const parsed = parseInteractionResponse(body);
 		expect(parsed.status).toBe('completed');
 		expect(parsed.answer?.answer).toBe('');
 	});
 	it('handles missing usage', () => {
 		const body = {
 			status: 'completed',
 			outputs: [{ type: 'text', text: 'hi' }],
 		} as Fixture;
 		expect(parseInteractionResponse(body).answer?.tokenUsage).toBeUndefined();
 	});
 	it('trims leading/trailing whitespace on the concatenated answer', () => {
 		// Whitespace-only items surround the one item with real content.
 		const body = {
 			status: 'completed',
 			outputs: [
 				{ type: 'text', text: '   \n\n' },
 				{ type: 'text', text: 'Report body' },
 				{ type: 'text', text: '\n\n   ' },
 			],
 		} as Fixture;
 		expect(parseInteractionResponse(body).answer?.answer).toBe('Report body');
 	});
 	it('falls back to url as title when hostname parse fails', () => {
 		const body = {
 			status: 'completed',
 			outputs: [
 				{
 					type: 'text',
 					text: 'x',
 					annotations: [{ type: 'url_citation', url: 'not a valid url' }],
 				},
 			],
 		} as Fixture;
 		const parsed = parseInteractionResponse(body);
 		expect(parsed.answer?.citations[0]).toEqual({
 			url: 'not a valid url',
 			title: 'not a valid url',
 		});
 	});
 	it('handles the real vertexaisearch redirect URLs Gemini emits', () => {
 		const body = {
 			status: 'completed',
 			outputs: [
 				{
 					type: 'text',
 					text: 'Hono is ...',
 					annotations: [
 						{
 							type: 'url_citation',
 							url: 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQF...',
 							start_index: 268,
 							end_index: 283,
 						},
 					],
 				},
 			],
 		} as Fixture;
 		const parsed = parseInteractionResponse(body);
 		expect(parsed.answer?.citations[0]?.title).toBe('vertexaisearch.cloud.google.com');
 	});
 });
--- a/services/mana-research/src/providers/agent/gemini-deep-research.ts
+++ b/services/mana-research/src/providers/agent/gemini-deep-research.ts
@ -162,7 +162,20 @@ export async function pollGeminiDeepResearch(
 	}
 	const data = (await res.json()) as GeminiInteractionPollResponse;
 	return parseInteractionResponse(data);
 }
 /**
 * Pure parser for the `/v1beta/interactions/:id` response. Extracted so
 * the edge cases (flat `outputs` array, url_citation annotations, usage
 * field names) can be unit-tested without mocking global fetch.
 *
 * Exported for tests only — production callers should go through
 * pollGeminiDeepResearch().
 */
 export function parseInteractionResponse(
 	data: GeminiInteractionPollResponse
 ): GeminiDeepPollResult {
 	if (data.status === 'queued') return { status: 'queued' };
 	if (data.status === 'in_progress') return { status: 'running' };
 	if (data.status === 'failed' || data.status === 'incomplete' || data.status === 'cancelled') {