From 3a7bc7f1c3377880f9bf1cabd58b2fb8563b728f Mon Sep 17 00:00:00 2001
From: Till JS <tills95@gmail.com>
Date: Wed, 22 Apr 2026 18:44:21 +0200
Subject: [PATCH] test(mana-research): fixture-based tests for Gemini
 poll-response parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Re-commit of c413ab7dd (reverted in c31dcdd66) without the unrelated
files that accidentally got swept into the original stage. Parser
content is identical.

The real Gemini /v1beta/interactions/:id completed shape bit us once
already during the initial smoke-test (we had OpenAI-style nested
`output.message.content[]` coded; reality is a flat `outputs` array
of thought|text|image items, with url_citations that carry no title
and usage fields named `total_input_tokens` rather than `input_tokens`).

This test pins the parser against a synthetic fixture covering the
cases we saw in the wild plus the failure modes that are hard to
provoke from a live API call:

  - status dispatch (queued, in_progress, failed, cancelled, incomplete)
  - completed body concatenated across text items, skipping thought/image
  - empty/missing `outputs` without crashing
  - missing usage
  - citations deduped by url, hostname extracted as title
  - wrong-type annotations and those without url skipped
  - real vertexaisearch redirect URLs Gemini emits
  - fallback to url as title when the URL is unparseable
  - trimming of leading/trailing whitespace

To make this testable I pulled the response handling of
pollGeminiDeepResearch (the status dispatch plus the completed branch)
into a standalone parseInteractionResponse helper — same behaviour,
now reachable without mocking global fetch.

Also adds the `test` script to package.json so `pnpm --filter
@mana/research-service test` works.

17 pass / 0 fail.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 services/mana-research/package.json           |   1 +
 .../agent/gemini-deep-research.test.ts        | 194 ++++++++++++++++++
 .../providers/agent/gemini-deep-research.ts   |  13 ++
 3 files changed, 208 insertions(+)
 create mode 100644 services/mana-research/src/providers/agent/gemini-deep-research.test.ts
diff --git a/services/mana-research/package.json b/services/mana-research/package.json
index 8d5515a6f..cecebe74e 100644
--- a/services/mana-research/package.json
+++ b/services/mana-research/package.json
@@ -6,6 +6,7 @@
 	"scripts": {
 		"dev": "bun run --watch src/index.ts",
 		"start": "bun run src/index.ts",
+		"test": "bun test",
 		"db:push": "drizzle-kit push",
 		"db:generate": "drizzle-kit generate",
 		"db:studio": "drizzle-kit studio",
diff --git a/services/mana-research/src/providers/agent/gemini-deep-research.test.ts b/services/mana-research/src/providers/agent/gemini-deep-research.test.ts
new file mode 100644
index 000000000..d77b784f6
--- /dev/null
+++ b/services/mana-research/src/providers/agent/gemini-deep-research.test.ts
@@ -0,0 +1,194 @@
+/**
+ * Parser tests for the Gemini Deep Research `/v1beta/interactions/:id`
+ * response. Shape was derived from a real smoke-test on 2026-04-22 —
+ * see docs/reports/gemini-deep-research.md §1.3.
+ *
+ * We test the pure `parseInteractionResponse` helper, not the full
+ * poll function, so there's no fetch mocking and the fixtures can
+ * exercise edge cases the live API might not hand back on demand
+ * (empty output items, duplicate citations, wrong annotation types).
+ */
+
+import { describe, expect, it } from 'bun:test';
+import { parseInteractionResponse } from './gemini-deep-research';
+
+// Typed as the parser's own parameter type (not `any`). Fixtures are cast
+// with `as Fixture` / `as never` so they can deliberately break the happy-path
+// shape (missing fields, wrong annotation types) to verify defensive handling.
+type Fixture = Parameters<typeof parseInteractionResponse>[0];
+
+describe('parseInteractionResponse — status dispatch', () => { // each case feeds a status-only payload (plus `error` for 'failed')
+	it('maps queued → queued', () => {
+		const r = parseInteractionResponse({ status: 'queued' } as Fixture);
+		expect(r).toEqual({ status: 'queued' });
+	});
+
+	it('maps in_progress → running', () => {
+		const r = parseInteractionResponse({ status: 'in_progress' } as Fixture);
+		expect(r).toEqual({ status: 'running' }); // the provider's 'in_progress' is surfaced as 'running'
+	});
+
+	it('maps failed → failed with error message', () => {
+		const r = parseInteractionResponse({
+			status: 'failed',
+			error: { message: 'model timeout' },
+		} as Fixture);
+		expect(r).toEqual({ status: 'failed', error: 'model timeout' }); // error.message is expected as a plain string
+	});
+
+	it('maps cancelled → failed (uses status string as fallback error)', () => {
+		const r = parseInteractionResponse({ status: 'cancelled' } as Fixture);
+		expect(r).toEqual({ status: 'failed', error: 'cancelled' }); // no `error` object in the payload, so the status string is expected instead
+	});
+
+	it('maps incomplete → failed', () => {
+		const r = parseInteractionResponse({ status: 'incomplete' } as Fixture);
+		expect(r.status).toBe('failed'); // only the status is asserted; the error text is left unpinned
+	});
+});
+
+describe('parseInteractionResponse — completed response', () => {
+	const completed: Fixture = { // shared fixture for every test in this describe block
+		id: 'test_interaction_123',
+		status: 'completed',
+		outputs: [
+			// thought item — must contribute nothing to the answer text
+			{
+				type: 'thought',
+				text: undefined, // thought uses `summary`, not `text` — irrelevant, we skip anyway
+			} as never,
+			// empty item Google occasionally emits — must not crash the loop
+			{} as never,
+			// primary text item with url_citations (plus a duplicate, a wrong-type and a url-less annotation)
+			{
+				type: 'text',
+				text: '# Main Report\n\nThis is the body with [cite: 1, 2].',
+				annotations: [
+					{ type: 'url_citation', url: 'https://example.com/a', start_index: 0, end_index: 10 },
+					{ type: 'url_citation', url: 'https://example.com/b', start_index: 15, end_index: 25 },
+					// duplicate of /a — must be deduped (expected citations list /a only once)
+					{ type: 'url_citation', url: 'https://example.com/a', start_index: 30, end_index: 40 },
+					// wrong type — must be skipped even though it carries a url
+					{ type: 'other_citation', url: 'https://should-not-capture.com' },
+					// missing url — must be skipped
+					{ type: 'url_citation' },
+				],
+			},
+			// image — not part of the answer text (still present in providerRaw, which is this fixture)
+			{ type: 'image', mime_type: 'image/png', data: 'aGVsbG8=' } as never,
+			// second text block without annotations — must be appended to the first text block
+			{ type: 'text', text: '\n\n**Sources above.**' },
+		],
+		usage: {
+			total_tokens: 1000,
+			total_input_tokens: 700,
+			total_output_tokens: 300,
+			total_cached_tokens: 100,
+		},
+	} as Fixture;
+
+	const result = parseInteractionResponse(completed); // parsed once at describe time; the tests below only read from it
+
+	it('returns completed status with an answer body', () => {
+		expect(result.status).toBe('completed');
+		expect(result.answer).toBeDefined();
+	});
+
+	it('concatenates all text items, skipping thoughts/images/empty', () => {
+		expect(result.answer?.answer).toBe(
+			'# Main Report\n\nThis is the body with [cite: 1, 2].\n\n**Sources above.**'
+		);
+	});
+
+	it('leaves `query` empty — caller fills it in', () => {
+		expect(result.answer?.query).toBe('');
+	});
+
+	it('extracts url_citations deduped by url, using hostname as title', () => {
+		expect(result.answer?.citations).toEqual([
+			{ url: 'https://example.com/a', title: 'example.com' },
+			{ url: 'https://example.com/b', title: 'example.com' },
+		]);
+	});
+
+	it('maps usage.total_input_tokens / total_output_tokens to tokenUsage', () => {
+		expect(result.answer?.tokenUsage).toEqual({ input: 700, output: 300 }); // total_tokens / total_cached_tokens are not part of the expected shape
+	});
+
+	it('preserves the raw response for downstream consumers', () => {
+		expect(result.answer?.providerRaw).toBe(completed); // toBe: the very same object as the input, not a copy
+	});
+});
+
+describe('parseInteractionResponse — completed edge cases', () => { // each test builds its own minimal 'completed' payload
+	it('handles completely empty outputs', () => {
+		const r = parseInteractionResponse({ status: 'completed', outputs: [] } as Fixture);
+		expect(r.status).toBe('completed');
+		expect(r.answer?.answer).toBe('');
+		expect(r.answer?.citations).toEqual([]);
+	});
+
+	it('handles missing outputs field entirely', () => {
+		const r = parseInteractionResponse({ status: 'completed' } as Fixture); // no `outputs` key at all
+		expect(r.status).toBe('completed');
+		expect(r.answer?.answer).toBe('');
+	});
+
+	it('handles missing usage', () => {
+		const r = parseInteractionResponse({
+			status: 'completed',
+			outputs: [{ type: 'text', text: 'hi' }],
+		} as Fixture);
+		expect(r.answer?.tokenUsage).toBeUndefined(); // no `usage` in the payload, so no tokenUsage is expected
+	});
+
+	it('trims leading/trailing whitespace on the concatenated answer', () => {
+		const r = parseInteractionResponse({
+			status: 'completed',
+			outputs: [
+				{ type: 'text', text: '   \n\n' }, // whitespace-only leading item
+				{ type: 'text', text: 'Report body' },
+				{ type: 'text', text: '\n\n   ' }, // whitespace-only trailing item
+			],
+		} as Fixture);
+		expect(r.answer?.answer).toBe('Report body');
+	});
+
+	it('falls back to url as title when hostname parse fails', () => {
+		const r = parseInteractionResponse({
+			status: 'completed',
+			outputs: [
+				{
+					type: 'text',
+					text: 'x',
+					annotations: [{ type: 'url_citation', url: 'not a valid url' }], // no scheme or host to extract a hostname from
+				},
+			],
+		} as Fixture);
+		expect(r.answer?.citations[0]).toEqual({
+			url: 'not a valid url',
+			title: 'not a valid url',
+		});
+	});
+
+	it('handles the real vertexaisearch redirect URLs Gemini emits', () => {
+		const r = parseInteractionResponse({
+			status: 'completed',
+			outputs: [
+				{
+					type: 'text',
+					text: 'Hono is ...',
+					annotations: [
+						{
+							type: 'url_citation',
+							url: 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQF...', // path ends in a literal `...`; only the hostname is asserted below
+							start_index: 268,
+							end_index: 283,
+						},
+					],
+				},
+			],
+		} as Fixture);
+		expect(r.answer?.citations[0]?.title).toBe('vertexaisearch.cloud.google.com');
+	});
+});
diff --git a/services/mana-research/src/providers/agent/gemini-deep-research.ts b/services/mana-research/src/providers/agent/gemini-deep-research.ts
index 58d0c3fa2..cf44eb154 100644
--- a/services/mana-research/src/providers/agent/gemini-deep-research.ts
+++ b/services/mana-research/src/providers/agent/gemini-deep-research.ts
@@ -162,7 +162,20 @@ export async function pollGeminiDeepResearch(
 	}
 
 	const data = (await res.json()) as GeminiInteractionPollResponse;
+	return parseInteractionResponse(data);
+}
 
+/**
+ * Pure parser for the `/v1beta/interactions/:id` response. Extracted so
+ * the edge cases (flat `outputs` array, url_citation annotations, usage
+ * field names) can be unit-tested without mocking global fetch.
+ *
+ * Exported for tests only — production callers should go through
+ * pollGeminiDeepResearch().
+ */
+export function parseInteractionResponse(
+	data: GeminiInteractionPollResponse
+): GeminiDeepPollResult {
 	if (data.status === 'queued') return { status: 'queued' };
 	if (data.status === 'in_progress') return { status: 'running' };
 	if (data.status === 'failed' || data.status === 'incomplete' || data.status === 'cancelled') {