feat(questions): deep-research module — mana-search + mana-llm pipeline

End-to-end deep-research feature for the questions module: a fire-and-forget orchestrator in apps/api that plans sub-queries with mana-llm, retrieves sources via mana-search (with optional Readability extraction), and streams a structured synthesis back to the web app over SSE. Backend (apps/api/src/modules/research): - schema.ts: pgSchema('research') with research_results + sources - orchestrator.ts: three-phase pipeline (plan / retrieve / synthesise) with depth-aware config (quick=1×, standard=3×, deep=6× sub-queries) - pubsub.ts: in-process event bus, single-node, swappable for Redis - routes.ts: POST /start (202, fire-and-forget), GET /:id/stream (SSE), POST /start-sync (test only), GET /:id, GET /:id/sources - Credit gating via @mana/shared-hono/credits — validate up-front, consume best-effort on `done`. Failed runs cost nothing. Helpers (apps/api/src/lib): - llm.ts: llmJson() + llmStream() over mana-llm OpenAI-compat API - search.ts: webSearch() + bulkExtract() over mana-search Go service - responses.ts: shared errorResponse / listResponse / validationError Schema deployment: - drizzle.config.ts (research-scoped) + drizzle/research/0000_init.sql hand-authored migration, deployable via psql -f or drizzle-kit push. - drizzle-kit added as devDep with db:generate / db:push scripts. Web client (apps/mana/apps/web/src/lib/api/research.ts): - Typed start() / get() / listSources() / streamProgress(). The stream uses fetch + ReadableStream (not EventSource) so we can attach the JWT via Authorization header. Special-cases 402 for friendly toast. - New PUBLIC_MANA_API_URL plumbing in hooks.server.ts + config.ts. Module store (modules/questions/stores/answers.svelte.ts): - New write-side store with createManual / startResearch / accept / softDelete. 
startResearch creates an optimistic empty answer, opens the SSE stream, debounces token deltas in 100ms batches into the encrypted local row, and on `done` replaces the streamed text with the parsed { summary, keyPoints, followUps } payload + citations resolved against research.sources.id. Citation rendering (modules/questions/components/AnswerCitations.svelte): - Tokenises [n] markers in the answer body into clickable pills with hover popovers showing title / host / snippet / external link. - Lazy-loaded via a session-scoped source cache (stores/sources.svelte.ts) that deduplicates concurrent fetches. UI (routes/(app)/questions/[id]/+page.svelte): - Recherche card with three-state button (start / cancel / re-run), animated phase indicator, source counter. - Confirmation dialog warning about web/LLM transmission since the question itself is locally encrypted. - Toasts for success / error / cancel via @mana/shared-ui/toast. - Re-run flow soft-deletes prior research-driven answers but keeps manual ones intact. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-18 00:49:40 +02:00 · 2026-04-08 22:15:35 +02:00 · 2026-04-08 22:15:35 +02:00 · e82851985b
commit e82851985b
parent 30787e36d2
18 changed files with 2221 additions and 4 deletions
--- a/apps/api/src/modules/research/orchestrator.ts
+++ b/apps/api/src/modules/research/orchestrator.ts
@ -0,0 +1,366 @@
+/**
+ * Research orchestrator — three linear phases:
+ *
+ *   1. Plan        — mana-llm produces N sub-queries (JSON)
+ *   2. Retrieve    — mana-search runs each sub-query in parallel,
+ *                    deduplicates, optionally extracts full text
+ *   3. Synthesise  — mana-llm streams a structured answer (summary,
+ *                    key points, follow-ups) over the source corpus
+ *
+ * Each phase persists its progress to research_results/sources so a
+ * caller can either await the whole thing (sync mode) or subscribe to
+ * progress events (will land in routes.ts via a small in-process pubsub).
+ *
+ * Errors flip status='error' and surface errorMessage; they never throw
+ * past runPipeline() so background invocations don't crash the worker.
+ */
+
+import { eq } from 'drizzle-orm';
+import { db, researchResults, sources, type ResearchDepth } from './schema';
+import { llmJson, llmStream, LlmError } from '../../lib/llm';
+import { webSearch, bulkExtract, type SearchHit, SearchError } from '../../lib/search';
+
+// ─── Depth configuration ────────────────────────────────────
+
+/** Tunables for one research depth preset. */
+interface DepthConfig {
+	/** Number of sub-queries requested from the planner (1 skips the planner). */
+	subQueryCount: number;
+	/** Result limit passed to each individual web search. */
+	hitsPerQuery: number;
+	/** Cap on the number of de-duplicated sources kept for synthesis. */
+	maxSources: number;
+	/** Whether the extraction step runs for the kept sources. */
+	extract: boolean;
+	/** Search categories passed to every web search. */
+	categories: string[];
+	/** Model id used for the planning call. */
+	planModel: string;
+	/** Model id used for the streamed synthesis call. */
+	synthModel: string;
+}
+
+// Model ids shared by the presets below.
+const SMALL_MODEL = 'ollama/gemma3:4b';
+const LARGE_MODEL = 'ollama/gemma3:12b';
+
+const QUICK_PRESET: DepthConfig = {
+	subQueryCount: 1,
+	hitsPerQuery: 5,
+	maxSources: 5,
+	extract: false,
+	categories: ['general'],
+	planModel: SMALL_MODEL,
+	synthModel: SMALL_MODEL,
+};
+
+const STANDARD_PRESET: DepthConfig = {
+	subQueryCount: 3,
+	hitsPerQuery: 8,
+	maxSources: 15,
+	extract: true,
+	categories: ['general', 'news'],
+	planModel: SMALL_MODEL,
+	synthModel: LARGE_MODEL,
+};
+
+const DEEP_PRESET: DepthConfig = {
+	subQueryCount: 6,
+	hitsPerQuery: 8,
+	maxSources: 30,
+	extract: true,
+	categories: ['general', 'news', 'science', 'it'],
+	planModel: LARGE_MODEL,
+	synthModel: LARGE_MODEL,
+};
+
+/** Preset lookup keyed by the requested research depth. */
+const DEPTH_CONFIG: Record<ResearchDepth, DepthConfig> = {
+	quick: QUICK_PRESET,
+	standard: STANDARD_PRESET,
+	deep: DEEP_PRESET,
+};
+
+// ─── Progress events (consumed by routes.ts pubsub later) ───
+
+/**
+ * Progress notifications emitted by runPipeline(), discriminated by `type`.
+ */
+export type ProgressEvent =
+	// A phase has started; emitted right after the row's status is updated.
+	| { type: 'status'; status: 'planning' | 'searching' | 'extracting' | 'synthesizing' }
+	// The planned sub-queries, emitted after they are stored on the row.
+	| { type: 'plan'; subQueries: string[] }
+	// Number of sources that were persisted for this run.
+	| { type: 'sources'; count: number }
+	// One streamed chunk of the synthesis model's raw output.
+	| { type: 'token'; delta: string }
+	// The row has been updated to status 'done'.
+	| { type: 'done'; researchResultId: string }
+	// The run failed; `message` is the formatted error that was persisted.
+	| { type: 'error'; message: string };
+
+/** Callback that receives each progress event of a pipeline run. */
+export type ProgressEmitter = (event: ProgressEvent) => void;
+
+/** Default emitter for runPipeline(): discards every event. */
+const noop: ProgressEmitter = () => {};
+
+// ─── Pipeline input ─────────────────────────────────────────
+
+/** Everything runPipeline() needs to know about one research run. */
+export interface PipelineInput {
+	/** Id of the research_results row that the pipeline updates. */
+	researchResultId: string;
+	/** Question text; used in both prompts and as the only query when a single sub-query is configured. */
+	questionTitle: string;
+	/** Optional extra context; added to the prompts as a "Kontext:" line when present. */
+	questionDescription?: string;
+	/** Selects the depth preset (sub-query count, source cap, models). */
+	depth: ResearchDepth;
+}
+
+// ─── Synthesis JSON shape ───────────────────────────────────
+
+/** Structured answer parsed from the synthesis model's output. */
+interface SynthesisPayload {
+	/** Prose answer; persisted to the row's `summary` field. */
+	summary: string;
+	/** Bullet points; persisted to the row's `keyPoints` field. */
+	keyPoints: string[];
+	/** Follow-up questions; persisted to the row's `followUpQuestions` field. */
+	followUps: string[];
+}
+
+// ─── Public entrypoint ──────────────────────────────────────
+
+/**
+ * Run the full pipeline: plan sub-queries, retrieve (and optionally
+ * extract) sources, then synthesise and persist the structured answer.
+ *
+ * Resolves once the row is in `done` or `error` state. Never throws — all
+ * failures are caught, logged and persisted. A throwing progress listener
+ * is isolated so it can neither abort the run nor flip an already
+ * completed row to `error`.
+ *
+ * @param input - Row id, question text and depth preset for this run.
+ * @param emit - Optional progress listener; defaults to a no-op.
+ */
+export async function runPipeline(
+	input: PipelineInput,
+	emit: ProgressEmitter = noop
+): Promise<void> {
+	const cfg = DEPTH_CONFIG[input.depth];
+	const id = input.researchResultId;
+
+	// Listener errors are logged and dropped: progress reporting is
+	// secondary to the run itself and must not leak out of this function.
+	const notify: ProgressEmitter = (event) => {
+		try {
+			emit(event);
+		} catch (listenerErr) {
+			console.warn(`[research:${id}] progress listener threw on '${event.type}':`, listenerErr);
+		}
+	};
+
+	try {
+		// ─── Phase 1: Plan ─────────────────────────────────
+		await setStatus(id, 'planning');
+		notify({ type: 'status', status: 'planning' });
+
+		const subQueries = await planSubQueries(input, cfg);
+		await db.update(researchResults).set({ subQueries }).where(eq(researchResults.id, id));
+		notify({ type: 'plan', subQueries });
+
+		// ─── Phase 2: Retrieve ─────────────────────────────
+		await setStatus(id, 'searching');
+		notify({ type: 'status', status: 'searching' });
+
+		const hits = await runSearches(subQueries, cfg);
+		const ranked = dedupeAndRank(hits).slice(0, cfg.maxSources);
+
+		// An empty batch cannot be inserted (the insert builder rejects an
+		// empty values array), so fail here with a message the user can act on.
+		if (ranked.length === 0) {
+			throw new Error('No sources found: the web search returned no results for this question.');
+		}
+
+		let enriched: Array<{ hit: SearchHit; extractedText: string | undefined }> = ranked.map(
+			(h) => ({ hit: h, extractedText: undefined })
+		);
+
+		if (cfg.extract) {
+			await setStatus(id, 'extracting');
+			notify({ type: 'status', status: 'extracting' });
+
+			const extracts = await bulkExtract(
+				ranked.map((h) => h.url),
+				{ maxLength: 8000 }
+			);
+			const byUrl = new Map(extracts.map((e) => [e.url, e]));
+			enriched = ranked.map((h) => ({
+				hit: h,
+				extractedText: byUrl.get(h.url)?.content?.text,
+			}));
+		}
+
+		// Persist sources with stable rank order so citations [n] map to sources[n-1].
+		await db.insert(sources).values(
+			enriched.map((e, idx) => ({
+				researchResultId: id,
+				url: e.hit.url,
+				title: e.hit.title,
+				snippet: e.hit.snippet,
+				extractedContent: e.extractedText,
+				category: e.hit.category,
+				rank: idx + 1,
+			}))
+		);
+		notify({ type: 'sources', count: enriched.length });
+
+		// ─── Phase 3: Synthesise ───────────────────────────
+		await setStatus(id, 'synthesizing');
+		notify({ type: 'status', status: 'synthesizing' });
+
+		const synthesis = await synthesise(input, enriched, cfg, notify);
+
+		await db
+			.update(researchResults)
+			.set({
+				status: 'done',
+				summary: synthesis.summary,
+				keyPoints: synthesis.keyPoints,
+				followUpQuestions: synthesis.followUps,
+				finishedAt: new Date(),
+			})
+			.where(eq(researchResults.id, id));
+
+		notify({ type: 'done', researchResultId: id });
+	} catch (err) {
+		const message = formatError(err);
+		console.error(`[research:${id}] pipeline failed:`, err);
+		await db
+			.update(researchResults)
+			.set({ status: 'error', errorMessage: message, finishedAt: new Date() })
+			.where(eq(researchResults.id, id))
+			.catch((persistErr: unknown) => {
+				// Still best-effort, but leave a trace: otherwise the row would
+				// stay in an in-flight status with no hint as to why.
+				console.error(`[research:${id}] could not persist error state:`, persistErr);
+			});
+		notify({ type: 'error', message });
+	}
+}
+
+// ─── Phase 1: Plan ──────────────────────────────────────────
+
+/**
+ * Produce the web-search queries for a run.
+ *
+ * With a single configured sub-query the planner call is skipped and the
+ * question title is used as-is. Otherwise the planning model is asked for a
+ * JSON object `{ "subQueries": string[] }`; the accepted entries are
+ * trimmed, de-duplicated (case-insensitively) and capped at
+ * `cfg.subQueryCount`. If nothing usable comes back the question title is
+ * used as the only query.
+ *
+ * @param input - The question being researched.
+ * @param cfg - Depth preset supplying the query count and planning model.
+ * @returns At least one search query.
+ */
+async function planSubQueries(input: PipelineInput, cfg: DepthConfig): Promise<string[]> {
+	if (cfg.subQueryCount <= 1) {
+		// Cheap path: skip the LLM round-trip, just use the question itself.
+		return [input.questionTitle];
+	}
+
+	const system =
+		'Du planst eine Web-Recherche. Antworte ausschließlich als JSON-Objekt mit dem Schlüssel "subQueries" (Array aus Strings). Kein Fließtext, kein Markdown.';
+
+	const user = [
+		`Frage: ${input.questionTitle}`,
+		input.questionDescription ? `Kontext: ${input.questionDescription}` : null,
+		'',
+		`Erzeuge genau ${cfg.subQueryCount} präzise, sich gegenseitig ergänzende Web-Suchanfragen.`,
+		'Mische deutsche und englische Anfragen, wenn das die Trefferqualität verbessert.',
+		'Jede Anfrage soll einen anderen Aspekt der Frage abdecken.',
+	]
+		// Drop only the absent context line; the '' entry is a deliberate
+		// blank separator and must survive (filter(Boolean) removed it).
+		.filter((line): line is string => line !== null)
+		.join('\n');
+
+	const result = await llmJson<{ subQueries?: unknown }>({
+		model: cfg.planModel,
+		system,
+		user,
+		temperature: 0.3,
+		maxTokens: 400,
+	});
+
+	// The model's JSON is untrusted: it may be `null` or lack the key.
+	const candidates: unknown = result?.subQueries;
+	const seen = new Set<string>();
+	const queries: string[] = [];
+	for (const candidate of Array.isArray(candidates) ? candidates : []) {
+		if (typeof candidate !== 'string') continue;
+		const query = candidate.trim();
+		const key = query.toLowerCase();
+		if (query.length === 0 || seen.has(key)) continue;
+		seen.add(key);
+		queries.push(query);
+	}
+
+	if (queries.length === 0) {
+		// Fallback: don't fail the whole run because the planner produced garbage.
+		return [input.questionTitle];
+	}
+
+	return queries.slice(0, cfg.subQueryCount);
+}
+
+// ─── Phase 2: Retrieve ──────────────────────────────────────
+
+/**
+ * Run every sub-query against the web search concurrently and collect
+ * the hits of all successful searches.
+ *
+ * A failing sub-query is logged and skipped as long as at least one other
+ * sub-query succeeds. If every sub-query fails, the first failure is
+ * rethrown so the caller reports the actual search error instead of
+ * continuing with an empty result set.
+ *
+ * @param queries - Search queries to run.
+ * @param cfg - Depth preset supplying the per-query limit and categories.
+ * @returns The hits of all successful searches, in query order.
+ * @throws The first rejection reason when all sub-queries were rejected.
+ */
+async function runSearches(queries: string[], cfg: DepthConfig): Promise<SearchHit[]> {
+	const settled = await Promise.allSettled(
+		queries.map((q) =>
+			webSearch({
+				query: q,
+				limit: cfg.hitsPerQuery,
+				categories: cfg.categories,
+			})
+		)
+	);
+
+	const hits: SearchHit[] = [];
+	let failureCount = 0;
+	let firstFailure: unknown;
+	for (const outcome of settled) {
+		if (outcome.status === 'fulfilled') {
+			hits.push(...outcome.value);
+			continue;
+		}
+		if (failureCount === 0) firstFailure = outcome.reason;
+		failureCount += 1;
+		console.warn('[research] sub-query failed:', outcome.reason);
+	}
+
+	// Total outage: surface the original rejection (keeps its error class).
+	if (failureCount > 0 && failureCount === settled.length) {
+		throw firstFailure;
+	}
+	return hits;
+}
+
+/**
+ * Collapse hits that share a URL down to the best-scored one, then order
+ * the survivors from highest to lowest score so the strongest sources come
+ * first in the prompt. On equal scores the hit seen first is kept.
+ */
+function dedupeAndRank(hits: SearchHit[]): SearchHit[] {
+	const bestByUrl = hits.reduce((best, candidate) => {
+		const current = best.get(candidate.url);
+		if (current && !(candidate.score > current.score)) return best;
+		best.set(candidate.url, candidate);
+		return best;
+	}, new Map<string, SearchHit>());
+
+	const unique = Array.from(bestByUrl.values());
+	unique.sort((left, right) => right.score - left.score);
+	return unique;
+}
+
+// ─── Phase 3: Synthesise ────────────────────────────────────
+
+/**
+ * Build the synthesis prompt from the numbered source corpus, stream the
+ * model's answer (forwarding every chunk as a `token` event) and parse the
+ * collected text into the structured payload.
+ *
+ * Each source is rendered as `[n] title`, URL and body, where `n` is the
+ * 1-based position in `enriched`. The body is the extracted text when it
+ * has any non-whitespace content, otherwise the search snippet, capped at
+ * 2000 characters.
+ *
+ * @param input - The question being researched.
+ * @param enriched - Ranked sources, optionally with extracted full text.
+ * @param cfg - Depth preset supplying the synthesis model.
+ * @param emit - Receives one `token` event per streamed chunk.
+ * @returns The parsed summary, key points and follow-up questions.
+ */
+async function synthesise(
+	input: PipelineInput,
+	enriched: Array<{ hit: SearchHit; extractedText?: string }>,
+	cfg: DepthConfig,
+	emit: ProgressEmitter
+): Promise<SynthesisPayload> {
+	const context = enriched
+		.map((e, i) => {
+			// `??` alone would keep an empty extraction result and hide the
+			// snippet, leaving the source without any body text.
+			const extracted = e.extractedText?.trim();
+			const body = extracted ? extracted : (e.hit.snippet ?? '');
+			return `[${i + 1}] ${e.hit.title}\n${e.hit.url}\n${truncate(body, 2000)}`;
+		})
+		.join('\n\n---\n\n');
+
+	const system = [
+		'Du bist ein gründlicher Research-Assistent.',
+		'Antworte ausschließlich als JSON-Objekt mit dieser exakten Form:',
+		'{ "summary": string, "keyPoints": string[], "followUps": string[] }',
+		'',
+		'Regeln:',
+		'- summary: 2–4 Absätze auf Deutsch, jeder belegbare Claim bekommt eine Citation [n], die auf die Quellen-Nummer verweist.',
+		'- keyPoints: 3–6 Stichpunkte, jeweils mit mindestens einer [n]-Citation.',
+		'- followUps: 2–4 weiterführende Fragen, ohne Citations.',
+		'- Verwende ausschließlich Informationen aus den bereitgestellten Quellen. Wenn die Quellen die Frage nicht beantworten, sag das im summary.',
+		'- Kein Markdown, keine Code-Fences, nur reines JSON.',
+	].join('\n');
+
+	const user = [
+		`Frage: ${input.questionTitle}`,
+		input.questionDescription ? `Kontext: ${input.questionDescription}` : null,
+		'',
+		'Quellen:',
+		context,
+	]
+		// Drop only the absent context line; the '' entry is the intended
+		// blank line before the sources (filter(Boolean) removed it).
+		.filter((line): line is string => line !== null)
+		.join('\n');
+
+	// We stream tokens to the client for live UI feedback, then parse the
+	// fully-collected text as JSON. The final structured payload is what
+	// gets persisted; the live tokens are just visual progress.
+	const fullText = await llmStream({
+		model: cfg.synthModel,
+		system,
+		user,
+		temperature: 0.4,
+		maxTokens: 2000,
+		onToken: (delta) => emit({ type: 'token', delta }),
+	});
+
+	return parseSynthesis(fullText);
+}
+
+/**
+ * Turn the synthesis model's raw output into a SynthesisPayload.
+ *
+ * Parsing is attempted on the fence-stripped text first and, if the text
+ * has anything around its outermost `{ … }` span (models often wrap the
+ * JSON in a sentence), on that span alone. Only a JSON object is accepted;
+ * non-string entries in `keyPoints` / `followUps` are dropped and a
+ * missing `summary` becomes an empty string.
+ *
+ * If no candidate parses to an object, the trimmed raw text is returned
+ * as the summary so the user at least sees what the model produced.
+ *
+ * @param raw - Complete text collected from the synthesis stream.
+ * @returns The structured payload; never throws.
+ */
+function parseSynthesis(raw: string): SynthesisPayload {
+	const text = raw.trim();
+	const unfenced = stripCodeFence(text);
+
+	const candidates = [unfenced];
+	const open = unfenced.indexOf('{');
+	const close = unfenced.lastIndexOf('}');
+	const hasSurroundingText = open > 0 || close < unfenced.length - 1;
+	if (open !== -1 && close > open && hasSurroundingText) {
+		candidates.push(unfenced.slice(open, close + 1));
+	}
+
+	for (const candidate of candidates) {
+		let parsed: unknown;
+		try {
+			parsed = JSON.parse(candidate);
+		} catch {
+			continue;
+		}
+		// `null`, arrays and primitives are valid JSON but not a payload;
+		// treating them as `{}` would silently produce an empty answer.
+		if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) continue;
+
+		const obj = parsed as Record<string, unknown>;
+		return {
+			summary: typeof obj.summary === 'string' ? obj.summary : '',
+			keyPoints: Array.isArray(obj.keyPoints)
+				? obj.keyPoints.filter((k): k is string => typeof k === 'string')
+				: [],
+			followUps: Array.isArray(obj.followUps)
+				? obj.followUps.filter((k): k is string => typeof k === 'string')
+				: [],
+		};
+	}
+
+	// Last-ditch fallback: surface the raw text as the summary.
+	return { summary: text, keyPoints: [], followUps: [] };
+}
+
+// ─── Helpers ────────────────────────────────────────────────
+
+/**
+ * Write an in-flight pipeline status onto the research_results row `id`.
+ * Resolves once the update has completed.
+ */
+async function setStatus(
+	id: string,
+	status: 'planning' | 'searching' | 'extracting' | 'synthesizing'
+): Promise<void> {
+	const patch = { status };
+	const targetRow = eq(researchResults.id, id);
+	await db.update(researchResults).set(patch).where(targetRow);
+}
+
+/**
+ * Shorten `s` to at most `max` UTF-16 code units and append an ellipsis
+ * when anything was cut off. Strings that already fit are returned
+ * unchanged.
+ *
+ * The cut never lands between the two halves of a surrogate pair: if the
+ * last kept unit would be a high surrogate it is dropped as well, so the
+ * result contains no character that was split by the cut.
+ *
+ * @param s - Text to shorten.
+ * @param max - Maximum number of code units to keep (before the ellipsis).
+ */
+function truncate(s: string, max: number): string {
+	if (s.length <= max) return s;
+
+	let end = Math.max(0, Math.floor(max));
+	if (end > 0) {
+		const lastKept = s.charCodeAt(end - 1);
+		// 0xD800–0xDBFF is the high-surrogate range; its partner would be cut.
+		if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
+	}
+	return s.slice(0, end) + '…';
+}
+
+/**
+ * Remove a Markdown code fence wrapped around `text`.
+ *
+ * Text that does not start with ``` is returned unchanged. Otherwise the
+ * opening fence is removed together with an optional language tag: any
+ * tag in any letter case (`json`, `JSON`, `javascript`, …) that is followed
+ * by a line break or directly by the payload's `{` / `[`. A trailing
+ * closing fence is removed as well.
+ *
+ * @param text - Already-trimmed model output.
+ * @returns The payload without the surrounding fence.
+ */
+function stripCodeFence(text: string): string {
+	if (!text.startsWith('```')) return text;
+	// The lookahead keeps fenced content that merely starts with a word
+	// from being mistaken for a language tag; the bare `json` alternative
+	// preserves the previous unconditional handling of that tag.
+	const withoutOpen = text.replace(/^```(?:[\w+-]+(?=\s*[\n{\[])|json)?\s*\n?/i, '');
+	return withoutOpen.replace(/\n?```\s*$/, '');
+}
+
+/**
+ * Render a caught value as the message stored on the row and sent to
+ * listeners. LLM and search failures get a source prefix, other Error
+ * instances contribute their message, anything else is stringified.
+ */
+function formatError(err: unknown): string {
+	return err instanceof LlmError
+		? `LLM: ${err.message}`
+		: err instanceof SearchError
+			? `Search: ${err.message}`
+			: err instanceof Error
+				? err.message
+				: String(err);
+}