mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-16 04:39:39 +02:00
End-to-end deep-research feature for the questions module: a fire-and-
forget orchestrator in apps/api that plans sub-queries with mana-llm,
retrieves sources via mana-search (with optional Readability extraction),
and streams a structured synthesis back to the web app over SSE.
Backend (apps/api/src/modules/research):
- schema.ts: pgSchema('research') with research_results + sources
- orchestrator.ts: three-phase pipeline (plan / retrieve / synthesise)
with depth-aware config (quick=1×, standard=3×, deep=6× sub-queries)
- pubsub.ts: in-process event bus, single-node, swappable for Redis
- routes.ts: POST /start (202, fire-and-forget), GET /:id/stream (SSE),
POST /start-sync (test only), GET /:id, GET /:id/sources
- Credit gating via @mana/shared-hono/credits — validate up-front,
consume best-effort on `done`. Failed runs cost nothing.
Helpers (apps/api/src/lib):
- llm.ts: llmJson() + llmStream() over mana-llm OpenAI-compat API
- search.ts: webSearch() + bulkExtract() over mana-search Go service
- responses.ts: shared errorResponse / listResponse / validationError
Schema deployment:
- drizzle.config.ts (research-scoped) + drizzle/research/0000_init.sql
hand-authored migration, deployable via psql -f or drizzle-kit push.
- drizzle-kit added as devDep with db:generate / db:push scripts.
Web client (apps/mana/apps/web/src/lib/api/research.ts):
- Typed start() / get() / listSources() / streamProgress(). The stream
uses fetch + ReadableStream (not EventSource) so we can attach the
JWT via Authorization header. Special-cases 402 for friendly toast.
- New PUBLIC_MANA_API_URL plumbing in hooks.server.ts + config.ts.
Module store (modules/questions/stores/answers.svelte.ts):
- New write-side store with createManual / startResearch / accept /
softDelete. startResearch creates an optimistic empty answer, opens
the SSE stream, debounces token deltas in 100ms batches into the
encrypted local row, and on `done` replaces the streamed text with
the parsed { summary, keyPoints, followUps } payload + citations
resolved against research.sources.id.
Citation rendering (modules/questions/components/AnswerCitations.svelte):
- Tokenises [n] markers in the answer body into clickable pills with
hover popovers showing title / host / snippet / external link.
- Lazy-loaded via a session-scoped source cache (stores/sources.svelte.ts)
that deduplicates concurrent fetches.
UI (routes/(app)/questions/[id]/+page.svelte):
- Recherche card with three-state button (start / cancel / re-run),
animated phase indicator, source counter.
- Confirmation dialog warning about web/LLM transmission since the
question itself is locally encrypted.
- Toasts for success / error / cancel via @mana/shared-ui/toast.
- Re-run flow soft-deletes prior research-driven answers but keeps
manual ones intact.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
120 lines
3 KiB
TypeScript
120 lines
3 KiB
TypeScript
/**
 * Thin client for the mana-search Go service.
 *
 * Two helpers, scoped to what the research orchestrator needs:
 *
 * webSearch()    — POST /api/v1/search, returns ranked SearXNG results.
 * bulkExtract()  — POST /api/v1/extract/bulk, returns Readability text per URL.
 *
 * Internal service-to-service calls — no auth on the wire (private network).
 */
// Base URL of the mana-search service, overridable per deployment via env.
// `||` (not `??`) means an empty-string env var also falls back to the default.
const SEARCH_URL = process.env.MANA_SEARCH_URL || 'http://localhost:3021';
/** One ranked result row as returned by mana-search's POST /api/v1/search. */
export interface SearchHit {
  /** Result URL. */
  url: string;
  /** Page title. */
  title: string;
  /** Short excerpt shown alongside the result. */
  snippet: string;
  /** Upstream engine that produced the hit (SearXNG aggregates several). */
  engine: string;
  /** Relevance score used for ranking. */
  score: number;
  /** Publication date, when the engine reports one. */
  publishedDate?: string;
  /** SearXNG category the hit came from (e.g. 'general', 'news'). */
  category: string;
}
/** Readability-extracted article content for a single URL. */
export interface ExtractedContent {
  /** Article title. */
  title: string;
  /** Meta description, when present on the page. */
  description?: string;
  /** Byline author, when detected. */
  author?: string;
  /** Publication date, when detected. */
  publishedDate?: string;
  /** Site / publication name, when detected. */
  siteName?: string;
  /** Extracted main body text (truncated server-side to `maxLength` — see bulkExtract). */
  text: string;
  /** Word count of `text`. */
  wordCount: number;
}
/** Per-URL outcome of a bulk extract call; failures are reported, not thrown. */
export interface BulkExtractResult {
  /** URL this result describes. */
  url: string;
  /** Whether extraction succeeded for this URL. */
  success: boolean;
  /** Extracted content — expected only when `success` is true. */
  content?: ExtractedContent;
  /** Error message — expected only when `success` is false. */
  error?: string;
}
export class SearchError extends Error {
|
|
constructor(
|
|
message: string,
|
|
public readonly status?: number
|
|
) {
|
|
super(message);
|
|
this.name = 'SearchError';
|
|
}
|
|
}
|
|
|
|
/** Options for {@link webSearch}. */
export interface WebSearchOptions {
  /** Search query text. */
  query: string;
  /** Maximum number of hits to request (webSearch defaults this to 10). */
  limit?: number;
  /** SearXNG categories to restrict the query to. */
  categories?: string[]; // 'general' | 'news' | 'science' | 'it'
  /** Result language tag (webSearch defaults this to 'de-DE'). */
  language?: string;
  /** Abort signal forwarded to fetch(). */
  signal?: AbortSignal;
}
/** Run one SearXNG query via mana-search and return normalised hits. */
|
|
export async function webSearch(opts: WebSearchOptions): Promise<SearchHit[]> {
|
|
const res = await fetch(`${SEARCH_URL}/api/v1/search`, {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({
|
|
query: opts.query,
|
|
options: {
|
|
limit: opts.limit ?? 10,
|
|
categories: opts.categories,
|
|
language: opts.language ?? 'de-DE',
|
|
},
|
|
}),
|
|
signal: opts.signal,
|
|
});
|
|
|
|
if (!res.ok) {
|
|
throw new SearchError(`mana-search returned ${res.status}`, res.status);
|
|
}
|
|
|
|
const data = (await res.json()) as { results?: SearchHit[] };
|
|
return data.results ?? [];
|
|
}
|
|
|
|
/**
|
|
* Extract Readability content for a batch of URLs in parallel server-side.
|
|
* mana-search caps at 20 URLs per call; we slice if more come in.
|
|
*/
|
|
export async function bulkExtract(
|
|
urls: string[],
|
|
opts: { maxLength?: number; concurrency?: number; signal?: AbortSignal } = {}
|
|
): Promise<BulkExtractResult[]> {
|
|
if (urls.length === 0) return [];
|
|
|
|
const batches: string[][] = [];
|
|
for (let i = 0; i < urls.length; i += 20) batches.push(urls.slice(i, i + 20));
|
|
|
|
const all: BulkExtractResult[] = [];
|
|
for (const batch of batches) {
|
|
const res = await fetch(`${SEARCH_URL}/api/v1/extract/bulk`, {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({
|
|
urls: batch,
|
|
concurrency: opts.concurrency ?? 5,
|
|
options: {
|
|
maxLength: opts.maxLength ?? 8000,
|
|
},
|
|
}),
|
|
signal: opts.signal,
|
|
});
|
|
|
|
if (!res.ok) {
|
|
throw new SearchError(`mana-search bulk extract returned ${res.status}`, res.status);
|
|
}
|
|
|
|
const data = (await res.json()) as { results?: BulkExtractResult[] };
|
|
if (data.results) all.push(...data.results);
|
|
}
|
|
|
|
return all;
|
|
}
|