mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-17 00:19:41 +02:00
feat(questions): deep-research module — mana-search + mana-llm pipeline
End-to-end deep-research feature for the questions module: a fire-and-
forget orchestrator in apps/api that plans sub-queries with mana-llm,
retrieves sources via mana-search (with optional Readability extraction),
and streams a structured synthesis back to the web app over SSE.
Backend (apps/api/src/modules/research):
- schema.ts: pgSchema('research') with research_results + sources
- orchestrator.ts: three-phase pipeline (plan / retrieve / synthesise)
with depth-aware config (quick=1×, standard=3×, deep=6× sub-queries)
- pubsub.ts: in-process event bus, single-node, swappable for Redis
- routes.ts: POST /start (202, fire-and-forget), GET /:id/stream (SSE),
POST /start-sync (test only), GET /:id, GET /:id/sources
- Credit gating via @mana/shared-hono/credits — validate up-front,
consume best-effort on `done`. Failed runs cost nothing.
Helpers (apps/api/src/lib):
- llm.ts: llmJson() + llmStream() over mana-llm OpenAI-compat API
- search.ts: webSearch() + bulkExtract() over mana-search Go service
- responses.ts: shared errorResponse / listResponse / validationError
Schema deployment:
- drizzle.config.ts (research-scoped) + drizzle/research/0000_init.sql
hand-authored migration, deployable via psql -f or drizzle-kit push.
- drizzle-kit added as devDep with db:generate / db:push scripts.
Web client (apps/mana/apps/web/src/lib/api/research.ts):
- Typed start() / get() / listSources() / streamProgress(). The stream
uses fetch + ReadableStream (not EventSource) so we can attach the
JWT via Authorization header. Special-cases 402 for friendly toast.
- New PUBLIC_MANA_API_URL plumbing in hooks.server.ts + config.ts.
Module store (modules/questions/stores/answers.svelte.ts):
- New write-side store with createManual / startResearch / accept /
softDelete. startResearch creates an optimistic empty answer, opens
the SSE stream, debounces token deltas in 100ms batches into the
encrypted local row, and on `done` replaces the streamed text with
the parsed { summary, keyPoints, followUps } payload + citations
resolved against research.sources.id.
Citation rendering (modules/questions/components/AnswerCitations.svelte):
- Tokenises [n] markers in the answer body into clickable pills with
hover popovers showing title / host / snippet / external link.
- Lazy-loaded via a session-scoped source cache (stores/sources.svelte.ts)
that deduplicates concurrent fetches.
UI (routes/(app)/questions/[id]/+page.svelte):
- Recherche card with three-state button (start / cancel / re-run),
animated phase indicator, source counter.
- Confirmation dialog warning about web/LLM transmission since the
question itself is locally encrypted.
- Toasts for success / error / cancel via @mana/shared-ui/toast.
- Re-run flow soft-deletes prior research-driven answers but keeps
manual ones intact.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
30787e36d2
commit
e82851985b
18 changed files with 2221 additions and 4 deletions
175
apps/api/src/lib/llm.ts
Normal file
175
apps/api/src/lib/llm.ts
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
/**
|
||||
* Thin client for the mana-llm gateway.
|
||||
*
|
||||
* Two helpers, deliberately small:
|
||||
*
|
||||
* llmJson() — non-streaming, parses the model response as JSON.
|
||||
* Used for plan/structuring steps where we need a typed object.
|
||||
*
|
||||
* llmStream() — streaming, calls onToken() for each delta and returns
|
||||
* the full concatenated text at the end. Used for synthesis.
|
||||
*
|
||||
* mana-llm exposes an OpenAI-compatible /api/v1/chat/completions endpoint
|
||||
* (see services/mana-llm). Models are namespaced as `provider/model`, e.g.
|
||||
* `ollama/gemma3:4b`, `openrouter/meta-llama/llama-3.1-70b-instruct`.
|
||||
*
|
||||
* Internal service-to-service calls — no auth on the wire (private network).
|
||||
*/
|
||||
|
||||
// Base URL of the mana-llm gateway. Uses MANA_LLM_URL when it is set to a
// non-empty value; otherwise falls back to the localhost default below.
const LLM_URL = process.env.MANA_LLM_URL || 'http://localhost:3025';
|
||||
|
||||
/** One entry of the `messages` array sent to the chat-completions endpoint. */
export interface LlmMessage {
  /** Who is speaking in this message. */
  role: 'system' | 'user' | 'assistant';
  /** The message text. */
  content: string;
}
|
||||
|
||||
/** Options accepted by `llmJson()`. */
export interface LlmJsonOptions {
  /** Model identifier, namespaced as `provider/model`. */
  model: string;
  /** Optional system prompt; left out of the request when absent or empty. */
  system?: string;
  /** The user prompt. */
  user: string;
  /** Sampling temperature; `llmJson()` uses 0.2 when omitted. */
  temperature?: number;
  /** Sent as `max_tokens`; `llmJson()` uses 1000 when omitted. */
  maxTokens?: number;
}
|
||||
|
||||
/** Options accepted by `llmStream()`. */
export interface LlmStreamOptions {
  /** Model identifier, namespaced as `provider/model`. */
  model: string;
  /** Optional system prompt; left out of the request when absent or empty. */
  system?: string;
  /** The user prompt. */
  user: string;
  /** Sampling temperature; `llmStream()` uses 0.5 when omitted. */
  temperature?: number;
  /** Sent as `max_tokens`; `llmStream()` uses 2000 when omitted. */
  maxTokens?: number;
  /** Called (and awaited) once per non-empty content delta, in arrival order. */
  onToken: (delta: string) => void | Promise<void>;
  /** Forwarded to `fetch()` so the caller can abort the request. */
  signal?: AbortSignal;
}
|
||||
|
||||
/**
 * Error raised by the mana-llm helpers.
 *
 * `status` carries the HTTP status when one is available; `body` carries the
 * raw response text or raw model output that caused the failure.
 */
export class LlmError extends Error {
  public readonly status?: number;
  public readonly body?: string;

  constructor(message: string, status?: number, body?: string) {
    super(message);
    this.status = status;
    this.body = body;
    this.name = 'LlmError';
  }
}
|
||||
|
||||
/**
 * Assemble the chat `messages` array: the system prompt first (only when it
 * is a non-empty string), followed by the user prompt.
 */
function buildMessages(system: string | undefined, user: string): LlmMessage[] {
  const userMessage: LlmMessage = { role: 'user', content: user };
  return system ? [{ role: 'system', content: system }, userMessage] : [userMessage];
}
|
||||
|
||||
/**
 * Call the LLM (non-streaming) and parse the assistant message as JSON.
 *
 * The request asks for `response_format: { type: 'json_object' }`. A markdown
 * code fence wrapped around the output (```json ... ```) is stripped before
 * parsing.
 *
 * @param opts Model, prompts and optional sampling settings. `temperature`
 *   defaults to 0.2 and `maxTokens` to 1000.
 * @returns The parsed JSON cast to `T`. The shape is NOT validated here.
 * @throws LlmError when the request cannot be sent, when mana-llm answers
 *   with a non-2xx status (`status` and `body` are set), when the response
 *   envelope is not valid JSON, when the assistant content is missing or
 *   empty, or when the content is not valid JSON (`body` holds the raw
 *   content).
 */
export async function llmJson<T = unknown>(opts: LlmJsonOptions): Promise<T> {
  const reason = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // fetch() rejects (typically with a TypeError) on network failure. Convert
  // that into LlmError so callers only have one error type to handle.
  const res = await fetch(`${LLM_URL}/api/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: opts.model,
      messages: buildMessages(opts.system, opts.user),
      temperature: opts.temperature ?? 0.2,
      max_tokens: opts.maxTokens ?? 1000,
      response_format: { type: 'json_object' },
    }),
  }).catch((err: unknown) => {
    throw new LlmError(`mana-llm request failed: ${reason(err)}`);
  });

  if (!res.ok) {
    // Best effort: keep the error body for diagnostics, but never let a
    // failed read mask the HTTP error itself.
    const body = await res.text().catch(() => '');
    throw new LlmError(`mana-llm returned ${res.status}`, res.status, body);
  }

  // A 2xx response with a truncated or non-JSON envelope is also an LLM failure.
  const data = (await res.json().catch((err: unknown) => {
    throw new LlmError(`mana-llm returned an unreadable response: ${reason(err)}`, res.status);
  })) as {
    choices?: Array<{ message?: { content?: string } }>;
  };
  const raw = data.choices?.[0]?.message?.content;
  if (!raw) throw new LlmError('mana-llm response missing content');

  const cleaned = stripCodeFence(raw);
  try {
    return JSON.parse(cleaned) as T;
  } catch (err) {
    throw new LlmError(`mana-llm returned non-JSON content: ${reason(err)}`, undefined, raw);
  }
}
|
||||
|
||||
/**
 * Call the LLM in streaming mode. Invokes `onToken()` for each content delta
 * and resolves with the full concatenated text once the stream completes.
 *
 * Parses OpenAI-style SSE: lines beginning with `data:` (the space after the
 * colon is optional) and the sentinel `data: [DONE]`. Lines that are not
 * `data:` lines, and `data:` payloads that are not valid JSON, are skipped.
 *
 * @param opts Model, prompts, optional sampling settings, the `onToken`
 *   callback and an optional abort `signal`. `temperature` defaults to 0.5
 *   and `maxTokens` to 2000.
 * @returns Every delta concatenated in arrival order.
 * @throws LlmError when mana-llm answers with a non-2xx status or without a
 *   body. Errors thrown by `fetch()` (including aborts), by reading the
 *   stream, or by `onToken()` propagate unchanged.
 */
export async function llmStream(opts: LlmStreamOptions): Promise<string> {
  const res = await fetch(`${LLM_URL}/api/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: opts.model,
      messages: buildMessages(opts.system, opts.user),
      temperature: opts.temperature ?? 0.5,
      max_tokens: opts.maxTokens ?? 2000,
      stream: true,
    }),
    signal: opts.signal,
  });

  if (!res.ok || !res.body) {
    const body = await res.text().catch(() => '');
    throw new LlmError(`mana-llm stream returned ${res.status}`, res.status, body);
  }

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let full = '';

  // Handle one complete SSE line, forwarding its delta (if any) to onToken.
  const handleLine = async (line: string): Promise<void> => {
    if (!line.startsWith('data:')) return;
    const payload = line.slice(5).trim();
    if (!payload || payload === '[DONE]') return;

    let delta: string | undefined;
    try {
      const chunk = JSON.parse(payload) as {
        choices?: Array<{ delta?: { content?: string } }>;
      };
      delta = chunk.choices?.[0]?.delta?.content;
    } catch {
      // Ignore malformed frames such as keepalives and comments.
      return;
    }

    // Deliberately outside the try/catch above: an error thrown by the
    // caller's callback must surface instead of being mistaken for a
    // malformed frame.
    if (delta) {
      full += delta;
      await opts.onToken(delta);
    }
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      // SSE frames are separated by blank lines, but mana-llm forwards
      // line-by-line, so split on \n and keep the last (possibly partial) line.
      const lines = buffer.split('\n');
      buffer = lines.pop() ?? '';

      for (const line of lines) await handleLine(line);
    }

    // Flush bytes still held by the decoder and process a final line that
    // arrived without a trailing newline.
    buffer += decoder.decode();
    if (buffer) await handleLine(buffer);
  } catch (err) {
    // Tell the upstream to stop producing tokens nobody will read.
    await reader.cancel().catch(() => {});
    throw err;
  } finally {
    reader.releaseLock();
  }

  return full;
}
|
||||
|
||||
/**
 * Remove a markdown code fence wrapped around model output.
 *
 * Input that does not start with ``` (after trimming) is returned trimmed
 * and otherwise untouched. When it does, the opening fence is removed along
 * with an optional language tag, then a closing fence at the very end is
 * removed if present (a missing closing fence, e.g. truncated output, is
 * tolerated).
 *
 * Language tag handling:
 *  - `json` in any letter case is dropped even when the payload follows on
 *    the same line (```json{"a":1}```).
 *  - Any other tag (json5, jsonc, javascript, ...) is dropped only when it is
 *    followed by whitespace, so a payload glued to the fence is never eaten.
 *  - The bare JSON literals true / false / null are never treated as a tag.
 *
 * @param text Raw model output.
 * @returns The text with the surrounding fence removed.
 */
function stripCodeFence(text: string): string {
  const trimmed = text.trim();
  if (!trimmed.startsWith('```')) return trimmed;

  const withoutOpen = trimmed.replace(
    /^```(?:(?!(?:true|false|null)\b)[a-z][\w+-]*(?=\s)|json\b)?\s*/i,
    ''
  );
  return withoutOpen.replace(/\n?```\s*$/, '');
}
|
||||
118
apps/api/src/lib/responses.ts
Normal file
118
apps/api/src/lib/responses.ts
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Standard response helpers for mana-api modules.
|
||||
*
|
||||
* Background: A pre-launch audit (April 2026, see
|
||||
* `docs/REFACTORING_AUDIT_2026_04.md` item #5) flagged that error and
|
||||
* list responses were inconsistent across the 15+ modules. The actual
|
||||
* inconsistency turned out to be smaller than reported — every module
|
||||
* already returns errors as `{ error: 'message' }` — but using these
|
||||
* helpers gives us:
|
||||
*
|
||||
* 1. **Type-safe status codes** — TS catches stray `c.json(..., 999)`
|
||||
* 2. **One place to enrich the envelope** — when we add `code`,
|
||||
* `requestId`, or `details` later, we change one file instead of
|
||||
* grepping 79 callsites.
|
||||
* 3. **Consistent list shape** — `{ items, count }` regardless of
|
||||
* what the items are. Frontend `apps/mana/apps/web` doesn't have
|
||||
* to special-case `events` vs `contacts` vs `occurrences`.
|
||||
*
|
||||
* The shape is wire-compatible with the existing inline `c.json(...)`
|
||||
* calls, so adoption can be incremental: new code uses these helpers,
|
||||
* old code keeps working until someone touches the file.
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* import { errorResponse, listResponse, validationError } from '../../lib/responses';
|
||||
*
|
||||
* routes.get('/things', async (c) => {
|
||||
* const things = await db.select().from(thingsTable);
|
||||
* return listResponse(c, things);
|
||||
* });
|
||||
*
|
||||
* routes.post('/things', async (c) => {
|
||||
* const parsed = thingSchema.safeParse(await c.req.json());
|
||||
* if (!parsed.success) return validationError(c, parsed.error.issues);
|
||||
* // ...
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
|
||||
import type { Context } from 'hono';
|
||||
import type { ContentfulStatusCode } from 'hono/utils/http-status';
|
||||
|
||||
/**
 * JSON envelope for error responses.
 *
 * `error` is always present, which keeps the shape wire-compatible with the
 * inline `c.json({ error: '...' }, status)` pattern. `code` and `details`
 * are optional enrichments.
 */
export type ErrorBody = {
  /** Human-readable message. */
  error: string;
  /** Machine-readable error code. */
  code?: string;
  /** Extra structured information, e.g. validation issues. */
  details?: unknown;
};
|
||||
|
||||
/**
 * JSON envelope for list responses: `{ items: T[], count: number }`.
 *
 * The field is always called `items`, whatever the element type is, so the
 * frontend can unwrap every list the same way.
 */
export type ListBody<T> = {
  /** The list elements. */
  items: T[];
  /** Number of elements in `items`. */
  count: number;
};
|
||||
|
||||
/**
 * Send a structured error response.
 *
 * @param c      Hono context
 * @param error  Human-readable message, sent as `error`
 * @param status HTTP status (default 500)
 * @param extra  Optional enrichments: `code` is included only when it is a
 *               non-empty string, `details` whenever it is not `undefined`
 *               (so `null` is sent as-is).
 * @returns Whatever `c.json()` returns for the assembled body and status.
 */
export function errorResponse(
  c: Context,
  error: string,
  status: ContentfulStatusCode = 500,
  extra?: { code?: string; details?: unknown }
) {
  const code = extra?.code;
  const details = extra?.details;
  const body: ErrorBody = {
    error,
    ...(code ? { code } : {}),
    ...(details !== undefined ? { details } : {}),
  };
  return c.json(body, status);
}
|
||||
|
||||
/**
 * Send a validation error (HTTP 400 by default) with structured issues.
 *
 * The human-readable message is the `message` of the first issue when that
 * issue is a non-null object with a `message` property (the Zod case);
 * otherwise it is 'Invalid input'. The full issue list is attached under
 * `details` and `code` is always 'VALIDATION'.
 *
 * @param c      Hono context
 * @param issues Validation issues, e.g. `parsed.error.issues` from Zod
 * @param status HTTP status (default 400)
 */
export function validationError(c: Context, issues: unknown[], status: ContentfulStatusCode = 400) {
  let message = 'Invalid input';

  const first: unknown = Array.isArray(issues) && issues.length > 0 ? issues[0] : undefined;
  if (typeof first === 'object' && first !== null && 'message' in first) {
    message = String((first as { message: unknown }).message);
  }

  return errorResponse(c, message, status, { code: 'VALIDATION', details: issues });
}
|
||||
|
||||
/**
 * Send a list wrapped in the standard `{ items, count }` envelope, where
 * `count` is `items.length`.
 *
 * This deliberately replaces the per-module field names
 * (`{ events, count }`, `{ contacts, count }`) with one uniform shape.
 *
 * @param c      Hono context
 * @param items  Elements to return
 * @param status HTTP status (default 200)
 */
export function listResponse<T>(c: Context, items: T[], status: ContentfulStatusCode = 200) {
  const count = items.length;
  const envelope: ListBody<T> = { items, count };
  return c.json(envelope, status);
}
|
||||
120
apps/api/src/lib/search.ts
Normal file
120
apps/api/src/lib/search.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* Thin client for the mana-search Go service.
|
||||
*
|
||||
* Two helpers, scoped to what the research orchestrator needs:
|
||||
*
|
||||
* webSearch() — POST /api/v1/search, returns ranked SearXNG results.
|
||||
* bulkExtract() — POST /api/v1/extract/bulk, returns Readability text per URL.
|
||||
*
|
||||
* Internal service-to-service calls — no auth on the wire (private network).
|
||||
*/
|
||||
|
||||
// Base URL of the mana-search service. Uses MANA_SEARCH_URL when it is set to
// a non-empty value; otherwise falls back to the localhost default below.
const SEARCH_URL = process.env.MANA_SEARCH_URL || 'http://localhost:3021';
|
||||
|
||||
/**
 * One entry of the `results` array returned by `POST /api/v1/search`.
 *
 * NOTE(review): the response is cast to this shape without runtime
 * validation, so field semantics should be confirmed against mana-search.
 */
export interface SearchHit {
  url: string;
  title: string;
  snippet: string;
  /** Presumably the name of the upstream search engine; confirm. */
  engine: string;
  score: number;
  publishedDate?: string;
  category: string;
}
|
||||
|
||||
/**
 * Extracted page content, carried in `BulkExtractResult.content`.
 *
 * NOTE(review): the response is cast to this shape without runtime
 * validation, so field semantics should be confirmed against mana-search.
 */
export interface ExtractedContent {
  title: string;
  description?: string;
  author?: string;
  publishedDate?: string;
  siteName?: string;
  /** Extracted body text. */
  text: string;
  wordCount: number;
}
|
||||
|
||||
/**
 * Per-URL outcome in the `results` array of `POST /api/v1/extract/bulk`.
 *
 * NOTE(review): presumably `content` is set when `success` is true and
 * `error` otherwise; confirm against mana-search.
 */
export interface BulkExtractResult {
  url: string;
  success: boolean;
  content?: ExtractedContent;
  error?: string;
}
|
||||
|
||||
/**
 * Error raised by the mana-search helpers. `status` carries the HTTP status
 * of the failed response when one is available.
 */
export class SearchError extends Error {
  public readonly status?: number;

  constructor(message: string, status?: number) {
    super(message);
    this.status = status;
    this.name = 'SearchError';
  }
}
|
||||
|
||||
/** Options accepted by `webSearch()`. */
export interface WebSearchOptions {
  /** The search query text. */
  query: string;
  /** Maximum number of results to request; `webSearch()` uses 10 when omitted. */
  limit?: number;
  /** Search categories, e.g. 'general' | 'news' | 'science' | 'it'. */
  categories?: string[];
  /** Language tag; `webSearch()` uses 'de-DE' when omitted. */
  language?: string;
  /** Forwarded to `fetch()` so the caller can abort the request. */
  signal?: AbortSignal;
}
|
||||
|
||||
/**
 * Run one SearXNG query via mana-search and return normalised hits.
 *
 * @param opts Query plus optional tuning. `limit` defaults to 10 and
 *   `language` to 'de-DE'; `signal` aborts the HTTP request.
 * @returns The `results` array of the response, or `[]` when it is absent.
 *   The payload is not validated at runtime.
 * @throws SearchError (with `status`) when mana-search answers non-2xx.
 *   Errors thrown by `fetch()` itself (network failure, abort) propagate
 *   unchanged.
 */
export async function webSearch(opts: WebSearchOptions): Promise<SearchHit[]> {
  const res = await fetch(`${SEARCH_URL}/api/v1/search`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      query: opts.query,
      options: {
        limit: opts.limit ?? 10,
        categories: opts.categories,
        language: opts.language ?? 'de-DE',
      },
    }),
    signal: opts.signal,
  });

  if (!res.ok) {
    // Discard the unread error body so the underlying connection is released
    // right away instead of whenever the response gets garbage-collected.
    await res.body?.cancel().catch(() => {});
    throw new SearchError(`mana-search returned ${res.status}`, res.status);
  }

  const data = (await res.json()) as { results?: SearchHit[] };
  return data.results ?? [];
}
|
||||
|
||||
/**
 * Extract Readability content for a batch of URLs (mana-search fetches each
 * batch in parallel server-side).
 *
 * mana-search caps at 20 URLs per call, so longer lists are sliced into
 * batches of 20 that are sent one after another; results are concatenated in
 * batch order.
 *
 * @param urls URLs to extract. An empty list returns `[]` without any request.
 * @param opts `maxLength` (default 8000) and `concurrency` (default 5) are
 *   forwarded to mana-search; `signal` aborts the in-flight HTTP request.
 * @returns The concatenated `results` arrays of all batches. The payload is
 *   not validated at runtime.
 * @throws SearchError (with `status`) as soon as one batch answers non-2xx;
 *   results of earlier batches are discarded. Errors thrown by `fetch()`
 *   itself (network failure, abort) propagate unchanged.
 */
export async function bulkExtract(
  urls: string[],
  opts: { maxLength?: number; concurrency?: number; signal?: AbortSignal } = {}
): Promise<BulkExtractResult[]> {
  if (urls.length === 0) return [];

  // Upper bound on URLs per request imposed by mana-search.
  const MAX_URLS_PER_CALL = 20;

  const batches: string[][] = [];
  for (let i = 0; i < urls.length; i += MAX_URLS_PER_CALL) {
    batches.push(urls.slice(i, i + MAX_URLS_PER_CALL));
  }

  const all: BulkExtractResult[] = [];
  for (const batch of batches) {
    const res = await fetch(`${SEARCH_URL}/api/v1/extract/bulk`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        urls: batch,
        concurrency: opts.concurrency ?? 5,
        options: {
          maxLength: opts.maxLength ?? 8000,
        },
      }),
      signal: opts.signal,
    });

    if (!res.ok) {
      // Discard the unread error body so the underlying connection is
      // released right away instead of whenever the response gets
      // garbage-collected.
      await res.body?.cancel().catch(() => {});
      throw new SearchError(`mana-search bulk extract returned ${res.status}`, res.status);
    }

    const data = (await res.json()) as { results?: BulkExtractResult[] };
    if (data.results) all.push(...data.results);
  }

  return all;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue