feat(research): add mana-research service — Phase 1 + 2

New Bun/Hono service on port 3068 that bundles many web-research providers
behind a unified interface for side-by-side comparison. All eval runs
persist in research.* (mana_platform) so quality can be reviewed later.

Providers (Phase 1+2):
  search:  searxng, duckduckgo, brave, tavily, exa, serper
  extract: readability (via mana-search), jina-reader, firecrawl

Endpoints:
  POST /v1/search, /v1/search/compare       — single + fan-out
  POST /v1/extract, /v1/extract/compare     — single + fan-out
  GET  /v1/runs, /v1/runs/:id               — history
  POST /v1/runs/:run/results/:id/rate       — manual eval
  GET  /v1/providers, /v1/providers/health  — catalog + readiness

Auto-routing: when `provider` is omitted, queries are classified via regex
(fast path, 0ms) with optional mana-llm fallback, then routed to the first
available provider for that query type (news → tavily, academic → exa,
semantic → exa, etc.).

Credits: server-key calls go through a mana-credits reserve → commit/refund
flow, so failed provider calls do not charge the user. Bring-your-own (BYO)
keys are supported via research.provider_configs (UI arrives in Phase 4).

Cache: Redis with graceful degradation (1h TTL for search, 24h for
extract). Pay-per-use APIs only — no subscription-gated providers.

Docs: docs/plans/mana-research-service.md + docs/reports/web-research-capabilities.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-04-17 14:42:25 +02:00
parent 004fc0b2fd
commit 2bdb48bdd1
56 changed files with 4431 additions and 298 deletions

View file

@ -0,0 +1,27 @@
{
"name": "@mana/shared-research",
"version": "0.1.0",
"private": true,
"sideEffects": false,
"description": "Shared types and provider interfaces for the Mana research service.",
"main": "./src/index.ts",
"types": "./src/index.ts",
"exports": {
".": "./src/index.ts",
"./types": "./src/types.ts",
"./providers": "./src/providers.ts",
"./options": "./src/options.ts",
"./ids": "./src/ids.ts"
},
"scripts": {
"type-check": "tsc --noEmit",
"clean": "rm -rf dist"
},
"dependencies": {
"zod": "^3.24.0"
},
"devDependencies": {
"@types/node": "^24.10.1",
"typescript": "^5.9.3"
}
}

View file

@ -0,0 +1,38 @@
/**
 * Canonical provider identifiers for the research service, grouped by category.
 *
 * Each list is a readonly literal tuple (`as const`) so the id union types
 * below can be derived from the values instead of being declared twice.
 * Element order is part of the exported value and is kept as-is.
 *
 * NOTE(review): some ids listed here (e.g. 'scrapingbee' and every agent id)
 * are presumably reserved for providers that are not wired up yet — confirm.
 */

/** Ids of the web-search providers. */
export const SEARCH_PROVIDER_IDS = [
  'searxng',
  'brave',
  'tavily',
  'exa',
  'serper',
  'duckduckgo',
] as const;

/** Ids of the content-extraction providers. */
export const EXTRACT_PROVIDER_IDS = [
  'readability',
  'jina-reader',
  'firecrawl',
  'scrapingbee',
] as const;

/** Ids of the research-agent providers. */
export const AGENT_PROVIDER_IDS = [
  'perplexity-sonar',
  'claude-web-search',
  'openai-responses',
  'gemini-grounding',
  'openai-deep-research',
] as const;

/** Union of the literal ids in {@link SEARCH_PROVIDER_IDS}. */
export type SearchProviderId = (typeof SEARCH_PROVIDER_IDS)[number];
/** Union of the literal ids in {@link EXTRACT_PROVIDER_IDS}. */
export type ExtractProviderId = (typeof EXTRACT_PROVIDER_IDS)[number];
/** Union of the literal ids in {@link AGENT_PROVIDER_IDS}. */
export type AgentProviderId = (typeof AGENT_PROVIDER_IDS)[number];
/** Any provider id, regardless of category. */
export type ProviderId = SearchProviderId | ExtractProviderId | AgentProviderId;

/** The three provider categories that the id lists are split into. */
export type ProviderCategory = 'search' | 'extract' | 'agent';
/** How a provider call is billed. */
export type BillingMode = 'server-key' | 'byo-key' | 'free';

/**
 * Maps a provider id to the category whose id list contains it.
 *
 * @param id - Provider id to classify.
 * @returns `'search'` or `'extract'` when the id appears in the matching list;
 *   `'agent'` for every other value (there is no explicit agent-list lookup).
 */
export function providerCategory(id: ProviderId): ProviderCategory {
  // Widen the literal tuples so membership can be tested against any ProviderId.
  const searchIds: readonly string[] = SEARCH_PROVIDER_IDS;
  const extractIds: readonly string[] = EXTRACT_PROVIDER_IDS;

  if (searchIds.includes(id)) {
    return 'search';
  }
  return extractIds.includes(id) ? 'extract' : 'agent';
}

View file

@ -0,0 +1,38 @@
/**
 * Barrel module: re-exports the public surface of the sibling modules
 * `./ids`, `./types`, `./options` and `./providers` from a single place.
 * Type-only symbols use `export type` so they are erased from the emit.
 */

// Provider id unions and billing/category types.
export type {
ProviderId,
SearchProviderId,
ExtractProviderId,
AgentProviderId,
ProviderCategory,
BillingMode,
} from './ids';
// Runtime id lists and the id → category helper.
export {
SEARCH_PROVIDER_IDS,
EXTRACT_PROVIDER_IDS,
AGENT_PROVIDER_IDS,
providerCategory,
} from './ids';
// Result payloads and response envelopes.
export type {
ProviderMeta,
SearchHit,
ExtractedContent,
Citation,
AgentAnswer,
SearchResponse,
ExtractResponse,
AgentResponse,
CompareResponse,
} from './types';
// Runtime (zod) schemas for search hits and citations.
export { searchHitSchema, citationSchema } from './types';
// Per-request option types and their zod schemas.
export type { SearchOptions, ExtractOptions, AgentOptions } from './options';
export { searchOptionsSchema, extractOptionsSchema, agentOptionsSchema } from './options';
// Contracts that provider implementations satisfy.
export type {
ProviderCapabilities,
ProviderCallContext,
SearchProvider,
ExtractProvider,
ResearchAgent,
} from './providers';

View file

@ -0,0 +1,27 @@
import { z } from 'zod';
/**
 * Zod schema for the optional settings of a search request.
 * Every field is optional and no defaults are applied by this schema.
 */
export const searchOptionsSchema = z.object({
// Integer in the inclusive range 1–50.
limit: z.number().int().min(1).max(50).optional(),
// Any string is accepted; no format is enforced here.
// NOTE(review): presumably a language code — confirm the expected format.
language: z.string().optional(),
// Zero or more of the four listed category names.
categories: z.array(z.enum(['general', 'news', 'science', 'it'])).optional(),
// One of the four listed time windows.
timeRange: z.enum(['day', 'week', 'month', 'year']).optional(),
// Integer in the inclusive range 0–2.
// NOTE(review): the meaning of each level is not defined in this file — confirm.
safeSearch: z.number().int().min(0).max(2).optional(),
});
/**
 * Zod schema for the optional settings of an extract request.
 * Every field is optional and no defaults are applied by this schema.
 */
export const extractOptionsSchema = z.object({
// Positive integer. NOTE(review): unit (characters vs. bytes) is not stated here — confirm.
maxLength: z.number().int().positive().optional(),
includeHtml: z.boolean().optional(),
includeMarkdown: z.boolean().optional(),
// Positive integer; per the name, a timeout in milliseconds.
timeoutMs: z.number().int().positive().optional(),
});
/**
 * Zod schema for the optional settings of a research-agent request.
 * Every field is optional and no defaults are applied by this schema.
 */
export const agentOptionsSchema = z.object({
// Any string is accepted; it is not checked against a list of known models here.
model: z.string().optional(),
// Number in the inclusive range 0–2 (not restricted to integers).
temperature: z.number().min(0).max(2).optional(),
// Positive integer.
maxTokens: z.number().int().positive().optional(),
systemPrompt: z.string().optional(),
});
// Static option types are inferred from the schemas above, so the
// compile-time shape and the runtime validation cannot drift apart.
/** Parsed output type of `searchOptionsSchema`. */
export type SearchOptions = z.infer<typeof searchOptionsSchema>;
/** Parsed output type of `extractOptionsSchema`. */
export type ExtractOptions = z.infer<typeof extractOptionsSchema>;
/** Parsed output type of `agentOptionsSchema`. */
export type AgentOptions = z.infer<typeof agentOptionsSchema>;

View file

@ -0,0 +1,61 @@
import type { SearchProviderId, ExtractProviderId, AgentProviderId } from './ids';
import type { AgentOptions, ExtractOptions, SearchOptions } from './options';
import type { AgentResponse, ExtractResponse, SearchResponse } from './types';
/**
 * Optional boolean feature flags attached to a provider.
 * All flags are optional, so an omitted flag is `undefined` rather than `false`.
 *
 * NOTE(review): the precise meaning of each flag is not defined in this file;
 * the names are the only documentation — confirm against the implementations.
 */
export interface ProviderCapabilities {
webSearch?: boolean;
newsSearch?: boolean;
scholarSearch?: boolean;
semanticSearch?: boolean;
contentInResults?: boolean;
jsRendering?: boolean;
pdfSupport?: boolean;
markdownOutput?: boolean;
multiStep?: boolean;
async?: boolean;
withCitations?: boolean;
}
/**
 * Per-call context handed to every provider method as its last argument.
 */
export interface ProviderCallContext {
// Required field, but may be null.
// NOTE(review): presumably null for providers that need no key — confirm.
apiKey: string | null;
userId?: string;
// Optional abort signal. NOTE(review): presumably used to cancel the call — confirm.
signal?: AbortSignal;
}
/**
 * Contract for a search provider.
 *
 * `search` resolves to a `SearchResponse` without its `meta` field, plus a
 * `rawLatencyMs` number.
 * NOTE(review): presumably the caller builds `meta` from `rawLatencyMs` and
 * billing/cache information — confirm against the service code.
 */
export interface SearchProvider {
id: SearchProviderId;
capabilities: ProviderCapabilities;
requiresApiKey: boolean;
search(
query: string,
options: SearchOptions,
ctx: ProviderCallContext
): Promise<Omit<SearchResponse, 'meta'> & { rawLatencyMs: number }>;
}
/**
 * Contract for a content-extraction provider.
 *
 * `extract` resolves to an `ExtractResponse` without its `meta` field, plus a
 * `rawLatencyMs` number.
 * NOTE(review): presumably the caller builds `meta` afterwards — confirm.
 */
export interface ExtractProvider {
id: ExtractProviderId;
capabilities: ProviderCapabilities;
requiresApiKey: boolean;
extract(
url: string,
options: ExtractOptions,
ctx: ProviderCallContext
): Promise<Omit<ExtractResponse, 'meta'> & { rawLatencyMs: number }>;
}
/**
 * Contract for a research-agent provider.
 *
 * `research` resolves to an `AgentResponse` without its `meta` field, plus a
 * `rawLatencyMs` number and an optional top-level `tokenUsage`.
 * NOTE(review): `AgentAnswer` (inside `AgentResponse`) also declares an
 * optional `tokenUsage` — confirm which of the two is authoritative.
 */
export interface ResearchAgent {
id: AgentProviderId;
capabilities: ProviderCapabilities;
requiresApiKey: boolean;
research(
query: string,
options: AgentOptions,
ctx: ProviderCallContext
): Promise<
Omit<AgentResponse, 'meta'> & {
rawLatencyMs: number;
tokenUsage?: { input: number; output: number };
}
>;
}

View file

@ -0,0 +1,93 @@
import { z } from 'zod';
import type { BillingMode, ProviderCategory, ProviderId } from './ids';
/**
 * Metadata attached to every provider response envelope
 * (`SearchResponse`, `ExtractResponse`, `AgentResponse`, `CompareResponse`).
 */
export interface ProviderMeta {
provider: ProviderId;
category: ProviderCategory;
// Per the name, a latency in milliseconds.
// NOTE(review): whether this includes cache/billing overhead is not visible here.
latencyMs: number;
// NOTE(review): presumably the amount charged via mana-credits — confirm.
costCredits: number;
cacheHit: boolean;
billingMode: BillingMode;
// Optional. NOTE(review): presumably set only when the call failed — confirm.
errorCode?: string;
}
/**
 * A single search result. `url`, `title` and `snippet` are required; the
 * remaining fields are optional.
 *
 * NOTE(review): `searchHitSchema` in this file repeats these fields by hand;
 * keep the two in sync when adding or changing a field.
 */
export interface SearchHit {
url: string;
title: string;
snippet: string;
// Plain string; no date format is enforced by the type.
publishedAt?: string;
author?: string;
// NOTE(review): scale and direction of the score are not defined here — confirm.
score?: number;
content?: string;
// Untyped value. NOTE(review): presumably the provider's original payload — confirm.
providerRaw?: unknown;
}
/**
 * Content extracted from a single URL. `url`, `title`, `content` and
 * `wordCount` are required; the remaining fields are optional.
 */
export interface ExtractedContent {
url: string;
title: string;
// NOTE(review): format (plain text / markdown / HTML) is not fixed by the type — confirm.
content: string;
excerpt?: string;
author?: string;
siteName?: string;
// Plain string; no date format is enforced by the type.
publishedAt?: string;
wordCount: number;
// Untyped value. NOTE(review): presumably the provider's original payload — confirm.
providerRaw?: unknown;
}
/**
 * A source reference listed in `AgentAnswer.citations`.
 *
 * NOTE(review): `citationSchema` in this file repeats these fields by hand;
 * keep the two in sync when adding or changing a field.
 */
export interface Citation {
url: string;
title: string;
snippet?: string;
}
/**
 * Answer payload of a research agent: the query, the answer text and the
 * citations are required; follow-ups, token usage and raw payload are optional.
 */
export interface AgentAnswer {
query: string;
answer: string;
// Required array; the type allows it to be empty.
citations: Citation[];
followUpQueries?: string[];
// Input/output counts. NOTE(review): presumably LLM token counts — confirm.
tokenUsage?: { input: number; output: number };
// Untyped value. NOTE(review): presumably the provider's original payload — confirm.
providerRaw?: unknown;
}
// Response envelopes: each pairs a category-specific payload with the
// shared `ProviderMeta` block.

/** Search envelope: a list of hits plus call metadata. */
export interface SearchResponse {
results: SearchHit[];
meta: ProviderMeta;
}
/** Extract envelope: one extracted document plus call metadata. */
export interface ExtractResponse {
content: ExtractedContent;
meta: ProviderMeta;
}
/** Agent envelope: one agent answer plus call metadata. */
export interface AgentResponse {
answer: AgentAnswer;
meta: ProviderMeta;
}
/**
 * Result of running one query against several providers.
 *
 * @typeParam T - Payload type carried in each entry's `data` field.
 */
export interface CompareResponse<T> {
runId: string;
query: string;
// One entry per provider, each with its own success flag and metadata.
results: Array<{
provider: ProviderId;
success: boolean;
// Optional. NOTE(review): presumably absent when `success` is false — confirm.
data?: T;
meta: ProviderMeta;
}>;
}
/**
 * Zod schema with the same field names and optionality as the `SearchHit`
 * interface. Unlike the interface, it additionally requires `url` to pass
 * zod's URL check.
 *
 * NOTE(review): this schema and `SearchHit` are maintained separately and
 * nothing ties them together at compile time — keep them in sync.
 */
export const searchHitSchema = z.object({
url: z.string().url(),
title: z.string(),
snippet: z.string(),
publishedAt: z.string().optional(),
author: z.string().optional(),
score: z.number().optional(),
content: z.string().optional(),
providerRaw: z.unknown().optional(),
});
/**
 * Zod schema with the same field names and optionality as the `Citation`
 * interface. Unlike the interface, it additionally requires `url` to pass
 * zod's URL check.
 *
 * NOTE(review): this schema and `Citation` are maintained separately and
 * nothing ties them together at compile time — keep them in sync.
 */
export const citationSchema = z.object({
url: z.string().url(),
title: z.string(),
snippet: z.string().optional(),
});

View file

@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"declaration": true,
"noEmit": true,
"types": ["node"]
},
"include": ["src/**/*.ts"]
}