mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
feat(research): add mana-research service — Phase 1 + 2
New Bun/Hono service on port 3068 that bundles many web-research providers behind a unified interface for side-by-side comparison. All eval runs persist in research.* (mana_platform) so quality can be reviewed later.

Providers (Phase 1+2):
  search: searxng, duckduckgo, brave, tavily, exa, serper
  extract: readability (via mana-search), jina-reader, firecrawl

Endpoints:
  POST /v1/search, /v1/search/compare — single + fan-out
  POST /v1/extract, /v1/extract/compare — single + fan-out
  GET /v1/runs, /v1/runs/:id — history
  POST /v1/runs/:run/results/:id/rate — manual eval
  GET /v1/providers, /v1/providers/health — catalog + readiness

Auto-routing: when `provider` is omitted, queries are classified via regex (fast path, 0ms) with optional mana-llm fallback, then routed to the first available provider for that query type (news → tavily, academic → exa, semantic → exa, etc.).

Credits: server-key calls go through mana-credits reserve → commit/refund so failed provider calls don't charge the user. BYO-keys supported via research.provider_configs (UI arrives in Phase 4).

Cache: Redis with graceful degradation (1h TTL for search, 24h for extract).

Pay-per-use APIs only — no subscription-gated providers.

Docs: docs/plans/mana-research-service.md + docs/reports/web-research-capabilities.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
004fc0b2fd
commit
2bdb48bdd1
56 changed files with 4431 additions and 298 deletions
27
packages/shared-research/package.json
Normal file
27
packages/shared-research/package.json
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"name": "@mana/shared-research",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"sideEffects": false,
|
||||
"description": "Shared types and provider interfaces for the Mana research service.",
|
||||
"main": "./src/index.ts",
|
||||
"types": "./src/index.ts",
|
||||
"exports": {
|
||||
".": "./src/index.ts",
|
||||
"./types": "./src/types.ts",
|
||||
"./providers": "./src/providers.ts",
|
||||
"./options": "./src/options.ts",
|
||||
"./ids": "./src/ids.ts"
|
||||
},
|
||||
"scripts": {
|
||||
"type-check": "tsc --noEmit",
|
||||
"clean": "rm -rf dist"
|
||||
},
|
||||
"dependencies": {
|
||||
"zod": "^3.24.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.10.1",
|
||||
"typescript": "^5.9.3"
|
||||
}
|
||||
}
|
||||
38
packages/shared-research/src/ids.ts
Normal file
38
packages/shared-research/src/ids.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
/** Identifiers of the providers in the `search` category. */
export const SEARCH_PROVIDER_IDS = [
	'searxng',
	'brave',
	'tavily',
	'exa',
	'serper',
	'duckduckgo',
] as const;

/** Identifiers of the providers in the `extract` category. */
export const EXTRACT_PROVIDER_IDS = [
	'readability',
	'jina-reader',
	'firecrawl',
	'scrapingbee',
] as const;

/** Identifiers of the providers in the `agent` category. */
export const AGENT_PROVIDER_IDS = [
	'perplexity-sonar',
	'claude-web-search',
	'openai-responses',
	'gemini-grounding',
	'openai-deep-research',
] as const;

/** Union of the literal ids listed in {@link SEARCH_PROVIDER_IDS}. */
export type SearchProviderId = (typeof SEARCH_PROVIDER_IDS)[number];
/** Union of the literal ids listed in {@link EXTRACT_PROVIDER_IDS}. */
export type ExtractProviderId = (typeof EXTRACT_PROVIDER_IDS)[number];
/** Union of the literal ids listed in {@link AGENT_PROVIDER_IDS}. */
export type AgentProviderId = (typeof AGENT_PROVIDER_IDS)[number];
/** Any provider id, regardless of category. */
export type ProviderId = SearchProviderId | ExtractProviderId | AgentProviderId;

/** The three provider categories; one per id list above. */
export type ProviderCategory = 'search' | 'extract' | 'agent';

/**
 * How a provider call is paid for.
 * NOTE(review): the exact meaning of each mode is defined by the service's
 * billing code, which is not part of this module — confirm there.
 */
export type BillingMode = 'server-key' | 'byo-key' | 'free';

/**
 * Maps a provider id to the category whose id list contains it.
 *
 * @param id - A provider id from one of the three id lists.
 * @returns `'search'`, `'extract'` or `'agent'`, depending on which list contains `id`.
 * @throws Error if `id` is in none of the lists. This is only reachable when a
 *   value bypasses the `ProviderId` type (e.g. an unchecked cast of external
 *   input); previously such a value was silently reported as `'agent'`.
 */
export function providerCategory(id: ProviderId): ProviderCategory {
	// Widen once so the literal-typed tuples can be searched without assertions.
	const candidate: string = id;
	const isListed = (ids: readonly string[]): boolean => ids.includes(candidate);

	if (isListed(SEARCH_PROVIDER_IDS)) return 'search';
	if (isListed(EXTRACT_PROVIDER_IDS)) return 'extract';
	if (isListed(AGENT_PROVIDER_IDS)) return 'agent';

	throw new Error(`Unknown research provider id: ${candidate}`);
}
|
||||
38
packages/shared-research/src/index.ts
Normal file
38
packages/shared-research/src/index.ts
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
export type {
|
||||
ProviderId,
|
||||
SearchProviderId,
|
||||
ExtractProviderId,
|
||||
AgentProviderId,
|
||||
ProviderCategory,
|
||||
BillingMode,
|
||||
} from './ids';
|
||||
export {
|
||||
SEARCH_PROVIDER_IDS,
|
||||
EXTRACT_PROVIDER_IDS,
|
||||
AGENT_PROVIDER_IDS,
|
||||
providerCategory,
|
||||
} from './ids';
|
||||
|
||||
export type {
|
||||
ProviderMeta,
|
||||
SearchHit,
|
||||
ExtractedContent,
|
||||
Citation,
|
||||
AgentAnswer,
|
||||
SearchResponse,
|
||||
ExtractResponse,
|
||||
AgentResponse,
|
||||
CompareResponse,
|
||||
} from './types';
|
||||
export { searchHitSchema, citationSchema } from './types';
|
||||
|
||||
export type { SearchOptions, ExtractOptions, AgentOptions } from './options';
|
||||
export { searchOptionsSchema, extractOptionsSchema, agentOptionsSchema } from './options';
|
||||
|
||||
export type {
|
||||
ProviderCapabilities,
|
||||
ProviderCallContext,
|
||||
SearchProvider,
|
||||
ExtractProvider,
|
||||
ResearchAgent,
|
||||
} from './providers';
|
||||
27
packages/shared-research/src/options.ts
Normal file
27
packages/shared-research/src/options.ts
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
// Allowed literals for the enum-typed search options, hoisted so the schema
// below reads as a flat list of fields.
const SEARCH_CATEGORIES = ['general', 'news', 'science', 'it'] as const;
const SEARCH_TIME_RANGES = ['day', 'week', 'month', 'year'] as const;

/**
 * Validation schema for the options of a search call. Every field is optional.
 *
 * - `limit`: integer from 1 to 50.
 * - `language`: any string (no format is enforced here).
 * - `categories`: array drawn from `general`, `news`, `science`, `it`.
 * - `timeRange`: one of `day`, `week`, `month`, `year`.
 * - `safeSearch`: integer from 0 to 2.
 */
export const searchOptionsSchema = z.object({
	limit: z.number().int().min(1).max(50).optional(),
	language: z.string().optional(),
	categories: z.array(z.enum(SEARCH_CATEGORIES)).optional(),
	timeRange: z.enum(SEARCH_TIME_RANGES).optional(),
	safeSearch: z.number().int().min(0).max(2).optional(),
});
|
||||
|
||||
/**
 * Validation schema for the options of an extract call. Every field is optional.
 *
 * - `maxLength`: positive integer.
 * - `includeHtml` / `includeMarkdown`: booleans.
 * - `timeoutMs`: positive integer (milliseconds, going by the field name).
 */
export const extractOptionsSchema = z.object({
	maxLength: z.number().int().positive().optional(),
	includeHtml: z.boolean().optional(),
	includeMarkdown: z.boolean().optional(),
	timeoutMs: z.number().int().positive().optional(),
});
|
||||
|
||||
/**
 * Validation schema for the options of a research-agent call. Every field is optional.
 *
 * - `model`: any string.
 * - `temperature`: number from 0 to 2 (inclusive).
 * - `maxTokens`: positive integer.
 * - `systemPrompt`: any string.
 */
export const agentOptionsSchema = z.object({
	model: z.string().optional(),
	temperature: z.number().min(0).max(2).optional(),
	maxTokens: z.number().int().positive().optional(),
	systemPrompt: z.string().optional(),
});
|
||||
|
||||
/** Parsed shape of `searchOptionsSchema`; derived from the schema so the two cannot drift. */
export type SearchOptions = z.infer<typeof searchOptionsSchema>;

/** Parsed shape of `extractOptionsSchema`; derived from the schema so the two cannot drift. */
export type ExtractOptions = z.infer<typeof extractOptionsSchema>;

/** Parsed shape of `agentOptionsSchema`; derived from the schema so the two cannot drift. */
export type AgentOptions = z.infer<typeof agentOptionsSchema>;
|
||||
61
packages/shared-research/src/providers.ts
Normal file
61
packages/shared-research/src/providers.ts
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import type { SearchProviderId, ExtractProviderId, AgentProviderId } from './ids';
|
||||
import type { AgentOptions, ExtractOptions, SearchOptions } from './options';
|
||||
import type { AgentResponse, ExtractResponse, SearchResponse } from './types';
|
||||
|
||||
/**
 * Optional feature flags a provider can advertise.
 *
 * Every flag is an optional boolean, so an omitted flag reads as `undefined`.
 * NOTE(review): the precise meaning of each flag is not defined in this
 * module; confirm against the provider implementations before relying on it.
 */
export interface ProviderCapabilities {
	webSearch?: boolean;
	newsSearch?: boolean;
	scholarSearch?: boolean;
	semanticSearch?: boolean;
	contentInResults?: boolean;
	jsRendering?: boolean;
	pdfSupport?: boolean;
	markdownOutput?: boolean;
	multiStep?: boolean;
	async?: boolean;
	withCitations?: boolean;
}
|
||||
|
||||
/**
 * Per-call context handed to a provider alongside the query/URL and options.
 */
export interface ProviderCallContext {
	/** API key for this call, or `null` when none is supplied. */
	apiKey: string | null;
	/** Optional id of the user on whose behalf the call is made. */
	userId?: string;
	/** Optional abort signal; presumably used to cancel the underlying request — confirm in providers. */
	signal?: AbortSignal;
}
|
||||
|
||||
/**
 * Contract implemented by every search provider.
 */
export interface SearchProvider {
	/** Which search provider this is. */
	id: SearchProviderId;
	/** Feature flags advertised by this provider. */
	capabilities: ProviderCapabilities;
	/** Whether the provider declares that it needs an API key. */
	requiresApiKey: boolean;
	/**
	 * Runs a search.
	 *
	 * @param query - The search query.
	 * @param options - Search options (see `SearchOptions`).
	 * @param ctx - Per-call context (API key, user id, abort signal).
	 * @returns The `SearchResponse` fields except `meta`, plus `rawLatencyMs`.
	 *   NOTE(review): `meta` is presumably filled in by the caller — confirm.
	 */
	search(
		query: string,
		options: SearchOptions,
		ctx: ProviderCallContext
	): Promise<Omit<SearchResponse, 'meta'> & { rawLatencyMs: number }>;
}
|
||||
|
||||
/**
 * Contract implemented by every content-extraction provider.
 */
export interface ExtractProvider {
	/** Which extract provider this is. */
	id: ExtractProviderId;
	/** Feature flags advertised by this provider. */
	capabilities: ProviderCapabilities;
	/** Whether the provider declares that it needs an API key. */
	requiresApiKey: boolean;
	/**
	 * Extracts the content of a single URL.
	 *
	 * @param url - The URL to extract.
	 * @param options - Extraction options (see `ExtractOptions`).
	 * @param ctx - Per-call context (API key, user id, abort signal).
	 * @returns The `ExtractResponse` fields except `meta`, plus `rawLatencyMs`.
	 *   NOTE(review): `meta` is presumably filled in by the caller — confirm.
	 */
	extract(
		url: string,
		options: ExtractOptions,
		ctx: ProviderCallContext
	): Promise<Omit<ExtractResponse, 'meta'> & { rawLatencyMs: number }>;
}
|
||||
|
||||
/**
 * Contract implemented by every research-agent provider.
 */
export interface ResearchAgent {
	/** Which agent provider this is. */
	id: AgentProviderId;
	/** Feature flags advertised by this provider. */
	capabilities: ProviderCapabilities;
	/** Whether the provider declares that it needs an API key. */
	requiresApiKey: boolean;
	/**
	 * Answers a research query.
	 *
	 * @param query - The research question.
	 * @param options - Agent options (see `AgentOptions`).
	 * @param ctx - Per-call context (API key, user id, abort signal).
	 * @returns The `AgentResponse` fields except `meta`, plus `rawLatencyMs` and
	 *   an optional top-level `tokenUsage`.
	 *   NOTE(review): `AgentAnswer` also declares `tokenUsage`; which of the two
	 *   is authoritative is not visible here — confirm.
	 */
	research(
		query: string,
		options: AgentOptions,
		ctx: ProviderCallContext
	): Promise<
		Omit<AgentResponse, 'meta'> & {
			rawLatencyMs: number;
			tokenUsage?: { input: number; output: number };
		}
	>;
}
|
||||
93
packages/shared-research/src/types.ts
Normal file
93
packages/shared-research/src/types.ts
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import { z } from 'zod';
|
||||
import type { BillingMode, ProviderCategory, ProviderId } from './ids';
|
||||
|
||||
/**
 * Bookkeeping attached to every provider response.
 */
export interface ProviderMeta {
	/** Provider the response is attributed to. */
	provider: ProviderId;
	/** Category of that provider. */
	category: ProviderCategory;
	/** Latency of the call (milliseconds, going by the field name). */
	latencyMs: number;
	/** Cost of the call, expressed in credits. */
	costCredits: number;
	/** Whether the response came from a cache. */
	cacheHit: boolean;
	/** How the call was billed. */
	billingMode: BillingMode;
	/** Optional error code; the set of possible values is not defined in this module. */
	errorCode?: string;
}
|
||||
|
||||
/**
 * A single search result, in the shape shared by all search providers.
 * `url`, `title` and `snippet` are required; everything else is optional.
 */
export interface SearchHit {
	url: string;
	title: string;
	snippet: string;
	/** Publication date as a string; no format is enforced by this type. */
	publishedAt?: string;
	author?: string;
	/** Optional score; its scale is not defined in this module. */
	score?: number;
	/** Optional page content delivered together with the hit. */
	content?: string;
	/** Optional untyped provider payload; narrow before use. */
	providerRaw?: unknown;
}
|
||||
|
||||
/**
 * Content extracted from a single URL, in the shape shared by all extract providers.
 * `url`, `title`, `content` and `wordCount` are required; the rest is optional.
 */
export interface ExtractedContent {
	url: string;
	title: string;
	content: string;
	excerpt?: string;
	author?: string;
	siteName?: string;
	/** Publication date as a string; no format is enforced by this type. */
	publishedAt?: string;
	wordCount: number;
	/** Optional untyped provider payload; narrow before use. */
	providerRaw?: unknown;
}
|
||||
|
||||
/**
 * A source reference: required `url` and `title`, optional `snippet`.
 */
export interface Citation {
	url: string;
	title: string;
	snippet?: string;
}
|
||||
|
||||
/**
 * Answer produced by a research agent for one query.
 */
export interface AgentAnswer {
	/** The query this answer belongs to. */
	query: string;
	/** The answer text. */
	answer: string;
	/** Sources cited for the answer; required, but may be an empty array as far as the type goes. */
	citations: Citation[];
	/** Optional follow-up queries. */
	followUpQueries?: string[];
	/** Optional token counts, split into `input` and `output`. */
	tokenUsage?: { input: number; output: number };
	/** Optional untyped provider payload; narrow before use. */
	providerRaw?: unknown;
}
|
||||
|
||||
/** Result of a search call: the hits plus the call's metadata. */
export interface SearchResponse {
	results: SearchHit[];
	meta: ProviderMeta;
}
|
||||
|
||||
/** Result of an extract call: the extracted content plus the call's metadata. */
export interface ExtractResponse {
	content: ExtractedContent;
	meta: ProviderMeta;
}
|
||||
|
||||
/** Result of a research-agent call: the answer plus the call's metadata. */
export interface AgentResponse {
	answer: AgentAnswer;
	meta: ProviderMeta;
}
|
||||
|
||||
/**
 * Result of running one query against several providers.
 *
 * @typeParam T - Payload type carried by each per-provider entry.
 */
export interface CompareResponse<T> {
	/** Identifier of the run. */
	runId: string;
	/** The query that was sent to the providers. */
	query: string;
	/** One entry per provider. */
	results: Array<{
		provider: ProviderId;
		success: boolean;
		/** Payload of this entry; optional, so check before use (presumably absent when `success` is false — confirm). */
		data?: T;
		meta: ProviderMeta;
	}>;
}
|
||||
|
||||
/**
 * Runtime validator with the same fields as the `SearchHit` interface.
 *
 * It is stricter than the interface in one respect: `url` must be a valid URL
 * string, not just any string. The schema and the interface are maintained
 * separately, so a field change has to be made in both places.
 */
export const searchHitSchema = z.object({
	// Required fields.
	url: z.string().url(),
	title: z.string(),
	snippet: z.string(),
	// Optional fields.
	publishedAt: z.string().optional(),
	author: z.string().optional(),
	score: z.number().optional(),
	content: z.string().optional(),
	providerRaw: z.unknown().optional(),
});
|
||||
|
||||
/**
 * Runtime validator with the same fields as the `Citation` interface.
 *
 * It is stricter than the interface in one respect: `url` must be a valid URL
 * string. The schema and the interface are maintained separately, so a field
 * change has to be made in both places.
 */
export const citationSchema = z.object({
	url: z.string().url(),
	title: z.string(),
	snippet: z.string().optional(),
});
|
||||
14
packages/shared-research/tsconfig.json
Normal file
14
packages/shared-research/tsconfig.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"declaration": true,
|
||||
"noEmit": true,
|
||||
"types": ["node"]
|
||||
},
|
||||
"include": ["src/**/*.ts"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue