mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-23 17:46:42 +02:00
feat(research): add mana-research service — Phase 1 + 2
New Bun/Hono service on port 3068 that bundles many web-research providers behind a unified interface for side-by-side comparison. All eval runs persist in research.* (mana_platform) so quality can be reviewed later.

Providers (Phase 1+2):
- search: searxng, duckduckgo, brave, tavily, exa, serper
- extract: readability (via mana-search), jina-reader, firecrawl

Endpoints:
- POST /v1/search, /v1/search/compare — single + fan-out
- POST /v1/extract, /v1/extract/compare — single + fan-out
- GET /v1/runs, /v1/runs/:id — history
- POST /v1/runs/:run/results/:id/rate — manual eval
- GET /v1/providers, /v1/providers/health — catalog + readiness

Auto-routing: when `provider` is omitted, queries are classified via regex (fast path, 0ms) with optional mana-llm fallback, then routed to the first available provider for that query type (news → tavily, academic → exa, semantic → exa, etc.).

Credits: server-key calls go through mana-credits reserve → commit/refund so failed provider calls don't charge the user. BYO-keys supported via research.provider_configs (UI arrives in Phase 4).

Cache: Redis with graceful degradation (1h TTL for search, 24h for extract).

Pay-per-use APIs only — no subscription-gated providers.

Docs: docs/plans/mana-research-service.md + docs/reports/web-research-capabilities.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
004fc0b2fd
commit
2bdb48bdd1
56 changed files with 4431 additions and 298 deletions
25
services/mana-research/src/executor/env-map.ts
Normal file
25
services/mana-research/src/executor/env-map.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import type { ProviderId } from '@mana/shared-research';
|
||||
import type { Config } from '../config';
|
||||
|
||||
/**
 * Lookup table from provider id to the slot on `Config.providerKeys` that
 * holds that provider's server-side API key.
 *
 * Built once at module load instead of on every call. A `Map` is used rather
 * than an object literal so that ids such as `constructor` or `toString`
 * cannot resolve to inherited `Object.prototype` members.
 *
 * Several providers share a slot: both OpenAI entries read `openai`.
 */
const ENV_KEY_BY_PROVIDER: ReadonlyMap<ProviderId, keyof Config['providerKeys']> = new Map<
  ProviderId,
  keyof Config['providerKeys']
>([
  ['brave', 'brave'],
  ['tavily', 'tavily'],
  ['exa', 'exa'],
  ['serper', 'serper'],
  ['perplexity-sonar', 'perplexity'],
  ['claude-web-search', 'anthropic'],
  ['openai-responses', 'openai'],
  ['openai-deep-research', 'openai'],
  ['gemini-grounding', 'googleGenai'],
  ['jina-reader', 'jina'],
  ['firecrawl', 'firecrawl'],
  ['scrapingbee', 'scrapingbee'],
]);

/**
 * Maps a ProviderId to the corresponding env-key slot on Config.providerKeys.
 * Extract/agent providers that share a key with search (e.g. openai agents)
 * reuse the same slot.
 *
 * @param providerId - Provider whose key slot should be looked up.
 * @returns The slot name for `providerId`, or `'brave'` when the id has no
 *   entry in the table.
 *
 * NOTE(review): the `'brave'` fallback means a key-requiring provider that is
 * missing from the table would be handed the Brave key by callers that index
 * `providerKeys` with this result. It is kept here so the return type stays
 * non-optional — confirm that every provider with `requiresApiKey` is listed.
 */
export function mapEnvKey(providerId: ProviderId): keyof Config['providerKeys'] {
  return ENV_KEY_BY_PROVIDER.get(providerId) ?? 'brave';
}
|
||||
150
services/mana-research/src/executor/execute-extract.ts
Normal file
150
services/mana-research/src/executor/execute-extract.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* Extract-side executor. Same shape as executeSearch but for URL extraction.
|
||||
*/
|
||||
|
||||
import type {
|
||||
BillingMode,
|
||||
ExtractedContent,
|
||||
ExtractOptions,
|
||||
ExtractProvider,
|
||||
ProviderId,
|
||||
ProviderMeta,
|
||||
} from '@mana/shared-research';
|
||||
import type { CreditsClient } from '../clients/mana-credits';
|
||||
import type { Config } from '../config';
|
||||
import { ProviderNotConfiguredError } from '../lib/errors';
|
||||
import { priceFor } from '../lib/pricing';
|
||||
import type { ConfigStorage } from '../storage/configs';
|
||||
import { cacheGet, cacheKey, cacheSet } from '../lib/cache';
|
||||
import { mapEnvKey } from './env-map';
|
||||
|
||||
/** Everything `executeExtract` needs to run one extraction against one provider. */
export interface ExecuteExtractInput {
  /** Provider to call; its `id`, `requiresApiKey` and `extract` are used. */
  provider: ExtractProvider;
  /** URL to extract; also part of the cache key. */
  url: string;
  /** Provider options, forwarded as-is and included in the cache key. */
  options: ExtractOptions;
  /** User on whose behalf the call runs (BYO-key lookup, credit reservation). */
  userId: string;
  /** Optional abort signal, forwarded to the provider call. */
  signal?: AbortSignal;
}
|
||||
|
||||
/** Result envelope returned by `executeExtract`. */
export interface ExecuteExtractOutput {
  /** `true` for a provider or cache result, `false` for an error envelope. */
  success: boolean;
  /** Extracted content; set on success, omitted on failure. */
  data?: { content: ExtractedContent };
  /** Provider id, latency, cost, cache and billing info, plus `errorCode` on failure. */
  meta: ProviderMeta;
}
|
||||
|
||||
/** Collaborators injected into the extract executor. */
export interface ExecutorDeps {
  /** Credits client used to reserve, commit and refund credits. */
  credits: CreditsClient;
  /** Per-user provider config storage (BYO-key lookup and decryption). */
  configs: ConfigStorage;
  /** Service config; `providerKeys` and `cacheTtlSeconds` are read. */
  config: Config;
}
|
||||
|
||||
/**
 * Runs a single URL extraction through one provider.
 *
 * Flow:
 *  1. Resolve an API key. For providers with `requiresApiKey`, the user's own
 *     enabled key is tried first (`byo-key`), then the server key
 *     (`server-key`); with neither, an error envelope is returned. For
 *     `jina-reader`, which does not require a key, the server key is attached
 *     when present but billing stays `free`.
 *  2. Return a cached result if one exists (always `costCredits: 0`).
 *  3. Reserve credits when the call is priced and billed to the server key.
 *  4. Call the provider, cache the result, and commit the reservation.
 *  5. On provider failure, refund the reservation and return an error envelope.
 *
 * Commit, refund and cache-write failures are logged and never turn a
 * successful provider call into a failure.
 *
 * @param input - Provider, URL, options, user and optional abort signal.
 * @param deps - Credits client, config storage and service config.
 * @returns A success envelope with the extracted content, or an error envelope
 *   (`success: false`) when the provider is not configured, the credit
 *   reservation fails, or the provider call throws. Rejections from the config
 *   lookup, key decryption or cache read are not caught here.
 */
export async function executeExtract(
  input: ExecuteExtractInput,
  deps: ExecutorDeps
): Promise<ExecuteExtractOutput> {
  const { provider, url, options, userId, signal } = input;
  const providerId = provider.id;
  const t0 = performance.now();

  // Resolve API key (BYO → server → none)
  let apiKey: string | null = null;
  let billingMode: BillingMode = 'free';

  if (provider.requiresApiKey) {
    const userConfig = await deps.configs.getForUser(userId, providerId);
    if (userConfig?.enabled && userConfig.apiKeyEncrypted) {
      apiKey = await deps.configs.decryptKey(userConfig);
      if (apiKey) billingMode = 'byo-key';
    }
    if (!apiKey) {
      apiKey = deps.config.providerKeys[mapEnvKey(providerId)] ?? null;
      if (apiKey) billingMode = 'server-key';
    }
    if (!apiKey) {
      return makeError(providerId, t0, new ProviderNotConfiguredError(providerId));
    }
  } else if (providerId === 'jina-reader' && deps.config.providerKeys.jina) {
    // jina-reader is zero-auth but a key lifts the rate limit
    apiKey = deps.config.providerKeys.jina;
  }

  // Only server-key calls are priced; BYO-key and free calls cost nothing.
  const price = billingMode === 'server-key' ? priceFor(providerId, 'extract') : 0;

  // Cache check
  const ckey = cacheKey('extract', providerId, url, options);
  const cached = await cacheGet<{ content: ExtractedContent }>(ckey);
  if (cached) {
    return {
      success: true,
      data: cached,
      meta: {
        provider: providerId,
        category: 'extract',
        latencyMs: Math.round(performance.now() - t0),
        costCredits: 0,
        cacheHit: true,
        billingMode,
      },
    };
  }

  // Reserve credits for paid server-key calls
  let reservationId: string | null = null;
  if (price > 0 && billingMode === 'server-key') {
    try {
      const reservation = await deps.credits.reserve(
        userId,
        price,
        `research:${providerId}:extract`
      );
      reservationId = reservation.reservationId;
    } catch (err) {
      return makeError(providerId, t0, err as Error);
    }
  }

  // Execute provider
  try {
    const res = await provider.extract(url, options, { apiKey, userId, signal });

    // Cache write is best-effort: a failure here must not reach the outer
    // catch, which would refund the credits and discard a successful result.
    // Extract results are cached for 24× the base TTL.
    try {
      await cacheSet(ckey, { content: res.content }, deps.config.cacheTtlSeconds * 24);
    } catch (cacheErr) {
      console.warn('[executor] cache write failed:', cacheErr);
    }

    if (reservationId) {
      await deps.credits
        .commit(reservationId, `extract ${providerId}`)
        .catch((err) => console.warn('[executor] commit failed:', err));
    }

    return {
      success: true,
      data: { content: res.content },
      meta: {
        provider: providerId,
        category: 'extract',
        latencyMs: Math.round(performance.now() - t0),
        costCredits: price,
        cacheHit: false,
        billingMode,
      },
    };
  } catch (err) {
    // Provider call failed: release the reserved credits so the user is not charged.
    if (reservationId) {
      await deps.credits
        .refund(reservationId)
        .catch((refundErr) => console.warn('[executor] refund failed:', refundErr));
    }
    return makeError(providerId, t0, err as Error);
  }
}
|
||||
|
||||
/**
 * Builds the failure envelope for an extract call.
 *
 * The error code is the thrown value's `code`, else its `name`, else
 * `'ERROR'`. Callers pass values straight out of `catch` clauses with a cast,
 * so `err` may be any thrown value at runtime: `null`/`undefined` are
 * tolerated, and a non-string code (e.g. a numeric status) is converted to a
 * string so `errorCode` is always a string.
 *
 * @param providerId - Provider the failed call was addressed to.
 * @param t0 - `performance.now()` timestamp taken when the call started.
 * @param err - The thrown value.
 * @returns An envelope with `success: false`, zero cost and `billingMode: 'free'`.
 */
function makeError(providerId: ProviderId, t0: number, err: Error): ExecuteExtractOutput {
  const thrown = (err ?? {}) as { code?: unknown; name?: unknown };
  const rawCode = thrown.code ?? thrown.name ?? 'ERROR';
  const errorCode = typeof rawCode === 'string' ? rawCode : String(rawCode);

  return {
    success: false,
    meta: {
      provider: providerId,
      category: 'extract',
      latencyMs: Math.round(performance.now() - t0),
      costCredits: 0,
      cacheHit: false,
      billingMode: 'free',
      errorCode,
    },
  };
}
|
||||
153
services/mana-research/src/executor/execute.ts
Normal file
153
services/mana-research/src/executor/execute.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* Core execution path: resolve key → reserve credits → call provider →
|
||||
* commit/refund → persist result.
|
||||
*
|
||||
* Used by both /v1/search (single) and /v1/search/compare (fan-out).
|
||||
*/
|
||||
|
||||
import type {
|
||||
BillingMode,
|
||||
ProviderId,
|
||||
ProviderMeta,
|
||||
SearchHit,
|
||||
SearchOptions,
|
||||
SearchProvider,
|
||||
} from '@mana/shared-research';
|
||||
import type { CreditsClient } from '../clients/mana-credits';
|
||||
import type { Config } from '../config';
|
||||
import { ProviderNotConfiguredError } from '../lib/errors';
|
||||
import { priceFor } from '../lib/pricing';
|
||||
import type { ConfigStorage } from '../storage/configs';
|
||||
import { cacheGet, cacheKey, cacheSet } from '../lib/cache';
|
||||
import { mapEnvKey } from './env-map';
|
||||
|
||||
/** Everything `executeSearch` needs to run one query against one provider. */
export interface ExecuteSearchInput {
  /** Provider to call; its `id`, `requiresApiKey` and `search` are used. */
  provider: SearchProvider;
  /** Search query; also part of the cache key. */
  query: string;
  /** Provider options, forwarded as-is and included in the cache key. */
  options: SearchOptions;
  /** User on whose behalf the call runs (BYO-key lookup, credit reservation). */
  userId: string;
  /** Optional abort signal, forwarded to the provider call. */
  signal?: AbortSignal;
}
|
||||
|
||||
/** Result envelope returned by `executeSearch`. */
export interface ExecuteSearchOutput {
  /** `true` for a provider or cache result, `false` for an error envelope. */
  success: boolean;
  /** Search hits; set on success, omitted on failure. */
  data?: { results: SearchHit[] };
  /** Provider id, latency, cost, cache and billing info, plus `errorCode` on failure. */
  meta: ProviderMeta;
}
|
||||
|
||||
/** Collaborators injected into the search executor. */
export interface ExecutorDeps {
  /** Credits client used to reserve, commit and refund credits. */
  credits: CreditsClient;
  /** Per-user provider config storage (BYO-key lookup and decryption). */
  configs: ConfigStorage;
  /** Service config; `providerKeys` and `cacheTtlSeconds` are read. */
  config: Config;
}
|
||||
|
||||
/**
 * Runs a single search query through one provider.
 *
 * Flow:
 *  1. Resolve an API key when the provider requires one: the user's own
 *     enabled key is tried first (`byo-key`), then the server key
 *     (`server-key`); with neither, an error envelope is returned.
 *  2. Return a cached result if one exists (always `costCredits: 0`).
 *  3. Reserve credits when the call is priced and billed to the server key.
 *  4. Call the provider, cache the result, and commit the reservation.
 *  5. On provider failure, refund the reservation and return an error envelope.
 *
 * Commit, refund and cache-write failures are logged and never turn a
 * successful provider call into a failure.
 *
 * @param input - Provider, query, options, user and optional abort signal.
 * @param deps - Credits client, config storage and service config.
 * @returns A success envelope with the search hits, or an error envelope
 *   (`success: false`) when the provider is not configured, the credit
 *   reservation fails, or the provider call throws. Rejections from the config
 *   lookup, key decryption or cache read are not caught here.
 */
export async function executeSearch(
  input: ExecuteSearchInput,
  deps: ExecutorDeps
): Promise<ExecuteSearchOutput> {
  const { provider, query, options, userId, signal } = input;
  const providerId = provider.id;
  const t0 = performance.now();

  // Resolve API key (BYO first, then server)
  let apiKey: string | null = null;
  let billingMode: BillingMode = 'free';

  if (provider.requiresApiKey) {
    const userConfig = await deps.configs.getForUser(userId, providerId);
    if (userConfig?.enabled && userConfig.apiKeyEncrypted) {
      apiKey = await deps.configs.decryptKey(userConfig);
      if (apiKey) billingMode = 'byo-key';
    }
    if (!apiKey) {
      apiKey = deps.config.providerKeys[mapEnvKey(providerId)] ?? null;
      if (apiKey) billingMode = 'server-key';
    }
    if (!apiKey) {
      return makeError(providerId, t0, new ProviderNotConfiguredError(providerId));
    }
  }

  // Only server-key calls are priced; BYO-key and free calls cost nothing.
  const price = billingMode === 'server-key' ? priceFor(providerId, 'search') : 0;

  // Cache check
  const ckey = cacheKey('search', providerId, query, options);
  const cached = await cacheGet<{ results: SearchHit[] }>(ckey);
  if (cached) {
    return {
      success: true,
      data: cached,
      meta: {
        provider: providerId,
        category: 'search',
        latencyMs: Math.round(performance.now() - t0),
        costCredits: 0,
        cacheHit: true,
        billingMode,
      },
    };
  }

  // Reserve credits for paid server-key calls
  let reservationId: string | null = null;
  if (price > 0 && billingMode === 'server-key') {
    try {
      const reservation = await deps.credits.reserve(
        userId,
        price,
        `research:${providerId}:search`
      );
      reservationId = reservation.reservationId;
    } catch (err) {
      return makeError(providerId, t0, err as Error);
    }
  }

  // Execute provider
  try {
    const res = await provider.search(query, options, { apiKey, userId, signal });

    // Cache write is best-effort: a failure here must not reach the outer
    // catch, which would refund the credits and discard a successful result.
    try {
      await cacheSet(ckey, { results: res.results }, deps.config.cacheTtlSeconds);
    } catch (cacheErr) {
      console.warn('[executor] cache write failed:', cacheErr);
    }

    if (reservationId) {
      await deps.credits
        .commit(reservationId, `search ${providerId}`)
        .catch((err) => console.warn('[executor] commit failed:', err));
    }

    return {
      success: true,
      data: { results: res.results },
      meta: {
        provider: providerId,
        category: 'search',
        latencyMs: Math.round(performance.now() - t0),
        costCredits: price,
        cacheHit: false,
        billingMode,
      },
    };
  } catch (err) {
    // Provider call failed: release the reserved credits so the user is not charged.
    if (reservationId) {
      await deps.credits
        .refund(reservationId)
        .catch((refundErr) => console.warn('[executor] refund failed:', refundErr));
    }
    return makeError(providerId, t0, err as Error);
  }
}
|
||||
|
||||
/**
 * Builds the failure envelope for a search call.
 *
 * The error code is the thrown value's `code`, else its `name`, else
 * `'ERROR'`. Callers pass values straight out of `catch` clauses with a cast,
 * so `err` may be any thrown value at runtime: `null`/`undefined` are
 * tolerated, and a non-string code (e.g. a numeric status) is converted to a
 * string so `errorCode` is always a string.
 *
 * @param providerId - Provider the failed call was addressed to.
 * @param t0 - `performance.now()` timestamp taken when the call started.
 * @param err - The thrown value.
 * @returns An envelope with `success: false`, zero cost and `billingMode: 'free'`.
 */
function makeError(providerId: ProviderId, t0: number, err: Error): ExecuteSearchOutput {
  const thrown = (err ?? {}) as { code?: unknown; name?: unknown };
  const rawCode = thrown.code ?? thrown.name ?? 'ERROR';
  const errorCode = typeof rawCode === 'string' ? rawCode : String(rawCode);

  return {
    success: false,
    meta: {
      provider: providerId,
      category: 'search',
      latencyMs: Math.round(performance.now() - t0),
      costCredits: 0,
      cacheHit: false,
      billingMode: 'free',
      errorCode,
    },
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue