mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:21:10 +02:00
New Bun/Hono service on port 3068 that bundles many web-research providers behind a unified interface for side-by-side comparison. All eval runs persist in research.* (mana_platform) so quality can be reviewed later.

Providers (Phase 1+2):
- search: searxng, duckduckgo, brave, tavily, exa, serper
- extract: readability (via mana-search), jina-reader, firecrawl

Endpoints:
- POST /v1/search, /v1/search/compare — single + fan-out
- POST /v1/extract, /v1/extract/compare — single + fan-out
- GET /v1/runs, /v1/runs/:id — history
- POST /v1/runs/:run/results/:id/rate — manual eval
- GET /v1/providers, /v1/providers/health — catalog + readiness

Auto-routing: when `provider` is omitted, queries are classified via regex (fast path, 0ms) with optional mana-llm fallback, then routed to the first available provider for that query type (news → tavily, academic → exa, semantic → exa, etc.).

Credits: server-key calls go through mana-credits reserve → commit/refund so failed provider calls don't charge the user. BYO-keys supported via research.provider_configs (UI arrives in Phase 4).

Cache: Redis with graceful degradation (1h TTL for search, 24h for extract).

Pay-per-use APIs only — no subscription-gated providers.

Docs: docs/plans/mana-research-service.md + docs/reports/web-research-capabilities.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
150 lines
3.9 KiB
TypeScript
150 lines
3.9 KiB
TypeScript
/**
|
|
* Extract-side executor. Same shape as executeSearch but for URL extraction.
|
|
*/
|
|
|
|
import type {
|
|
BillingMode,
|
|
ExtractedContent,
|
|
ExtractOptions,
|
|
ExtractProvider,
|
|
ProviderId,
|
|
ProviderMeta,
|
|
} from '@mana/shared-research';
|
|
import type { CreditsClient } from '../clients/mana-credits';
|
|
import type { Config } from '../config';
|
|
import { ProviderNotConfiguredError } from '../lib/errors';
|
|
import { priceFor } from '../lib/pricing';
|
|
import type { ConfigStorage } from '../storage/configs';
|
|
import { cacheGet, cacheKey, cacheSet } from '../lib/cache';
|
|
import { mapEnvKey } from './env-map';
|
|
|
|
/**
 * Arguments for a single `executeExtract` call.
 */
export interface ExecuteExtractInput {
	/** Provider adapter whose `extract` method performs the extraction. */
	provider: ExtractProvider;
	/** URL to extract; forwarded to the provider and part of the cache key. */
	url: string;
	/** Extraction options; forwarded to the provider and part of the cache key. */
	options: ExtractOptions;
	/** User the call runs for: used for the BYO-key lookup and for credit reservations. */
	userId: string;
	/** Optional abort signal, handed through to the provider call. */
	signal?: AbortSignal;
}
|
|
|
|
/**
 * Result envelope returned by `executeExtract`.
 */
export interface ExecuteExtractOutput {
	/** `true` for a fresh or cached extraction, `false` when an error result was built. */
	success: boolean;
	/** Extracted content; set on success results and omitted on error results. */
	data?: { content: ExtractedContent };
	/** Provider id, latency, cost, cache and billing details for this call. */
	meta: ProviderMeta;
}
|
|
|
|
/**
 * Collaborators the executor needs; passed in by the caller.
 */
export interface ExecutorDeps {
	/** Credits client; the executor calls `reserve`, `commit` and `refund` on it. */
	credits: CreditsClient;
	/** Per-user provider config storage; used via `getForUser` and `decryptKey`. */
	configs: ConfigStorage;
	/** Service configuration; the executor reads `providerKeys` and `cacheTtlSeconds`. */
	config: Config;
}
|
|
|
|
/**
 * Runs one URL extraction through `input.provider` and wraps the outcome in
 * an `ExecuteExtractOutput` envelope.
 *
 * Steps, in order:
 *  1. Resolve the API key (user's own key → server key → none).
 *  2. Return the cached result when there is one (`costCredits: 0`).
 *  3. For priced server-key calls, reserve credits before calling the provider.
 *  4. Call the provider; commit the reservation on success, refund it on failure.
 *
 * A missing key, a failed reservation and a failed provider call are reported
 * as `success: false` results built by `makeError` rather than thrown. Errors
 * from the user-config lookup or key decryption are not caught here and
 * reject the returned promise.
 *
 * Cache reads/writes and the commit/refund calls are best-effort: their
 * failures are logged and never change the result of the extraction itself.
 *
 * @param input - Provider, URL, options, user and optional abort signal.
 * @param deps - Credits client, config storage and service config.
 * @returns The success or error envelope for this call.
 */
export async function executeExtract(
	input: ExecuteExtractInput,
	deps: ExecutorDeps
): Promise<ExecuteExtractOutput> {
	const { provider, url, options, userId, signal } = input;
	const providerId = provider.id;
	const t0 = performance.now();

	const credentials = await resolveExtractCredentials(provider, userId, deps);
	if (!credentials) {
		return makeError(providerId, t0, new ProviderNotConfiguredError(providerId));
	}
	const { apiKey, billingMode } = credentials;

	// Only calls made with the server's key are billed to the user.
	const price = billingMode === 'server-key' ? priceFor(providerId, 'extract') : 0;

	const successMeta = (costCredits: number, cacheHit: boolean): ProviderMeta => ({
		provider: providerId,
		category: 'extract',
		latencyMs: Math.round(performance.now() - t0),
		costCredits,
		cacheHit,
		billingMode,
	});

	const ckey = cacheKey('extract', providerId, url, options);

	// A failing cache read is treated as a miss so the extraction still runs.
	let cached: { content: ExtractedContent } | null | undefined;
	try {
		cached = await cacheGet<{ content: ExtractedContent }>(ckey);
	} catch (err) {
		console.warn('[executor] cache read failed:', err);
	}
	if (cached) {
		return { success: true, data: cached, meta: successMeta(0, true) };
	}

	// `price` is only non-zero in server-key mode, so no extra mode check is needed.
	let reservationId: string | null = null;
	if (price > 0) {
		try {
			const reservation = await deps.credits.reserve(
				userId,
				price,
				`research:${providerId}:extract`
			);
			reservationId = reservation.reservationId;
		} catch (err) {
			return makeError(providerId, t0, err as Error);
		}
	}

	let content: ExtractedContent;
	try {
		const res = await provider.extract(url, options, { apiKey, userId, signal });
		content = res.content;
	} catch (err) {
		if (reservationId) {
			try {
				await deps.credits.refund(reservationId);
			} catch (refundErr) {
				console.warn('[executor] refund failed:', refundErr);
			}
		}
		return makeError(providerId, t0, err as Error);
	}

	// The provider call succeeded. A cache-write failure must not turn that into
	// a refunded error, so it is handled outside the provider try/catch.
	try {
		// Extract results are cached for 24× the base cache TTL.
		await cacheSet(ckey, { content }, deps.config.cacheTtlSeconds * 24);
	} catch (err) {
		console.warn('[executor] cache write failed:', err);
	}

	if (reservationId) {
		try {
			await deps.credits.commit(reservationId, `extract ${providerId}`);
		} catch (err) {
			console.warn('[executor] commit failed:', err);
		}
	}

	return { success: true, data: { content }, meta: successMeta(price, false) };
}

/** API key (if any) and billing mode chosen for one extract call. */
interface ExtractCredentials {
	apiKey: string | null;
	billingMode: BillingMode;
}

/**
 * Picks the API key for `provider`: the user's own enabled key first, then the
 * server key. Returns `null` when the provider requires a key and none is
 * available. Providers that do not require a key resolve to `free` billing.
 */
async function resolveExtractCredentials(
	provider: ExtractProvider,
	userId: string,
	deps: ExecutorDeps
): Promise<ExtractCredentials | null> {
	const providerId = provider.id;

	if (!provider.requiresApiKey) {
		// jina-reader is zero-auth but a key lifts the rate limit
		const jinaKey = deps.config.providerKeys.jina;
		if (providerId === 'jina-reader' && jinaKey) {
			return { apiKey: jinaKey, billingMode: 'free' };
		}
		return { apiKey: null, billingMode: 'free' };
	}

	const userConfig = await deps.configs.getForUser(userId, providerId);
	if (userConfig?.enabled && userConfig.apiKeyEncrypted) {
		const userKey = await deps.configs.decryptKey(userConfig);
		if (userKey) {
			return { apiKey: userKey, billingMode: 'byo-key' };
		}
	}

	const serverKey = deps.config.providerKeys[mapEnvKey(providerId)];
	if (serverKey) {
		return { apiKey: serverKey, billingMode: 'server-key' };
	}

	return null;
}
|
|
|
|
/**
 * Builds the `success: false` envelope for a failed extract call.
 *
 * The error code is the thrown value's `code` when that is a non-empty
 * string, otherwise its `name`, otherwise `'ERROR'`. Non-string codes (for
 * example the numeric `code` of a `DOMException`) are ignored so `errorCode`
 * is always a string, and values that are not objects (`null`, `undefined`,
 * thrown strings) fall back to `'ERROR'` instead of raising a `TypeError`.
 *
 * @param providerId - Provider the failed call was addressed to.
 * @param t0 - `performance.now()` timestamp taken when the call started.
 * @param err - The caught value; usually an `Error`, but anything is accepted.
 * @returns An error result with zero cost and `billingMode: 'free'`.
 */
function makeError(providerId: ProviderId, t0: number, err: unknown): ExecuteExtractOutput {
	const source: { code?: unknown; name?: unknown } =
		typeof err === 'object' && err !== null ? err : {};
	const isLabel = (value: unknown): value is string =>
		typeof value === 'string' && value.length > 0;

	let errorCode = 'ERROR';
	if (isLabel(source.code)) {
		errorCode = source.code;
	} else if (isLabel(source.name)) {
		errorCode = source.name;
	}

	return {
		success: false,
		meta: {
			provider: providerId,
			category: 'extract',
			latencyMs: Math.round(performance.now() - t0),
			costCredits: 0,
			cacheHit: false,
			billingMode: 'free',
			errorCode,
		},
	};
}
|