managarten/services/mana-research/src/executor/execute-extract.ts
Till JS 2bdb48bdd1 feat(research): add mana-research service — Phase 1 + 2
New Bun/Hono service on port 3068 that bundles many web-research providers
behind a unified interface for side-by-side comparison. All eval runs
persist in research.* (mana_platform) so quality can be reviewed later.

Providers (Phase 1+2):
  search:  searxng, duckduckgo, brave, tavily, exa, serper
  extract: readability (via mana-search), jina-reader, firecrawl

Endpoints:
  POST /v1/search, /v1/search/compare       — single + fan-out
  POST /v1/extract, /v1/extract/compare     — single + fan-out
  GET  /v1/runs, /v1/runs/:id               — history
  POST /v1/runs/:run/results/:id/rate       — manual eval
  GET  /v1/providers, /v1/providers/health  — catalog + readiness

Auto-routing: when `provider` is omitted, queries are classified via regex
(fast path, 0ms) with optional mana-llm fallback, then routed to the first
available provider for that query type (news → tavily, academic → exa,
semantic → exa, etc.).

Credits: server-key calls go through mana-credits reserve → commit/refund
so failed provider calls don't charge the user. BYO-keys supported via
research.provider_configs (UI arrives in Phase 4).

Cache: Redis with graceful degradation (1h TTL for search, 24h for
extract). Pay-per-use APIs only — no subscription-gated providers.

Docs: docs/plans/mana-research-service.md + docs/reports/web-research-capabilities.md

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 14:42:25 +02:00

150 lines
3.9 KiB
TypeScript

/**
* Extract-side executor. Same shape as executeSearch but for URL extraction.
*/
import type {
BillingMode,
ExtractedContent,
ExtractOptions,
ExtractProvider,
ProviderId,
ProviderMeta,
} from '@mana/shared-research';
import type { CreditsClient } from '../clients/mana-credits';
import type { Config } from '../config';
import { ProviderNotConfiguredError } from '../lib/errors';
import { priceFor } from '../lib/pricing';
import type { ConfigStorage } from '../storage/configs';
import { cacheGet, cacheKey, cacheSet } from '../lib/cache';
import { mapEnvKey } from './env-map';
/**
* Arguments for a single extract call handled by `executeExtract`.
*/
export interface ExecuteExtractInput {
/** Provider to run; its `id` and `requiresApiKey` are read and its `extract` method is invoked. */
provider: ExtractProvider;
/** URL to extract; forwarded to the provider and included in the cache key. */
url: string;
/** Extraction options; forwarded to the provider and included in the cache key. */
options: ExtractOptions;
/** User the call runs for: used for the BYO-key lookup, the credit reservation, and passed to the provider. */
userId: string;
/** Optional abort signal, forwarded unchanged to the provider's `extract` call. */
signal?: AbortSignal;
}
/**
* Result envelope returned by `executeExtract`.
*/
export interface ExecuteExtractOutput {
/** `true` for a cache hit or a successful provider call, `false` for results built by `makeError`. */
success: boolean;
/** Extracted content; set on success, omitted on error results. */
data?: { content: ExtractedContent };
/** Per-call metadata (provider, latency, cost, cache hit, billing mode, and an error code on failure). */
meta: ProviderMeta;
}
/**
* Collaborators injected into the executor.
*/
export interface ExecutorDeps {
/** Credits client; the executor calls `reserve`, `commit` and `refund` on it. */
credits: CreditsClient;
/** Per-user provider config storage; the executor calls `getForUser` and `decryptKey` on it. */
configs: ConfigStorage;
/** Service configuration; the executor reads `providerKeys` and `cacheTtlSeconds`. */
config: Config;
}
/**
 * Runs one URL extraction through a provider, with key resolution, caching
 * and credit accounting around the call.
 *
 * Steps, in order:
 *  1. Resolve the API key. For providers that require one: the user's own
 *     (BYO) key if an enabled config with an encrypted key exists, otherwise
 *     the server key from `config.providerKeys`; if neither yields a key, an
 *     error result is returned. `jina-reader` works without a key but uses
 *     the server key when one is configured.
 *  2. Return a cached result if present (reported with `costCredits: 0`).
 *  3. Reserve credits when the call is priced (server-key mode only).
 *  4. Call the provider, cache the content, commit the reservation.
 *  5. On failure in step 4, refund the reservation and return an error result.
 *
 * Failures of the reservation, the provider call or the cache write are
 * returned as `success: false` results. Rejections from the config lookup,
 * key decryption or cache read are not caught here and propagate to the caller.
 *
 * @param input - Provider, URL, options, user and optional abort signal.
 * @param deps - Credits client, config storage and service configuration.
 * @returns The extract result envelope; `data` is present only on success.
 */
export async function executeExtract(
  input: ExecuteExtractInput,
  deps: ExecutorDeps
): Promise<ExecuteExtractOutput> {
  const { provider, url, options, userId, signal } = input;
  const { credits, configs, config } = deps;
  const providerId = provider.id;
  const startedAt = performance.now();

  let apiKey: string | null = null;
  let billingMode: BillingMode = 'free';

  // Builds the metadata for successful results; reads the final billing mode lazily.
  const successMeta = (costCredits: number, cacheHit: boolean): ProviderMeta => ({
    provider: providerId,
    category: 'extract',
    latencyMs: Math.round(performance.now() - startedAt),
    costCredits,
    cacheHit,
    billingMode,
  });

  if (!provider.requiresApiKey) {
    // jina-reader is zero-auth but a key lifts the rate limit
    if (providerId === 'jina-reader' && config.providerKeys.jina) {
      apiKey = config.providerKeys.jina;
    }
  } else {
    // Key precedence: user's own key first, then the server-wide key.
    const stored = await configs.getForUser(userId, providerId);
    const byoKey =
      stored?.enabled && stored.apiKeyEncrypted ? await configs.decryptKey(stored) : null;

    if (byoKey) {
      apiKey = byoKey;
      billingMode = 'byo-key';
    } else {
      const serverKey = config.providerKeys[mapEnvKey(providerId)] ?? null;
      if (!serverKey) {
        return makeError(providerId, startedAt, new ProviderNotConfiguredError(providerId));
      }
      apiKey = serverKey;
      billingMode = 'server-key';
    }
  }

  // Only server-key calls are billed; BYO and keyless calls cost nothing.
  const price = billingMode === 'server-key' ? priceFor(providerId, 'extract') : 0;

  const cacheId = cacheKey('extract', providerId, url, options);
  const hit = await cacheGet<{ content: ExtractedContent }>(cacheId);
  if (hit) {
    return { success: true, data: hit, meta: successMeta(0, true) };
  }

  // A positive price implies server-key mode (price is 0 otherwise).
  let reservationId: string | null = null;
  if (price > 0) {
    try {
      ({ reservationId } = await credits.reserve(
        userId,
        price,
        `research:${providerId}:extract`
      ));
    } catch (err) {
      // Could not reserve credits: fail before the provider is called.
      return makeError(providerId, startedAt, err as Error);
    }
  }

  try {
    const { content } = await provider.extract(url, options, { apiKey, userId, signal });

    // Extract results are cached for 24x the configured base TTL.
    await cacheSet(cacheId, { content }, config.cacheTtlSeconds * 24);

    if (reservationId) {
      // Best effort: a failed commit is logged and does not fail the call.
      await credits
        .commit(reservationId, `extract ${providerId}`)
        .catch((err) => console.warn('[executor] commit failed:', err));
    }

    return { success: true, data: { content }, meta: successMeta(price, false) };
  } catch (err) {
    if (reservationId) {
      // Release the reservation so the failed call is not charged.
      await credits
        .refund(reservationId)
        .catch((refundErr) => console.warn('[executor] refund failed:', refundErr));
    }
    return makeError(providerId, startedAt, err as Error);
  }
}
/**
 * Builds the failure result for an extract call.
 *
 * The error code is taken from the thrown value's `code` when that is a
 * non-empty string, otherwise from its `name`, otherwise `'ERROR'`. Non-string
 * codes are ignored on purpose: `DOMException` (thrown for aborted or
 * timed-out requests) carries a legacy numeric `code`, and using it would put
 * a number such as `20` into `errorCode` instead of `'AbortError'`.
 *
 * Error results always report `costCredits: 0`, `cacheHit: false` and
 * `billingMode: 'free'`, and carry no `data`.
 *
 * @param providerId - Provider the failed call was addressed to.
 * @param t0 - `performance.now()` timestamp taken when the call started.
 * @param err - The thrown value; may be anything, including `null` or a string.
 * @returns A `success: false` result with the derived `errorCode`.
 */
function makeError(providerId: ProviderId, t0: number, err: unknown): ExecuteExtractOutput {
  let errorCode = 'ERROR';
  // Primitives, null and undefined carry no usable code or name.
  if (typeof err === 'object' && err !== null) {
    const { code, name } = err as { code?: unknown; name?: unknown };
    if (typeof code === 'string' && code !== '') {
      errorCode = code;
    } else if (typeof name === 'string' && name !== '') {
      errorCode = name;
    }
  }

  return {
    success: false,
    meta: {
      provider: providerId,
      category: 'extract',
      latencyMs: Math.round(performance.now() - t0),
      costCredits: 0,
      cacheHit: false,
      billingMode: 'free',
      errorCode,
    },
  };
}