mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:01:08 +02:00
End-to-end testing surfaced a 404 from the synth path. mana-llm (services/mana-llm/src/main.py) mounts the OpenAI-compatible API at /v1/* — there's no /api prefix. The first quick-depth e2e run only worked because the planner is skipped on quick (it just uses the question itself), so llmJson never fired; only llmStream did, and the streaming path also used the wrong prefix but the test happened to land before this was caught. The other apps/api modules (chat, guides, context, traces) all use the wrong /api/v1/ path too — that's a separate, pre-existing bug to be addressed in their own commits. Verified by re-running a standard-depth research run end-to-end against mana-llm pointed at the GPU server's ollama with gemma3:4b/12b: plan + retrieve + extract + synth all succeed. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
175 lines
4.9 KiB
TypeScript
175 lines
4.9 KiB
TypeScript
/**
|
|
* Thin client for the mana-llm gateway.
|
|
*
|
|
* Two helpers, deliberately small:
|
|
*
|
|
* llmJson() — non-streaming, parses the model response as JSON.
|
|
* Used for plan/structuring steps where we need a typed object.
|
|
*
|
|
* llmStream() — streaming, calls onToken() for each delta and returns
|
|
* the full concatenated text at the end. Used for synthesis.
|
|
*
|
|
* mana-llm exposes an OpenAI-compatible /v1/chat/completions endpoint
|
|
* (see services/mana-llm). Models are namespaced as `provider/model`, e.g.
|
|
* `ollama/gemma3:4b`, `openrouter/meta-llama/llama-3.1-70b-instruct`.
|
|
*
|
|
* Internal service-to-service calls — no auth on the wire (private network).
|
|
*/
|
|
|
|
const LLM_URL = process.env.MANA_LLM_URL || 'http://localhost:3025';
|
|
|
|
export interface LlmMessage {
|
|
role: 'system' | 'user' | 'assistant';
|
|
content: string;
|
|
}
|
|
|
|
export interface LlmJsonOptions {
|
|
model: string;
|
|
system?: string;
|
|
user: string;
|
|
temperature?: number;
|
|
maxTokens?: number;
|
|
}
|
|
|
|
export interface LlmStreamOptions {
|
|
model: string;
|
|
system?: string;
|
|
user: string;
|
|
temperature?: number;
|
|
maxTokens?: number;
|
|
onToken: (delta: string) => void | Promise<void>;
|
|
signal?: AbortSignal;
|
|
}
|
|
|
|
export class LlmError extends Error {
|
|
constructor(
|
|
message: string,
|
|
public readonly status?: number,
|
|
public readonly body?: string
|
|
) {
|
|
super(message);
|
|
this.name = 'LlmError';
|
|
}
|
|
}
|
|
|
|
function buildMessages(system: string | undefined, user: string): LlmMessage[] {
|
|
const msgs: LlmMessage[] = [];
|
|
if (system) msgs.push({ role: 'system', content: system });
|
|
msgs.push({ role: 'user', content: user });
|
|
return msgs;
|
|
}
|
|
|
|
/**
|
|
* Call the LLM and parse the response as JSON.
|
|
*
|
|
* Strips markdown code fences if the model wraps its output in ```json ... ```.
|
|
* Throws LlmError on transport/HTTP failure or if the body isn't valid JSON.
|
|
*/
|
|
export async function llmJson<T = unknown>(opts: LlmJsonOptions): Promise<T> {
|
|
const res = await fetch(`${LLM_URL}/v1/chat/completions`, {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({
|
|
model: opts.model,
|
|
messages: buildMessages(opts.system, opts.user),
|
|
temperature: opts.temperature ?? 0.2,
|
|
max_tokens: opts.maxTokens ?? 1000,
|
|
response_format: { type: 'json_object' },
|
|
}),
|
|
});
|
|
|
|
if (!res.ok) {
|
|
const body = await res.text().catch(() => '');
|
|
throw new LlmError(`mana-llm returned ${res.status}`, res.status, body);
|
|
}
|
|
|
|
const data = (await res.json()) as {
|
|
choices?: Array<{ message?: { content?: string } }>;
|
|
};
|
|
const raw = data.choices?.[0]?.message?.content;
|
|
if (!raw) throw new LlmError('mana-llm response missing content');
|
|
|
|
const cleaned = stripCodeFence(raw);
|
|
try {
|
|
return JSON.parse(cleaned) as T;
|
|
} catch (err) {
|
|
throw new LlmError(
|
|
`mana-llm returned non-JSON content: ${(err as Error).message}`,
|
|
undefined,
|
|
raw
|
|
);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Call the LLM in streaming mode. Invokes onToken() for each delta and
|
|
* returns the full concatenated text once the stream completes.
|
|
*
|
|
* Parses OpenAI-style SSE: lines beginning with `data: ` and the
|
|
* sentinel `data: [DONE]`.
|
|
*/
|
|
export async function llmStream(opts: LlmStreamOptions): Promise<string> {
|
|
const res = await fetch(`${LLM_URL}/v1/chat/completions`, {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({
|
|
model: opts.model,
|
|
messages: buildMessages(opts.system, opts.user),
|
|
temperature: opts.temperature ?? 0.5,
|
|
max_tokens: opts.maxTokens ?? 2000,
|
|
stream: true,
|
|
}),
|
|
signal: opts.signal,
|
|
});
|
|
|
|
if (!res.ok || !res.body) {
|
|
const body = await res.text().catch(() => '');
|
|
throw new LlmError(`mana-llm stream returned ${res.status}`, res.status, body);
|
|
}
|
|
|
|
const reader = res.body.getReader();
|
|
const decoder = new TextDecoder();
|
|
let buffer = '';
|
|
let full = '';
|
|
|
|
while (true) {
|
|
const { done, value } = await reader.read();
|
|
if (done) break;
|
|
|
|
buffer += decoder.decode(value, { stream: true });
|
|
|
|
// SSE frames are separated by blank lines, but mana-llm forwards
|
|
// line-by-line — split on \n and keep the last (possibly partial) line.
|
|
const lines = buffer.split('\n');
|
|
buffer = lines.pop() ?? '';
|
|
|
|
for (const line of lines) {
|
|
if (!line.startsWith('data: ')) continue;
|
|
const payload = line.slice(6).trim();
|
|
if (!payload || payload === '[DONE]') continue;
|
|
|
|
try {
|
|
const chunk = JSON.parse(payload) as {
|
|
choices?: Array<{ delta?: { content?: string } }>;
|
|
};
|
|
const delta = chunk.choices?.[0]?.delta?.content;
|
|
if (delta) {
|
|
full += delta;
|
|
await opts.onToken(delta);
|
|
}
|
|
} catch {
|
|
// ignore malformed frames — keepalives, comments, etc.
|
|
}
|
|
}
|
|
}
|
|
|
|
return full;
|
|
}
|
|
|
|
function stripCodeFence(text: string): string {
|
|
const trimmed = text.trim();
|
|
if (!trimmed.startsWith('```')) return trimmed;
|
|
// ```json\n...\n``` or ```\n...\n```
|
|
const withoutOpen = trimmed.replace(/^```(?:json)?\s*\n?/, '');
|
|
return withoutOpen.replace(/\n?```\s*$/, '');
|
|
}
|