feat: add unified @manacore/shared-llm package and migrate all backends

Create a shared LLM client package that provides a unified interface
to the mana-llm service, replacing 9 individual fetch-based integrations
with consistent error handling, retry logic, and JSON extraction.

Package (@manacore/shared-llm):
- LlmModule with forRoot/forRootAsync (NestJS dynamic module)
- LlmClientService: chat, json, vision, visionJson, embed, stream
- LlmClient standalone class for non-NestJS consumers
- extractJson utility (consolidates 3 markdown-stripping implementations)
- retryFetch with exponential backoff (429, 5xx, network errors)
- 44 unit tests (json-extractor, retry, llm-client)

Migrated backends:
- mana-core-auth: raw fetch → llm.json()
- planta: raw fetch + vision → llm.visionJson()
- nutriphi: raw fetch + regex → llm.visionJson() + llm.json()
- chat: custom OllamaService (175 LOC) → llm.chatMessages()
- context: raw fetch → llm.chat() (keeps token tracking)
- traces: 2x raw fetch → llm.chat()
- manadeck: @google/genai SDK → llm.json() + llm.visionJson()
- bot-services: raw Ollama API → LlmClient standalone
- matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision()

New credit operations:
- AI_PLANT_ANALYSIS (2 credits, planta)
- AI_GUIDE_GENERATION (5 credits, traces)
- AI_CONTEXT_GENERATION (2 credits, context)
- AI_BOT_CHAT (0.1 credits, matrix)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-23 22:06:30 +01:00
parent e7bf58c5b6
commit e2f144962c
48 changed files with 2476 additions and 1297 deletions

View file

@ -0,0 +1,119 @@
import { describe, it, expect } from 'vitest';
import { extractJson } from '../utils/json-extractor';
describe('extractJson', () => {
it('parses direct JSON object', () => {
const result = extractJson('{"name": "test", "value": 42}');
expect(result).toEqual({ name: 'test', value: 42 });
});
it('parses direct JSON array', () => {
const result = extractJson('[1, 2, 3]');
expect(result).toEqual([1, 2, 3]);
});
it('strips markdown json code fence', () => {
const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
const result = extractJson(input);
expect(result).toEqual({ category: 'bug', title: 'Fix login' });
});
it('strips markdown code fence without json label', () => {
const input = '```\n{"key": "value"}\n```';
const result = extractJson(input);
expect(result).toEqual({ key: 'value' });
});
it('extracts JSON from surrounding text', () => {
const input =
'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
const result = extractJson(input);
expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
});
it('extracts JSON array from surrounding text', () => {
const input = 'The items are: [1, 2, 3] as requested.';
const result = extractJson(input);
expect(result).toEqual([1, 2, 3]);
});
it('handles nested JSON objects', () => {
const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
const result = extractJson(input);
expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
});
it('handles JSON with escaped quotes in strings', () => {
const input = '{"text": "He said \\"hello\\""}';
const result = extractJson(input);
expect(result).toEqual({ text: 'He said "hello"' });
});
it('handles JSON with braces inside strings', () => {
const input = 'Result: {"code": "if (x) { return }"}';
const result = extractJson(input);
expect(result).toEqual({ code: 'if (x) { return }' });
});
it('trims whitespace before parsing', () => {
const input = ' \n {"key": "value"} \n ';
const result = extractJson(input);
expect(result).toEqual({ key: 'value' });
});
it('applies validation function on success', () => {
const validate = (data: unknown) => {
const obj = data as { name: string };
if (!obj.name) throw new Error('missing name');
return obj;
};
const result = extractJson('{"name": "test"}', validate);
expect(result).toEqual({ name: 'test' });
});
it('throws when validation fails', () => {
const validate = (data: unknown) => {
const obj = data as { name?: string };
if (!obj.name) throw new Error('missing name');
return obj;
};
expect(() => extractJson('{"value": 123}', validate)).toThrow();
});
it('throws on completely invalid input', () => {
expect(() => extractJson('This is just plain text with no JSON')).toThrow(
'Failed to extract JSON'
);
});
it('throws on empty input', () => {
expect(() => extractJson('')).toThrow('Failed to extract JSON');
});
it('handles real-world LLM response with preamble', () => {
const input = `Based on my analysis, here is the result:
\`\`\`json
{
"foods": [
{"name": "Apple", "calories": 95, "protein": 0.5}
],
"totalCalories": 95,
"confidence": 0.9
}
\`\`\`
This analysis is based on the image provided.`;
const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
expect(result.totalCalories).toBe(95);
expect(result.foods).toHaveLength(1);
});
it('prefers object over array when both exist', () => {
// Direct parse fails, fence fails, tries object first
const input = 'Some text {"key": "val"} and [1, 2, 3]';
const result = extractJson(input);
expect(result).toEqual({ key: 'val' });
});
});

View file

@ -0,0 +1,277 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LlmClient } from '../llm-client';
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
import type { ChatCompletionResponse } from '../types/openai-compat.types';
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
const DEFAULT_OPTIONS: ResolvedLlmOptions = {
manaLlmUrl: 'http://localhost:3025',
defaultModel: 'ollama/gemma3:4b',
defaultVisionModel: 'ollama/llava:7b',
timeout: 30_000,
maxRetries: 0, // No retries in tests for simplicity
debug: false,
};
function mockCompletionResponse(
content: string,
model = 'ollama/gemma3:4b'
): ChatCompletionResponse {
return {
id: 'chatcmpl-test123',
object: 'chat.completion',
created: Date.now(),
model,
choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
};
}
function mockFetchOk(body: unknown): void {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () => Promise.resolve(body),
text: () => Promise.resolve(JSON.stringify(body)),
} as unknown as Response);
}
function mockFetchError(status: number, body = ''): void {
mockFetch.mockResolvedValueOnce({
ok: false,
status,
statusText: `Error ${status}`,
json: () => Promise.resolve({}),
text: () => Promise.resolve(body),
} as unknown as Response);
}
describe('LlmClient', () => {
let client: LlmClient;
beforeEach(() => {
vi.clearAllMocks();
client = new LlmClient(DEFAULT_OPTIONS);
});
describe('chat', () => {
it('sends correct request body', async () => {
mockFetchOk(mockCompletionResponse('Hello!'));
await client.chat('Hi there');
expect(mockFetch).toHaveBeenCalledTimes(1);
const [url, init] = mockFetch.mock.calls[0];
expect(url).toBe('http://localhost:3025/v1/chat/completions');
const body = JSON.parse(init.body);
expect(body.model).toBe('ollama/gemma3:4b');
expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
expect(body.stream).toBe(false);
});
it('includes system prompt when provided', async () => {
mockFetchOk(mockCompletionResponse('Response'));
await client.chat('Question', { systemPrompt: 'You are helpful.' });
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.messages).toEqual([
{ role: 'system', content: 'You are helpful.' },
{ role: 'user', content: 'Question' },
]);
});
it('uses custom model and temperature', async () => {
mockFetchOk(mockCompletionResponse('Response'));
await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('openrouter/gpt-4o');
expect(body.temperature).toBe(0.3);
});
it('returns ChatResult with content and usage', async () => {
mockFetchOk(mockCompletionResponse('Generated text'));
const result = await client.chat('Prompt');
expect(result.content).toBe('Generated text');
expect(result.model).toBe('ollama/gemma3:4b');
expect(result.usage.total_tokens).toBe(30);
expect(result.latencyMs).toBeGreaterThanOrEqual(0);
});
it('throws on error response', async () => {
mockFetchError(500, 'Internal Server Error');
await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
});
});
describe('json', () => {
it('extracts JSON from response', async () => {
mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));
const result = await client.json<{ category: string; title: string }>('Analyze this');
expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
});
it('extracts JSON from markdown-wrapped response', async () => {
mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));
const result = await client.json('Parse this');
expect(result.data).toEqual({ key: 'value' });
});
it('applies validation function', async () => {
mockFetchOk(mockCompletionResponse('{"name": "test"}'));
const validate = (data: unknown) => {
const obj = data as { name: string };
if (typeof obj.name !== 'string') throw new Error('invalid');
return obj;
};
const result = await client.json('Prompt', { validate });
expect(result.data.name).toBe('test');
});
it('retries JSON extraction on parse failure', async () => {
// First attempt returns bad JSON, second returns good
mockFetchOk(mockCompletionResponse('not json at all'));
mockFetchOk(mockCompletionResponse('{"valid": true}'));
const result = await client.json('Prompt', { jsonRetries: 1 });
expect(result.data).toEqual({ valid: true });
expect(mockFetch).toHaveBeenCalledTimes(2);
});
});
describe('vision', () => {
it('builds multimodal message with base64 image', async () => {
mockFetchOk(mockCompletionResponse('A rose'));
await client.vision('What is this?', 'abc123base64', 'image/jpeg');
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('ollama/llava:7b');
expect(body.messages[0].content).toEqual([
{ type: 'text', text: 'What is this?' },
{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
]);
});
it('uses data URL as-is if already formatted', async () => {
mockFetchOk(mockCompletionResponse('A cat'));
await client.vision('What?', 'data:image/png;base64,xyz');
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
const imageUrl = body.messages[0].content[1].image_url.url;
expect(imageUrl).toBe('data:image/png;base64,xyz');
});
it('uses custom vision model when specified', async () => {
mockFetchOk(mockCompletionResponse('Result'));
await client.vision('Prompt', 'img', 'image/jpeg', {
visionModel: 'ollama/qwen3-vl:4b',
});
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.model).toBe('ollama/qwen3-vl:4b');
});
});
describe('visionJson', () => {
it('extracts JSON from vision response', async () => {
mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));
const result = await client.visionJson<{ species: string }>(
'Identify plant',
'imgdata',
'image/jpeg'
);
expect(result.data.species).toBe('Rose');
});
});
describe('health', () => {
it('returns health status', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () =>
Promise.resolve({
status: 'healthy',
providers: { ollama: { status: 'healthy' } },
}),
} as unknown as Response);
const health = await client.health();
expect(health.status).toBe('healthy');
});
it('returns unhealthy on network error', async () => {
mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
const health = await client.health();
expect(health.status).toBe('unhealthy');
});
});
describe('listModels', () => {
it('returns model list', async () => {
mockFetch.mockResolvedValueOnce({
ok: true,
status: 200,
json: () =>
Promise.resolve({
data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
}),
} as unknown as Response);
const models = await client.listModels();
expect(models).toHaveLength(1);
expect(models[0].id).toBe('ollama/gemma3:4b');
});
});
describe('chatMessages', () => {
it('sends full message history', async () => {
mockFetchOk(mockCompletionResponse('Answer'));
await client.chatMessages([
{ role: 'system', content: 'Be brief.' },
{ role: 'user', content: 'Hello' },
{ role: 'assistant', content: 'Hi!' },
{ role: 'user', content: 'How are you?' },
]);
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.messages).toHaveLength(4);
});
});
describe('embed', () => {
it('sends embedding request', async () => {
mockFetchOk({
object: 'list',
data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
model: 'ollama/gemma3:4b',
usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
});
const result = await client.embed('Hello world');
expect(result.embeddings).toHaveLength(1);
expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
});
});
});

View file

@ -0,0 +1,118 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { retryFetch } from '../utils/retry';
// Mock global fetch
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);
function mockResponse(status: number, body = ''): Response {
return {
ok: status >= 200 && status < 300,
status,
statusText: `Status ${status}`,
text: () => Promise.resolve(body),
json: () => Promise.resolve(JSON.parse(body || '{}')),
headers: new Headers(),
} as unknown as Response;
}
describe('retryFetch', () => {
beforeEach(() => {
vi.clearAllMocks();
});
it('returns on first successful attempt', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('retries on 503 and succeeds', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('retries on 429 rate limit', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(429))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('retries on network error and succeeds', async () => {
mockFetch
.mockRejectedValueOnce(new Error('ECONNREFUSED'))
.mockResolvedValueOnce(mockResponse(200, '{}'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.ok).toBe(true);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('does NOT retry on 400 client error', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(400);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('does NOT retry on 401 unauthorized', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(401));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(401);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('does NOT retry on 404 not found', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(404));
const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
expect(response.status).toBe(404);
expect(mockFetch).toHaveBeenCalledTimes(1);
});
it('throws after exhausting all retries', async () => {
mockFetch
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(503))
.mockResolvedValueOnce(mockResponse(503));
await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
'HTTP 503'
);
expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
});
it('throws after exhausting retries on network errors', async () => {
mockFetch
.mockRejectedValueOnce(new Error('ECONNREFUSED'))
.mockRejectedValueOnce(new Error('ECONNREFUSED'));
await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
'ECONNREFUSED'
);
expect(mockFetch).toHaveBeenCalledTimes(2);
});
it('works with maxRetries: 0 (no retries)', async () => {
mockFetch.mockResolvedValueOnce(mockResponse(503));
await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();
expect(mockFetch).toHaveBeenCalledTimes(1);
});
});

View file

@ -0,0 +1,35 @@
// Module
export { LlmModule } from './llm.module';
export { LlmClientService } from './llm-client.service';
export { LLM_MODULE_OPTIONS } from './llm.constants';
// Core client (for advanced use cases)
export { LlmClient } from './llm-client';
// Interfaces
export type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
ResolvedLlmOptions,
} from './interfaces';
export { resolveOptions } from './interfaces';
// Types
export type {
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
} from './types';
// Utilities
export { extractJson } from './utils';

View file

@ -0,0 +1,8 @@
export type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
ResolvedLlmOptions,
} from './llm-options.interface';
export { resolveOptions } from './llm-options.interface';

View file

@ -0,0 +1,47 @@
import type { ModuleMetadata, Type } from '@nestjs/common';
export interface LlmModuleOptions {
/** mana-llm service URL (default: http://localhost:3025) */
manaLlmUrl?: string;
/** Default text model (default: ollama/gemma3:4b) */
defaultModel?: string;
/** Default vision model (default: ollama/llava:7b) */
defaultVisionModel?: string;
/** Request timeout in ms (default: 120000) */
timeout?: number;
/** Max retries on transient failures (default: 2) */
maxRetries?: number;
/** Enable debug logging (default: false) */
debug?: boolean;
}
export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
useExisting?: Type<LlmOptionsFactory>;
useClass?: Type<LlmOptionsFactory>;
useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
inject?: any[];
}
export interface LlmOptionsFactory {
createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}
export interface ResolvedLlmOptions {
manaLlmUrl: string;
defaultModel: string;
defaultVisionModel: string;
timeout: number;
maxRetries: number;
debug: boolean;
}
export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
return {
manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
timeout: options.timeout ?? 120_000,
maxRetries: options.maxRetries ?? 2,
debug: options.debug ?? false,
};
}

View file

@ -0,0 +1,16 @@
import { Inject, Injectable } from '@nestjs/common';
import { LlmClient } from './llm-client';
import { LLM_MODULE_OPTIONS } from './llm.constants';
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
import { resolveOptions } from './interfaces/llm-options.interface';
/**
* NestJS injectable wrapper around LlmClient.
* All logic lives in the framework-agnostic LlmClient base class.
*/
@Injectable()
export class LlmClientService extends LlmClient {
constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
super(resolveOptions(options));
}
}

View file

@ -0,0 +1,350 @@
/**
* Framework-agnostic LLM client that communicates with the mana-llm service.
*
* This is the core implementation shared between the NestJS LlmClientService
* and the standalone LlmClient export (for non-NestJS consumers like bot-services).
*/
import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
import type {
ChatMessage,
ChatOptions,
ChatResult,
JsonOptions,
JsonResult,
VisionOptions,
TokenUsage,
ModelInfo,
HealthStatus,
} from './types/chat.types';
import type {
ChatCompletionRequest,
ChatCompletionResponse,
EmbeddingResponse,
} from './types/openai-compat.types';
import { extractJson } from './utils/json-extractor';
import { retryFetch } from './utils/retry';
function createTimeoutSignal(ms: number): any {
const controller = new AbortController();
setTimeout(() => controller.abort(), ms);
return controller.signal;
}
export class LlmClient {
private readonly baseUrl: string;
private readonly options: ResolvedLlmOptions;
constructor(options: ResolvedLlmOptions) {
this.options = options;
this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
}
// ---------------------------------------------------------------------------
// Text Chat
// ---------------------------------------------------------------------------
/** Simple chat with a single prompt string. */
async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
return this.chatMessages(messages, opts);
}
/** Chat with full message history. */
async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
const body = this.buildRequest(messages, opts, false);
const start = Date.now();
const response = await this.fetchCompletion(body, opts?.timeout);
const latencyMs = Date.now() - start;
return {
content: response.choices[0]?.message?.content ?? '',
model: response.model,
usage: response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
latencyMs,
};
}
// ---------------------------------------------------------------------------
// Streaming
// ---------------------------------------------------------------------------
/** Streaming chat - returns an async iterable of text tokens. */
async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
yield* this.chatStreamMessages(messages, opts);
}
/** Streaming chat with full message history. */
async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
const body = this.buildRequest(messages, opts, true);
const timeout = opts?.timeout ?? this.options.timeout;
const response = await retryFetch(
`${this.baseUrl}/v1/chat/completions`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: createTimeoutSignal(timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm stream error ${response.status}: ${text}`);
}
if (!response.body) {
throw new Error('mana-llm returned no response body for stream');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() ?? '';
for (const line of lines) {
const trimmed = line.trim();
if (!trimmed || !trimmed.startsWith('data: ')) continue;
const data = trimmed.slice(6);
if (data === '[DONE]') return;
try {
const chunk = JSON.parse(data);
const content = chunk.choices?.[0]?.delta?.content;
if (content) yield content;
} catch {
// Skip unparseable chunks
}
}
}
} finally {
reader.releaseLock();
}
}
// ---------------------------------------------------------------------------
// Structured JSON Output
// ---------------------------------------------------------------------------
/** Chat that extracts and parses JSON from the response. */
async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
const messages = this.buildMessages(prompt, opts?.systemPrompt);
return this.jsonMessages<T>(messages, opts);
}
/** JSON extraction from full message history. */
async jsonMessages<T = unknown>(
messages: ChatMessage[],
opts?: JsonOptions<T>
): Promise<JsonResult<T>> {
const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
let lastError: Error | undefined;
for (let attempt = 0; attempt < maxAttempts; attempt++) {
const result = await this.chatMessages(messages, opts);
try {
const data = extractJson<T>(result.content, opts?.validate);
return { ...result, data };
} catch (error) {
lastError = error instanceof Error ? error : new Error(String(error));
if (this.options.debug) {
console.warn(
`[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
lastError.message
);
}
}
}
throw lastError ?? new Error('JSON extraction failed');
}
// ---------------------------------------------------------------------------
// Vision
// ---------------------------------------------------------------------------
/** Analyze an image with a text prompt. */
async vision(
prompt: string,
imageBase64: string,
mimeType?: string,
opts?: VisionOptions
): Promise<ChatResult> {
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
const model = opts?.visionModel ?? this.options.defaultVisionModel;
return this.chatMessages(messages, { ...opts, model });
}
/** Vision + JSON extraction. */
async visionJson<T = unknown>(
prompt: string,
imageBase64: string,
mimeType?: string,
opts?: VisionOptions & JsonOptions<T>
): Promise<JsonResult<T>> {
const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
const model = opts?.visionModel ?? this.options.defaultVisionModel;
return this.jsonMessages<T>(messages, { ...opts, model });
}
// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------
/** Generate embeddings for text input. */
async embed(
input: string | string[],
model?: string
): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
const response = await retryFetch(
`${this.baseUrl}/v1/embeddings`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: model ?? this.options.defaultModel,
input,
}),
signal: createTimeoutSignal(this.options.timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
}
const data = (await response.json()) as EmbeddingResponse;
return {
embeddings: data.data.map((d) => d.embedding),
usage: data.usage,
};
}
// ---------------------------------------------------------------------------
// Health & Models
// ---------------------------------------------------------------------------
/** Check mana-llm health and provider status. */
async health(): Promise<HealthStatus> {
try {
const response = await fetch(`${this.baseUrl}/health`, {
signal: createTimeoutSignal(5_000),
});
if (!response.ok) {
return { status: 'unhealthy', providers: {} };
}
return (await response.json()) as HealthStatus;
} catch {
return { status: 'unhealthy', providers: {} };
}
}
/** List available models from all providers. */
async listModels(): Promise<ModelInfo[]> {
const response = await fetch(`${this.baseUrl}/v1/models`, {
signal: createTimeoutSignal(10_000),
});
if (!response.ok) {
throw new Error(`mana-llm models error ${response.status}`);
}
const data = (await response.json()) as { data: ModelInfo[] };
return data.data ?? [];
}
// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------
private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
const messages: ChatMessage[] = [];
if (systemPrompt) {
messages.push({ role: 'system', content: systemPrompt });
}
messages.push({ role: 'user', content: prompt });
return messages;
}
private buildVisionMessages(
prompt: string,
imageBase64: string,
mimeType?: string,
systemPrompt?: string
): ChatMessage[] {
const mime = mimeType ?? 'image/jpeg';
const dataUrl = imageBase64.startsWith('data:')
? imageBase64
: `data:${mime};base64,${imageBase64}`;
const messages: ChatMessage[] = [];
if (systemPrompt) {
messages.push({ role: 'system', content: systemPrompt });
}
messages.push({
role: 'user',
content: [
{ type: 'text', text: prompt },
{ type: 'image_url', image_url: { url: dataUrl } },
],
});
return messages;
}
private buildRequest(
messages: ChatMessage[],
opts: ChatOptions | undefined,
stream: boolean
): ChatCompletionRequest {
const request: ChatCompletionRequest = {
model: opts?.model ?? this.options.defaultModel,
messages,
stream,
};
if (opts?.temperature !== undefined) request.temperature = opts.temperature;
if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;
return request;
}
private async fetchCompletion(
body: ChatCompletionRequest,
timeoutOverride?: number
): Promise<ChatCompletionResponse> {
const timeout = timeoutOverride ?? this.options.timeout;
const response = await retryFetch(
`${this.baseUrl}/v1/chat/completions`,
{
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
signal: createTimeoutSignal(timeout),
},
{ maxRetries: this.options.maxRetries }
);
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`mana-llm error ${response.status}: ${text}`);
}
return (await response.json()) as ChatCompletionResponse;
}
}

View file

@ -0,0 +1 @@
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';

View file

@ -0,0 +1,80 @@
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
import type {
LlmModuleOptions,
LlmModuleAsyncOptions,
LlmOptionsFactory,
} from './interfaces/llm-options.interface';
import { LlmClientService } from './llm-client.service';
import { LLM_MODULE_OPTIONS } from './llm.constants';
@Global()
@Module({})
export class LlmModule {
static forRoot(options: LlmModuleOptions): DynamicModule {
return {
module: LlmModule,
providers: [
{
provide: LLM_MODULE_OPTIONS,
useValue: options,
},
LlmClientService,
],
exports: [LLM_MODULE_OPTIONS, LlmClientService],
};
}
static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
const asyncProviders = this.createAsyncProviders(options);
return {
module: LlmModule,
imports: options.imports || [],
providers: [...asyncProviders, LlmClientService],
exports: [LLM_MODULE_OPTIONS, LlmClientService],
};
}
private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
if (options.useFactory) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: options.useFactory,
inject: options.inject || [],
},
];
}
const useClass = options.useClass;
const useExisting = options.useExisting;
if (useClass) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: async (optionsFactory: LlmOptionsFactory) =>
await optionsFactory.createLlmOptions(),
inject: [useClass],
},
{
provide: useClass,
useClass,
},
];
}
if (useExisting) {
return [
{
provide: LLM_MODULE_OPTIONS,
useFactory: async (optionsFactory: LlmOptionsFactory) =>
await optionsFactory.createLlmOptions(),
inject: [useExisting],
},
];
}
return [];
}
}

View file

@ -0,0 +1,30 @@
/**
* Standalone exports for non-NestJS consumers (e.g. bot-services).
*
* Usage:
* import { LlmClient } from '@manacore/shared-llm/standalone';
* const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' });
*/
export { LlmClient } from './llm-client';
export { resolveOptions } from './interfaces/llm-options.interface';
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';
// Types
export type {
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
} from './types';
// Utilities
export { extractJson } from './utils';

View file

@ -0,0 +1,100 @@
/**
* Core chat types for the LLM client.
* These are the high-level types that consumers interact with.
*/
// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------
export interface TextContentPart {
type: 'text';
text: string;
}
export interface ImageContentPart {
type: 'image_url';
image_url: { url: string };
}
export type ContentPart = TextContentPart | ImageContentPart;
export interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string | ContentPart[];
}
// ---------------------------------------------------------------------------
// Options
// ---------------------------------------------------------------------------
export interface ChatOptions {
/** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
model?: string;
/** Sampling temperature 0.0-2.0 */
temperature?: number;
/** Max tokens to generate */
maxTokens?: number;
/** System prompt prepended to messages */
systemPrompt?: string;
/** Request timeout in ms (overrides module default) */
timeout?: number;
}
export interface JsonOptions<T = unknown> extends ChatOptions {
/** Validation function applied to parsed JSON. Should throw on invalid data. */
validate?: (data: unknown) => T;
/** Number of extraction retries on parse failure (default: 1) */
jsonRetries?: number;
}
export interface VisionOptions extends ChatOptions {
/** Vision model override (default from module config, e.g. "ollama/llava:7b") */
visionModel?: string;
}
// ---------------------------------------------------------------------------
// Results
// ---------------------------------------------------------------------------
export interface TokenUsage {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
}
export interface ChatResult {
/** Generated text content */
content: string;
/** Model that was actually used */
model: string;
/** Token usage statistics */
usage: TokenUsage;
/** Request latency in milliseconds */
latencyMs: number;
}
export interface JsonResult<T = unknown> extends ChatResult {
/** Parsed and optionally validated data */
data: T;
}
// ---------------------------------------------------------------------------
// Models
// ---------------------------------------------------------------------------
export interface ModelInfo {
id: string;
object: 'model';
created: number;
owned_by: string;
}
// ---------------------------------------------------------------------------
// Health
// ---------------------------------------------------------------------------
export interface HealthStatus {
status: 'healthy' | 'degraded' | 'unhealthy';
providers: Record<string, unknown>;
}

View file

@ -0,0 +1,26 @@
export type {
ChatMessage,
ContentPart,
TextContentPart,
ImageContentPart,
ChatOptions,
JsonOptions,
VisionOptions,
TokenUsage,
ChatResult,
JsonResult,
ModelInfo,
HealthStatus,
} from './chat.types';
export type {
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionChoice,
ChatCompletionStreamChunk,
StreamChoice,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingData,
ModelsListResponse,
} from './openai-compat.types';

View file

@ -0,0 +1,97 @@
/**
* OpenAI-compatible wire format types matching the mana-llm API contract.
* These are internal types used for HTTP communication - consumers should
* use the high-level types from chat.types.ts instead.
*/
import type { ChatMessage, TokenUsage } from './chat.types';
// ---------------------------------------------------------------------------
// Request (POST /v1/chat/completions)
// ---------------------------------------------------------------------------
export interface ChatCompletionRequest {
model: string;
messages: ChatMessage[];
stream?: boolean;
temperature?: number;
max_tokens?: number;
top_p?: number;
frequency_penalty?: number;
presence_penalty?: number;
stop?: string | string[];
}
// ---------------------------------------------------------------------------
// Response (non-streaming)
// ---------------------------------------------------------------------------
export interface ChatCompletionResponse {
id: string;
object: 'chat.completion';
created: number;
model: string;
choices: ChatCompletionChoice[];
usage: TokenUsage;
}
export interface ChatCompletionChoice {
index: number;
message: { role: 'assistant'; content: string };
finish_reason: 'stop' | 'length' | 'content_filter' | null;
}
// ---------------------------------------------------------------------------
// Response (streaming)
// ---------------------------------------------------------------------------
export interface ChatCompletionStreamChunk {
id: string;
object: 'chat.completion.chunk';
created: number;
model: string;
choices: StreamChoice[];
}
export interface StreamChoice {
index: number;
delta: { role?: 'assistant'; content?: string };
finish_reason: string | null;
}
// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------
export interface EmbeddingRequest {
model: string;
input: string | string[];
encoding_format?: 'float' | 'base64';
}
export interface EmbeddingResponse {
object: 'list';
data: EmbeddingData[];
model: string;
usage: TokenUsage;
}
export interface EmbeddingData {
object: 'embedding';
index: number;
embedding: number[];
}
// ---------------------------------------------------------------------------
// Models (GET /v1/models)
// ---------------------------------------------------------------------------
export interface ModelsListResponse {
object: 'list';
data: Array<{
id: string;
object: 'model';
created: number;
owned_by: string;
}>;
}

View file

@ -0,0 +1,3 @@
export { extractJson } from './json-extractor';
export { retryFetch } from './retry';
export type { RetryOptions } from './retry';

View file

@ -0,0 +1,94 @@
/**
* Extract and parse JSON from LLM responses.
*
* LLMs often wrap JSON in markdown code fences or include extra text.
* This utility handles all common patterns:
* 1. Direct JSON parse
* 2. Markdown ```json ... ``` fences
* 3. First { ... } or [ ... ] block in text
*/
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
const trimmed = text.trim();
// Step 1: Try direct parse
const direct = tryParse<T>(trimmed, validate);
if (direct !== undefined) return direct;
// Step 2: Strip markdown code fences
const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) {
const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
if (fenced !== undefined) return fenced;
}
// Step 3: Find first JSON object
const objectStart = trimmed.indexOf('{');
if (objectStart !== -1) {
const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
if (objectStr) {
const obj = tryParse<T>(objectStr, validate);
if (obj !== undefined) return obj;
}
}
// Step 4: Find first JSON array
const arrayStart = trimmed.indexOf('[');
if (arrayStart !== -1) {
const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
if (arrayStr) {
const arr = tryParse<T>(arrayStr, validate);
if (arr !== undefined) return arr;
}
}
throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
}
function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
try {
const parsed = JSON.parse(text);
return validate ? validate(parsed) : parsed;
} catch {
return undefined;
}
}
/**
* Extract a balanced block starting from the given position.
* Handles nested braces/brackets but not strings with escaped delimiters.
*/
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
let depth = 0;
let inString = false;
let escape = false;
for (let i = start; i < text.length; i++) {
const ch = text[i];
if (escape) {
escape = false;
continue;
}
if (ch === '\\') {
escape = true;
continue;
}
if (ch === '"') {
inString = !inString;
continue;
}
if (inString) continue;
if (ch === open) depth++;
if (ch === close) depth--;
if (depth === 0) {
return text.slice(start, i + 1);
}
}
return null;
}

View file

@ -0,0 +1,51 @@
/**
* Fetch wrapper with exponential backoff retry for transient failures.
*
* Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
* Does NOT retry on: 400, 401, 403, 404 (client errors).
*/
const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
export interface RetryOptions {
maxRetries: number;
/** Base delay in ms (doubles each retry). Default: 200 */
baseDelay?: number;
}
export async function retryFetch(
url: string,
init: RequestInit,
options: RetryOptions
): Promise<Response> {
const { maxRetries, baseDelay = 200 } = options;
let lastError: Error | undefined;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const response = await fetch(url, init);
if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
return response;
}
// Retryable status code
lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
} catch (error) {
// Network error (connection refused, timeout, etc.)
lastError = error instanceof Error ? error : new Error(String(error));
}
// Don't sleep after the last attempt
if (attempt < maxRetries) {
const delay = baseDelay * Math.pow(2, attempt);
await sleep(delay);
}
}
throw lastError ?? new Error('retryFetch exhausted all retries');
}
function sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}