feat: add unified @manacore/shared-llm package and migrate all backends

Create a shared LLM client package that provides a unified interface to the mana-llm service, replacing 9 individual fetch-based integrations with consistent error handling, retry logic, and JSON extraction. Package (@manacore/shared-llm): - LlmModule with forRoot/forRootAsync (NestJS dynamic module) - LlmClientService: chat, json, vision, visionJson, embed, stream - LlmClient standalone class for non-NestJS consumers - extractJson utility (consolidates 3 markdown-stripping implementations) - retryFetch with exponential backoff (429, 5xx, network errors) - 44 unit tests (json-extractor, retry, llm-client) Migrated backends: - mana-core-auth: raw fetch → llm.json() - planta: raw fetch + vision → llm.visionJson() - nutriphi: raw fetch + regex → llm.visionJson() + llm.json() - chat: custom OllamaService (175 LOC) → llm.chatMessages() - context: raw fetch → llm.chat() (keeps token tracking) - traces: 2x raw fetch → llm.chat() - manadeck: @google/genai SDK → llm.json() + llm.visionJson() - bot-services: raw Ollama API → LlmClient standalone - matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision() New credit operations: - AI_PLANT_ANALYSIS (2 credits, planta) - AI_GUIDE_GENERATION (5 credits, traces) - AI_CONTEXT_GENERATION (2 credits, context) - AI_BOT_CHAT (0.1 credits, matrix) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-23 19:06:42 +02:00 · 2026-03-23 22:06:30 +01:00 · 2026-03-23 22:06:30 +01:00 · e2f144962c
commit e2f144962c
parent e7bf58c5b6
48 changed files with 2476 additions and 1297 deletions
--- a/packages/shared-llm/src/tests/json-extractor.spec.ts
+++ b/packages/shared-llm/src/tests/json-extractor.spec.ts
@ -0,0 +1,119 @@
+import { describe, it, expect } from 'vitest';
+import { extractJson } from '../utils/json-extractor';
+
+describe('extractJson', () => {
+	it('parses direct JSON object', () => {
+		const result = extractJson('{"name": "test", "value": 42}');
+		expect(result).toEqual({ name: 'test', value: 42 });
+	});
+
+	it('parses direct JSON array', () => {
+		const result = extractJson('[1, 2, 3]');
+		expect(result).toEqual([1, 2, 3]);
+	});
+
+	it('strips markdown json code fence', () => {
+		const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
+		const result = extractJson(input);
+		expect(result).toEqual({ category: 'bug', title: 'Fix login' });
+	});
+
+	it('strips markdown code fence without json label', () => {
+		const input = '```\n{"key": "value"}\n```';
+		const result = extractJson(input);
+		expect(result).toEqual({ key: 'value' });
+	});
+
+	it('extracts JSON from surrounding text', () => {
+		const input =
+			'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
+		const result = extractJson(input);
+		expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
+	});
+
+	it('extracts JSON array from surrounding text', () => {
+		const input = 'The items are: [1, 2, 3] as requested.';
+		const result = extractJson(input);
+		expect(result).toEqual([1, 2, 3]);
+	});
+
+	it('handles nested JSON objects', () => {
+		const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
+		const result = extractJson(input);
+		expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
+	});
+
+	it('handles JSON with escaped quotes in strings', () => {
+		const input = '{"text": "He said \\"hello\\""}';
+		const result = extractJson(input);
+		expect(result).toEqual({ text: 'He said "hello"' });
+	});
+
+	it('handles JSON with braces inside strings', () => {
+		const input = 'Result: {"code": "if (x) { return }"}';
+		const result = extractJson(input);
+		expect(result).toEqual({ code: 'if (x) { return }' });
+	});
+
+	it('trims whitespace before parsing', () => {
+		const input = '  \n  {"key": "value"}  \n  ';
+		const result = extractJson(input);
+		expect(result).toEqual({ key: 'value' });
+	});
+
+	it('applies validation function on success', () => {
+		const validate = (data: unknown) => {
+			const obj = data as { name: string };
+			if (!obj.name) throw new Error('missing name');
+			return obj;
+		};
+		const result = extractJson('{"name": "test"}', validate);
+		expect(result).toEqual({ name: 'test' });
+	});
+
+	it('throws when validation fails', () => {
+		const validate = (data: unknown) => {
+			const obj = data as { name?: string };
+			if (!obj.name) throw new Error('missing name');
+			return obj;
+		};
+		expect(() => extractJson('{"value": 123}', validate)).toThrow();
+	});
+
+	it('throws on completely invalid input', () => {
+		expect(() => extractJson('This is just plain text with no JSON')).toThrow(
+			'Failed to extract JSON'
+		);
+	});
+
+	it('throws on empty input', () => {
+		expect(() => extractJson('')).toThrow('Failed to extract JSON');
+	});
+
+	it('handles real-world LLM response with preamble', () => {
+		const input = `Based on my analysis, here is the result:
+
+\`\`\`json
+{
+  "foods": [
+    {"name": "Apple", "calories": 95, "protein": 0.5}
+  ],
+  "totalCalories": 95,
+  "confidence": 0.9
+}
+\`\`\`
+
+This analysis is based on the image provided.`;
+
+		const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
+		expect(result.totalCalories).toBe(95);
+		expect(result.foods).toHaveLength(1);
+	});
+
+	it('prefers object over array when both exist', () => {
+		// Direct parse fails, fence fails, tries object first
+		const input = 'Some text {"key": "val"} and [1, 2, 3]';
+		const result = extractJson(input);
+		expect(result).toEqual({ key: 'val' });
+	});
+});
--- a/packages/shared-llm/src/tests/llm-client.spec.ts
+++ b/packages/shared-llm/src/tests/llm-client.spec.ts
@ -0,0 +1,277 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { LlmClient } from '../llm-client';
+import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
+import type { ChatCompletionResponse } from '../types/openai-compat.types';
+
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+const DEFAULT_OPTIONS: ResolvedLlmOptions = {
+	manaLlmUrl: 'http://localhost:3025',
+	defaultModel: 'ollama/gemma3:4b',
+	defaultVisionModel: 'ollama/llava:7b',
+	timeout: 30_000,
+	maxRetries: 0, // No retries in tests for simplicity
+	debug: false,
+};
+
+function mockCompletionResponse(
+	content: string,
+	model = 'ollama/gemma3:4b'
+): ChatCompletionResponse {
+	return {
+		id: 'chatcmpl-test123',
+		object: 'chat.completion',
+		created: Date.now(),
+		model,
+		choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
+		usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+	};
+}
+
+function mockFetchOk(body: unknown): void {
+	mockFetch.mockResolvedValueOnce({
+		ok: true,
+		status: 200,
+		json: () => Promise.resolve(body),
+		text: () => Promise.resolve(JSON.stringify(body)),
+	} as unknown as Response);
+}
+
+function mockFetchError(status: number, body = ''): void {
+	mockFetch.mockResolvedValueOnce({
+		ok: false,
+		status,
+		statusText: `Error ${status}`,
+		json: () => Promise.resolve({}),
+		text: () => Promise.resolve(body),
+	} as unknown as Response);
+}
+
+describe('LlmClient', () => {
+	let client: LlmClient;
+
+	beforeEach(() => {
+		vi.clearAllMocks();
+		client = new LlmClient(DEFAULT_OPTIONS);
+	});
+
+	describe('chat', () => {
+		it('sends correct request body', async () => {
+			mockFetchOk(mockCompletionResponse('Hello!'));
+
+			await client.chat('Hi there');
+
+			expect(mockFetch).toHaveBeenCalledTimes(1);
+			const [url, init] = mockFetch.mock.calls[0];
+			expect(url).toBe('http://localhost:3025/v1/chat/completions');
+
+			const body = JSON.parse(init.body);
+			expect(body.model).toBe('ollama/gemma3:4b');
+			expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
+			expect(body.stream).toBe(false);
+		});
+
+		it('includes system prompt when provided', async () => {
+			mockFetchOk(mockCompletionResponse('Response'));
+
+			await client.chat('Question', { systemPrompt: 'You are helpful.' });
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			expect(body.messages).toEqual([
+				{ role: 'system', content: 'You are helpful.' },
+				{ role: 'user', content: 'Question' },
+			]);
+		});
+
+		it('uses custom model and temperature', async () => {
+			mockFetchOk(mockCompletionResponse('Response'));
+
+			await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			expect(body.model).toBe('openrouter/gpt-4o');
+			expect(body.temperature).toBe(0.3);
+		});
+
+		it('returns ChatResult with content and usage', async () => {
+			mockFetchOk(mockCompletionResponse('Generated text'));
+
+			const result = await client.chat('Prompt');
+
+			expect(result.content).toBe('Generated text');
+			expect(result.model).toBe('ollama/gemma3:4b');
+			expect(result.usage.total_tokens).toBe(30);
+			expect(result.latencyMs).toBeGreaterThanOrEqual(0);
+		});
+
+		it('throws on error response', async () => {
+			mockFetchError(500, 'Internal Server Error');
+
+			await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
+		});
+	});
+
+	describe('json', () => {
+		it('extracts JSON from response', async () => {
+			mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));
+
+			const result = await client.json<{ category: string; title: string }>('Analyze this');
+
+			expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
+			expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
+		});
+
+		it('extracts JSON from markdown-wrapped response', async () => {
+			mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));
+
+			const result = await client.json('Parse this');
+			expect(result.data).toEqual({ key: 'value' });
+		});
+
+		it('applies validation function', async () => {
+			mockFetchOk(mockCompletionResponse('{"name": "test"}'));
+
+			const validate = (data: unknown) => {
+				const obj = data as { name: string };
+				if (typeof obj.name !== 'string') throw new Error('invalid');
+				return obj;
+			};
+
+			const result = await client.json('Prompt', { validate });
+			expect(result.data.name).toBe('test');
+		});
+
+		it('retries JSON extraction on parse failure', async () => {
+			// First attempt returns bad JSON, second returns good
+			mockFetchOk(mockCompletionResponse('not json at all'));
+			mockFetchOk(mockCompletionResponse('{"valid": true}'));
+
+			const result = await client.json('Prompt', { jsonRetries: 1 });
+			expect(result.data).toEqual({ valid: true });
+			expect(mockFetch).toHaveBeenCalledTimes(2);
+		});
+	});
+
+	describe('vision', () => {
+		it('builds multimodal message with base64 image', async () => {
+			mockFetchOk(mockCompletionResponse('A rose'));
+
+			await client.vision('What is this?', 'abc123base64', 'image/jpeg');
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			expect(body.model).toBe('ollama/llava:7b');
+			expect(body.messages[0].content).toEqual([
+				{ type: 'text', text: 'What is this?' },
+				{ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
+			]);
+		});
+
+		it('uses data URL as-is if already formatted', async () => {
+			mockFetchOk(mockCompletionResponse('A cat'));
+
+			await client.vision('What?', 'data:image/png;base64,xyz');
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			const imageUrl = body.messages[0].content[1].image_url.url;
+			expect(imageUrl).toBe('data:image/png;base64,xyz');
+		});
+
+		it('uses custom vision model when specified', async () => {
+			mockFetchOk(mockCompletionResponse('Result'));
+
+			await client.vision('Prompt', 'img', 'image/jpeg', {
+				visionModel: 'ollama/qwen3-vl:4b',
+			});
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			expect(body.model).toBe('ollama/qwen3-vl:4b');
+		});
+	});
+
+	describe('visionJson', () => {
+		it('extracts JSON from vision response', async () => {
+			mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));
+
+			const result = await client.visionJson<{ species: string }>(
+				'Identify plant',
+				'imgdata',
+				'image/jpeg'
+			);
+
+			expect(result.data.species).toBe('Rose');
+		});
+	});
+
+	describe('health', () => {
+		it('returns health status', async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				status: 200,
+				json: () =>
+					Promise.resolve({
+						status: 'healthy',
+						providers: { ollama: { status: 'healthy' } },
+					}),
+			} as unknown as Response);
+
+			const health = await client.health();
+			expect(health.status).toBe('healthy');
+		});
+
+		it('returns unhealthy on network error', async () => {
+			mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+
+			const health = await client.health();
+			expect(health.status).toBe('unhealthy');
+		});
+	});
+
+	describe('listModels', () => {
+		it('returns model list', async () => {
+			mockFetch.mockResolvedValueOnce({
+				ok: true,
+				status: 200,
+				json: () =>
+					Promise.resolve({
+						data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
+					}),
+			} as unknown as Response);
+
+			const models = await client.listModels();
+			expect(models).toHaveLength(1);
+			expect(models[0].id).toBe('ollama/gemma3:4b');
+		});
+	});
+
+	describe('chatMessages', () => {
+		it('sends full message history', async () => {
+			mockFetchOk(mockCompletionResponse('Answer'));
+
+			await client.chatMessages([
+				{ role: 'system', content: 'Be brief.' },
+				{ role: 'user', content: 'Hello' },
+				{ role: 'assistant', content: 'Hi!' },
+				{ role: 'user', content: 'How are you?' },
+			]);
+
+			const body = JSON.parse(mockFetch.mock.calls[0][1].body);
+			expect(body.messages).toHaveLength(4);
+		});
+	});
+
+	describe('embed', () => {
+		it('sends embedding request', async () => {
+			mockFetchOk({
+				object: 'list',
+				data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
+				model: 'ollama/gemma3:4b',
+				usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
+			});
+
+			const result = await client.embed('Hello world');
+			expect(result.embeddings).toHaveLength(1);
+			expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
+		});
+	});
+});
--- a/packages/shared-llm/src/tests/retry.spec.ts
+++ b/packages/shared-llm/src/tests/retry.spec.ts
@ -0,0 +1,118 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { retryFetch } from '../utils/retry';
+
+// Mock global fetch
+const mockFetch = vi.fn();
+vi.stubGlobal('fetch', mockFetch);
+
+function mockResponse(status: number, body = ''): Response {
+	return {
+		ok: status >= 200 && status < 300,
+		status,
+		statusText: `Status ${status}`,
+		text: () => Promise.resolve(body),
+		json: () => Promise.resolve(JSON.parse(body || '{}')),
+		headers: new Headers(),
+	} as unknown as Response;
+}
+
+describe('retryFetch', () => {
+	beforeEach(() => {
+		vi.clearAllMocks();
+	});
+
+	it('returns on first successful attempt', async () => {
+		mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.ok).toBe(true);
+		expect(mockFetch).toHaveBeenCalledTimes(1);
+	});
+
+	it('retries on 503 and succeeds', async () => {
+		mockFetch
+			.mockResolvedValueOnce(mockResponse(503))
+			.mockResolvedValueOnce(mockResponse(200, '{}'));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.ok).toBe(true);
+		expect(mockFetch).toHaveBeenCalledTimes(2);
+	});
+
+	it('retries on 429 rate limit', async () => {
+		mockFetch
+			.mockResolvedValueOnce(mockResponse(429))
+			.mockResolvedValueOnce(mockResponse(200, '{}'));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.ok).toBe(true);
+		expect(mockFetch).toHaveBeenCalledTimes(2);
+	});
+
+	it('retries on network error and succeeds', async () => {
+		mockFetch
+			.mockRejectedValueOnce(new Error('ECONNREFUSED'))
+			.mockResolvedValueOnce(mockResponse(200, '{}'));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.ok).toBe(true);
+		expect(mockFetch).toHaveBeenCalledTimes(2);
+	});
+
+	it('does NOT retry on 400 client error', async () => {
+		mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.status).toBe(400);
+		expect(mockFetch).toHaveBeenCalledTimes(1);
+	});
+
+	it('does NOT retry on 401 unauthorized', async () => {
+		mockFetch.mockResolvedValueOnce(mockResponse(401));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.status).toBe(401);
+		expect(mockFetch).toHaveBeenCalledTimes(1);
+	});
+
+	it('does NOT retry on 404 not found', async () => {
+		mockFetch.mockResolvedValueOnce(mockResponse(404));
+
+		const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
+		expect(response.status).toBe(404);
+		expect(mockFetch).toHaveBeenCalledTimes(1);
+	});
+
+	it('throws after exhausting all retries', async () => {
+		mockFetch
+			.mockResolvedValueOnce(mockResponse(503))
+			.mockResolvedValueOnce(mockResponse(503))
+			.mockResolvedValueOnce(mockResponse(503));
+
+		await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
+			'HTTP 503'
+		);
+
+		expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
+	});
+
+	it('throws after exhausting retries on network errors', async () => {
+		mockFetch
+			.mockRejectedValueOnce(new Error('ECONNREFUSED'))
+			.mockRejectedValueOnce(new Error('ECONNREFUSED'));
+
+		await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
+			'ECONNREFUSED'
+		);
+
+		expect(mockFetch).toHaveBeenCalledTimes(2);
+	});
+
+	it('works with maxRetries: 0 (no retries)', async () => {
+		mockFetch.mockResolvedValueOnce(mockResponse(503));
+
+		await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();
+
+		expect(mockFetch).toHaveBeenCalledTimes(1);
+	});
+});
--- a/packages/shared-llm/src/index.ts
+++ b/packages/shared-llm/src/index.ts
@ -0,0 +1,35 @@
+// Module
+export { LlmModule } from './llm.module';
+export { LlmClientService } from './llm-client.service';
+export { LLM_MODULE_OPTIONS } from './llm.constants';
+
+// Core client (for advanced use cases)
+export { LlmClient } from './llm-client';
+
+// Interfaces
+export type {
+	LlmModuleOptions,
+	LlmModuleAsyncOptions,
+	LlmOptionsFactory,
+	ResolvedLlmOptions,
+} from './interfaces';
+export { resolveOptions } from './interfaces';
+
+// Types
+export type {
+	ChatMessage,
+	ContentPart,
+	TextContentPart,
+	ImageContentPart,
+	ChatOptions,
+	JsonOptions,
+	VisionOptions,
+	TokenUsage,
+	ChatResult,
+	JsonResult,
+	ModelInfo,
+	HealthStatus,
+} from './types';
+
+// Utilities
+export { extractJson } from './utils';
--- a/packages/shared-llm/src/interfaces/index.ts
+++ b/packages/shared-llm/src/interfaces/index.ts
@ -0,0 +1,8 @@
+export type {
+	LlmModuleOptions,
+	LlmModuleAsyncOptions,
+	LlmOptionsFactory,
+	ResolvedLlmOptions,
+} from './llm-options.interface';
+
+export { resolveOptions } from './llm-options.interface';
--- a/packages/shared-llm/src/interfaces/llm-options.interface.ts
+++ b/packages/shared-llm/src/interfaces/llm-options.interface.ts
@ -0,0 +1,47 @@
+import type { ModuleMetadata, Type } from '@nestjs/common';
+
+export interface LlmModuleOptions {
+	/** mana-llm service URL (default: http://localhost:3025) */
+	manaLlmUrl?: string;
+	/** Default text model (default: ollama/gemma3:4b) */
+	defaultModel?: string;
+	/** Default vision model (default: ollama/llava:7b) */
+	defaultVisionModel?: string;
+	/** Request timeout in ms (default: 120000) */
+	timeout?: number;
+	/** Max retries on transient failures (default: 2) */
+	maxRetries?: number;
+	/** Enable debug logging (default: false) */
+	debug?: boolean;
+}
+
+export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
+	useExisting?: Type<LlmOptionsFactory>;
+	useClass?: Type<LlmOptionsFactory>;
+	useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
+	inject?: any[];
+}
+
+export interface LlmOptionsFactory {
+	createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
+}
+
+export interface ResolvedLlmOptions {
+	manaLlmUrl: string;
+	defaultModel: string;
+	defaultVisionModel: string;
+	timeout: number;
+	maxRetries: number;
+	debug: boolean;
+}
+
+export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
+	return {
+		manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
+		defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
+		defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
+		timeout: options.timeout ?? 120_000,
+		maxRetries: options.maxRetries ?? 2,
+		debug: options.debug ?? false,
+	};
+}
--- a/packages/shared-llm/src/llm-client.service.ts
+++ b/packages/shared-llm/src/llm-client.service.ts
@ -0,0 +1,16 @@
+import { Inject, Injectable } from '@nestjs/common';
+import { LlmClient } from './llm-client';
+import { LLM_MODULE_OPTIONS } from './llm.constants';
+import type { LlmModuleOptions } from './interfaces/llm-options.interface';
+import { resolveOptions } from './interfaces/llm-options.interface';
+
+/**
+ * NestJS injectable wrapper around LlmClient.
+ * All logic lives in the framework-agnostic LlmClient base class.
+ */
+@Injectable()
+export class LlmClientService extends LlmClient {
+	constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
+		super(resolveOptions(options));
+	}
+}
--- a/packages/shared-llm/src/llm-client.ts
+++ b/packages/shared-llm/src/llm-client.ts
@ -0,0 +1,350 @@
+/**
+ * Framework-agnostic LLM client that communicates with the mana-llm service.
+ *
+ * This is the core implementation shared between the NestJS LlmClientService
+ * and the standalone LlmClient export (for non-NestJS consumers like bot-services).
+ */
+
+import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
+import type {
+	ChatMessage,
+	ChatOptions,
+	ChatResult,
+	JsonOptions,
+	JsonResult,
+	VisionOptions,
+	TokenUsage,
+	ModelInfo,
+	HealthStatus,
+} from './types/chat.types';
+import type {
+	ChatCompletionRequest,
+	ChatCompletionResponse,
+	EmbeddingResponse,
+} from './types/openai-compat.types';
+import { extractJson } from './utils/json-extractor';
+import { retryFetch } from './utils/retry';
+
+function createTimeoutSignal(ms: number): any {
+	const controller = new AbortController();
+	setTimeout(() => controller.abort(), ms);
+	return controller.signal;
+}
+
+export class LlmClient {
+	private readonly baseUrl: string;
+	private readonly options: ResolvedLlmOptions;
+
+	constructor(options: ResolvedLlmOptions) {
+		this.options = options;
+		this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
+	}
+
+	// ---------------------------------------------------------------------------
+	// Text Chat
+	// ---------------------------------------------------------------------------
+
+	/** Simple chat with a single prompt string. */
+	async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
+		const messages = this.buildMessages(prompt, opts?.systemPrompt);
+		return this.chatMessages(messages, opts);
+	}
+
+	/** Chat with full message history. */
+	async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
+		const body = this.buildRequest(messages, opts, false);
+		const start = Date.now();
+		const response = await this.fetchCompletion(body, opts?.timeout);
+		const latencyMs = Date.now() - start;
+
+		return {
+			content: response.choices[0]?.message?.content ?? '',
+			model: response.model,
+			usage: response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+			latencyMs,
+		};
+	}
+
+	// ---------------------------------------------------------------------------
+	// Streaming
+	// ---------------------------------------------------------------------------
+
+	/** Streaming chat - returns an async iterable of text tokens. */
+	async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
+		const messages = this.buildMessages(prompt, opts?.systemPrompt);
+		yield* this.chatStreamMessages(messages, opts);
+	}
+
+	/** Streaming chat with full message history. */
+	async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
+		const body = this.buildRequest(messages, opts, true);
+		const timeout = opts?.timeout ?? this.options.timeout;
+
+		const response = await retryFetch(
+			`${this.baseUrl}/v1/chat/completions`,
+			{
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify(body),
+				signal: createTimeoutSignal(timeout),
+			},
+			{ maxRetries: this.options.maxRetries }
+		);
+
+		if (!response.ok) {
+			const text = await response.text().catch(() => '');
+			throw new Error(`mana-llm stream error ${response.status}: ${text}`);
+		}
+
+		if (!response.body) {
+			throw new Error('mana-llm returned no response body for stream');
+		}
+
+		const reader = response.body.getReader();
+		const decoder = new TextDecoder();
+		let buffer = '';
+
+		try {
+			while (true) {
+				const { done, value } = await reader.read();
+				if (done) break;
+
+				buffer += decoder.decode(value, { stream: true });
+				const lines = buffer.split('\n');
+				buffer = lines.pop() ?? '';
+
+				for (const line of lines) {
+					const trimmed = line.trim();
+					if (!trimmed || !trimmed.startsWith('data: ')) continue;
+
+					const data = trimmed.slice(6);
+					if (data === '[DONE]') return;
+
+					try {
+						const chunk = JSON.parse(data);
+						const content = chunk.choices?.[0]?.delta?.content;
+						if (content) yield content;
+					} catch {
+						// Skip unparseable chunks
+					}
+				}
+			}
+		} finally {
+			reader.releaseLock();
+		}
+	}
+
+	// ---------------------------------------------------------------------------
+	// Structured JSON Output
+	// ---------------------------------------------------------------------------
+
+	/** Chat that extracts and parses JSON from the response. */
+	async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
+		const messages = this.buildMessages(prompt, opts?.systemPrompt);
+		return this.jsonMessages<T>(messages, opts);
+	}
+
+	/** JSON extraction from full message history. */
+	async jsonMessages<T = unknown>(
+		messages: ChatMessage[],
+		opts?: JsonOptions<T>
+	): Promise<JsonResult<T>> {
+		const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
+		let lastError: Error | undefined;
+
+		for (let attempt = 0; attempt < maxAttempts; attempt++) {
+			const result = await this.chatMessages(messages, opts);
+
+			try {
+				const data = extractJson<T>(result.content, opts?.validate);
+				return { ...result, data };
+			} catch (error) {
+				lastError = error instanceof Error ? error : new Error(String(error));
+				if (this.options.debug) {
+					console.warn(
+						`[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
+						lastError.message
+					);
+				}
+			}
+		}
+
+		throw lastError ?? new Error('JSON extraction failed');
+	}
+
+	// ---------------------------------------------------------------------------
+	// Vision
+	// ---------------------------------------------------------------------------
+
+	/** Analyze an image with a text prompt. */
+	async vision(
+		prompt: string,
+		imageBase64: string,
+		mimeType?: string,
+		opts?: VisionOptions
+	): Promise<ChatResult> {
+		const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
+		const model = opts?.visionModel ?? this.options.defaultVisionModel;
+		return this.chatMessages(messages, { ...opts, model });
+	}
+
+	/** Vision + JSON extraction. */
+	async visionJson<T = unknown>(
+		prompt: string,
+		imageBase64: string,
+		mimeType?: string,
+		opts?: VisionOptions & JsonOptions<T>
+	): Promise<JsonResult<T>> {
+		const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
+		const model = opts?.visionModel ?? this.options.defaultVisionModel;
+		return this.jsonMessages<T>(messages, { ...opts, model });
+	}
+
+	// ---------------------------------------------------------------------------
+	// Embeddings
+	// ---------------------------------------------------------------------------
+
+	/** Generate embeddings for text input. */
+	async embed(
+		input: string | string[],
+		model?: string
+	): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
+		const response = await retryFetch(
+			`${this.baseUrl}/v1/embeddings`,
+			{
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify({
+					model: model ?? this.options.defaultModel,
+					input,
+				}),
+				signal: createTimeoutSignal(this.options.timeout),
+			},
+			{ maxRetries: this.options.maxRetries }
+		);
+
+		if (!response.ok) {
+			const text = await response.text().catch(() => '');
+			throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
+		}
+
+		const data = (await response.json()) as EmbeddingResponse;
+		return {
+			embeddings: data.data.map((d) => d.embedding),
+			usage: data.usage,
+		};
+	}
+
+	// ---------------------------------------------------------------------------
+	// Health & Models
+	// ---------------------------------------------------------------------------
+
+	/** Check mana-llm health and provider status. */
+	async health(): Promise<HealthStatus> {
+		try {
+			const response = await fetch(`${this.baseUrl}/health`, {
+				signal: createTimeoutSignal(5_000),
+			});
+			if (!response.ok) {
+				return { status: 'unhealthy', providers: {} };
+			}
+			return (await response.json()) as HealthStatus;
+		} catch {
+			return { status: 'unhealthy', providers: {} };
+		}
+	}
+
+	/** List available models from all providers. */
+	async listModels(): Promise<ModelInfo[]> {
+		const response = await fetch(`${this.baseUrl}/v1/models`, {
+			signal: createTimeoutSignal(10_000),
+		});
+
+		if (!response.ok) {
+			throw new Error(`mana-llm models error ${response.status}`);
+		}
+
+		const data = (await response.json()) as { data: ModelInfo[] };
+		return data.data ?? [];
+	}
+
+	// ---------------------------------------------------------------------------
+	// Private helpers
+	// ---------------------------------------------------------------------------
+
+	private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
+		const messages: ChatMessage[] = [];
+		if (systemPrompt) {
+			messages.push({ role: 'system', content: systemPrompt });
+		}
+		messages.push({ role: 'user', content: prompt });
+		return messages;
+	}
+
+	private buildVisionMessages(
+		prompt: string,
+		imageBase64: string,
+		mimeType?: string,
+		systemPrompt?: string
+	): ChatMessage[] {
+		const mime = mimeType ?? 'image/jpeg';
+		const dataUrl = imageBase64.startsWith('data:')
+			? imageBase64
+			: `data:${mime};base64,${imageBase64}`;
+
+		const messages: ChatMessage[] = [];
+		if (systemPrompt) {
+			messages.push({ role: 'system', content: systemPrompt });
+		}
+		messages.push({
+			role: 'user',
+			content: [
+				{ type: 'text', text: prompt },
+				{ type: 'image_url', image_url: { url: dataUrl } },
+			],
+		});
+		return messages;
+	}
+
+	private buildRequest(
+		messages: ChatMessage[],
+		opts: ChatOptions | undefined,
+		stream: boolean
+	): ChatCompletionRequest {
+		const request: ChatCompletionRequest = {
+			model: opts?.model ?? this.options.defaultModel,
+			messages,
+			stream,
+		};
+
+		if (opts?.temperature !== undefined) request.temperature = opts.temperature;
+		if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;
+
+		return request;
+	}
+
+	private async fetchCompletion(
+		body: ChatCompletionRequest,
+		timeoutOverride?: number
+	): Promise<ChatCompletionResponse> {
+		const timeout = timeoutOverride ?? this.options.timeout;
+
+		const response = await retryFetch(
+			`${this.baseUrl}/v1/chat/completions`,
+			{
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify(body),
+				signal: createTimeoutSignal(timeout),
+			},
+			{ maxRetries: this.options.maxRetries }
+		);
+
+		if (!response.ok) {
+			const text = await response.text().catch(() => '');
+			throw new Error(`mana-llm error ${response.status}: ${text}`);
+		}
+
+		return (await response.json()) as ChatCompletionResponse;
+	}
+}
--- a/packages/shared-llm/src/llm.constants.ts
+++ b/packages/shared-llm/src/llm.constants.ts
@ -0,0 +1 @@
+export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';
--- a/packages/shared-llm/src/llm.module.ts
+++ b/packages/shared-llm/src/llm.module.ts
@ -0,0 +1,80 @@
+import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
+import type {
+	LlmModuleOptions,
+	LlmModuleAsyncOptions,
+	LlmOptionsFactory,
+} from './interfaces/llm-options.interface';
+import { LlmClientService } from './llm-client.service';
+import { LLM_MODULE_OPTIONS } from './llm.constants';
+
+@Global()
+@Module({})
+export class LlmModule {
+	static forRoot(options: LlmModuleOptions): DynamicModule {
+		return {
+			module: LlmModule,
+			providers: [
+				{
+					provide: LLM_MODULE_OPTIONS,
+					useValue: options,
+				},
+				LlmClientService,
+			],
+			exports: [LLM_MODULE_OPTIONS, LlmClientService],
+		};
+	}
+
+	static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
+		const asyncProviders = this.createAsyncProviders(options);
+
+		return {
+			module: LlmModule,
+			imports: options.imports || [],
+			providers: [...asyncProviders, LlmClientService],
+			exports: [LLM_MODULE_OPTIONS, LlmClientService],
+		};
+	}
+
+	private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
+		if (options.useFactory) {
+			return [
+				{
+					provide: LLM_MODULE_OPTIONS,
+					useFactory: options.useFactory,
+					inject: options.inject || [],
+				},
+			];
+		}
+
+		const useClass = options.useClass;
+		const useExisting = options.useExisting;
+
+		if (useClass) {
+			return [
+				{
+					provide: LLM_MODULE_OPTIONS,
+					useFactory: async (optionsFactory: LlmOptionsFactory) =>
+						await optionsFactory.createLlmOptions(),
+					inject: [useClass],
+				},
+				{
+					provide: useClass,
+					useClass,
+				},
+			];
+		}
+
+		if (useExisting) {
+			return [
+				{
+					provide: LLM_MODULE_OPTIONS,
+					useFactory: async (optionsFactory: LlmOptionsFactory) =>
+						await optionsFactory.createLlmOptions(),
+					inject: [useExisting],
+				},
+			];
+		}
+
+		return [];
+	}
+}
--- a/packages/shared-llm/src/standalone.ts
+++ b/packages/shared-llm/src/standalone.ts
@ -0,0 +1,30 @@
+/**
+ * Standalone exports for non-NestJS consumers (e.g. bot-services).
+ *
+ * Usage:
+ *   import { LlmClient } from '@manacore/shared-llm/standalone';
+ *   const llm = new LlmClient({ manaLlmUrl: 'http://localhost:3025' });
+ */
+
+export { LlmClient } from './llm-client';
+export { resolveOptions } from './interfaces/llm-options.interface';
+export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';
+
+// Types
+export type {
+	ChatMessage,
+	ContentPart,
+	TextContentPart,
+	ImageContentPart,
+	ChatOptions,
+	JsonOptions,
+	VisionOptions,
+	TokenUsage,
+	ChatResult,
+	JsonResult,
+	ModelInfo,
+	HealthStatus,
+} from './types';
+
+// Utilities
+export { extractJson } from './utils';
--- a/packages/shared-llm/src/types/chat.types.ts
+++ b/packages/shared-llm/src/types/chat.types.ts
@ -0,0 +1,100 @@
+/**
+ * Core chat types for the LLM client.
+ * These are the high-level types that consumers interact with.
+ */
+
+// ---------------------------------------------------------------------------
+// Messages
+// ---------------------------------------------------------------------------
+
+export interface TextContentPart {
+	type: 'text';
+	text: string;
+}
+
+export interface ImageContentPart {
+	type: 'image_url';
+	image_url: { url: string };
+}
+
+export type ContentPart = TextContentPart | ImageContentPart;
+
+export interface ChatMessage {
+	role: 'system' | 'user' | 'assistant';
+	content: string | ContentPart[];
+}
+
+// ---------------------------------------------------------------------------
+// Options
+// ---------------------------------------------------------------------------
+
+export interface ChatOptions {
+	/** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
+	model?: string;
+	/** Sampling temperature 0.0-2.0 */
+	temperature?: number;
+	/** Max tokens to generate */
+	maxTokens?: number;
+	/** System prompt prepended to messages */
+	systemPrompt?: string;
+	/** Request timeout in ms (overrides module default) */
+	timeout?: number;
+}
+
+export interface JsonOptions<T = unknown> extends ChatOptions {
+	/** Validation function applied to parsed JSON. Should throw on invalid data. */
+	validate?: (data: unknown) => T;
+	/** Number of extraction retries on parse failure (default: 1) */
+	jsonRetries?: number;
+}
+
+export interface VisionOptions extends ChatOptions {
+	/** Vision model override (default from module config, e.g. "ollama/llava:7b") */
+	visionModel?: string;
+}
+
+// ---------------------------------------------------------------------------
+// Results
+// ---------------------------------------------------------------------------
+
+export interface TokenUsage {
+	prompt_tokens: number;
+	completion_tokens: number;
+	total_tokens: number;
+}
+
+export interface ChatResult {
+	/** Generated text content */
+	content: string;
+	/** Model that was actually used */
+	model: string;
+	/** Token usage statistics */
+	usage: TokenUsage;
+	/** Request latency in milliseconds */
+	latencyMs: number;
+}
+
+export interface JsonResult<T = unknown> extends ChatResult {
+	/** Parsed and optionally validated data */
+	data: T;
+}
+
+// ---------------------------------------------------------------------------
+// Models
+// ---------------------------------------------------------------------------
+
+export interface ModelInfo {
+	id: string;
+	object: 'model';
+	created: number;
+	owned_by: string;
+}
+
+// ---------------------------------------------------------------------------
+// Health
+// ---------------------------------------------------------------------------
+
+export interface HealthStatus {
+	status: 'healthy' | 'degraded' | 'unhealthy';
+	providers: Record<string, unknown>;
+}
--- a/packages/shared-llm/src/types/index.ts
+++ b/packages/shared-llm/src/types/index.ts
@ -0,0 +1,26 @@
+export type {
+	ChatMessage,
+	ContentPart,
+	TextContentPart,
+	ImageContentPart,
+	ChatOptions,
+	JsonOptions,
+	VisionOptions,
+	TokenUsage,
+	ChatResult,
+	JsonResult,
+	ModelInfo,
+	HealthStatus,
+} from './chat.types';
+
+export type {
+	ChatCompletionRequest,
+	ChatCompletionResponse,
+	ChatCompletionChoice,
+	ChatCompletionStreamChunk,
+	StreamChoice,
+	EmbeddingRequest,
+	EmbeddingResponse,
+	EmbeddingData,
+	ModelsListResponse,
+} from './openai-compat.types';
--- a/packages/shared-llm/src/types/openai-compat.types.ts
+++ b/packages/shared-llm/src/types/openai-compat.types.ts
@ -0,0 +1,97 @@
+/**
+ * OpenAI-compatible wire format types matching the mana-llm API contract.
+ * These are internal types used for HTTP communication - consumers should
+ * use the high-level types from chat.types.ts instead.
+ */
+
+import type { ChatMessage, TokenUsage } from './chat.types';
+
+// ---------------------------------------------------------------------------
+// Request (POST /v1/chat/completions)
+// ---------------------------------------------------------------------------
+
+export interface ChatCompletionRequest {
+	model: string;
+	messages: ChatMessage[];
+	stream?: boolean;
+	temperature?: number;
+	max_tokens?: number;
+	top_p?: number;
+	frequency_penalty?: number;
+	presence_penalty?: number;
+	stop?: string | string[];
+}
+
+// ---------------------------------------------------------------------------
+// Response (non-streaming)
+// ---------------------------------------------------------------------------
+
+export interface ChatCompletionResponse {
+	id: string;
+	object: 'chat.completion';
+	created: number;
+	model: string;
+	choices: ChatCompletionChoice[];
+	usage: TokenUsage;
+}
+
+export interface ChatCompletionChoice {
+	index: number;
+	message: { role: 'assistant'; content: string };
+	finish_reason: 'stop' | 'length' | 'content_filter' | null;
+}
+
+// ---------------------------------------------------------------------------
+// Response (streaming)
+// ---------------------------------------------------------------------------
+
+export interface ChatCompletionStreamChunk {
+	id: string;
+	object: 'chat.completion.chunk';
+	created: number;
+	model: string;
+	choices: StreamChoice[];
+}
+
+export interface StreamChoice {
+	index: number;
+	delta: { role?: 'assistant'; content?: string };
+	finish_reason: string | null;
+}
+
+// ---------------------------------------------------------------------------
+// Embeddings
+// ---------------------------------------------------------------------------
+
+export interface EmbeddingRequest {
+	model: string;
+	input: string | string[];
+	encoding_format?: 'float' | 'base64';
+}
+
+export interface EmbeddingResponse {
+	object: 'list';
+	data: EmbeddingData[];
+	model: string;
+	usage: TokenUsage;
+}
+
+export interface EmbeddingData {
+	object: 'embedding';
+	index: number;
+	embedding: number[];
+}
+
+// ---------------------------------------------------------------------------
+// Models (GET /v1/models)
+// ---------------------------------------------------------------------------
+
+export interface ModelsListResponse {
+	object: 'list';
+	data: Array<{
+		id: string;
+		object: 'model';
+		created: number;
+		owned_by: string;
+	}>;
+}
--- a/packages/shared-llm/src/utils/index.ts
+++ b/packages/shared-llm/src/utils/index.ts
@ -0,0 +1,3 @@
+export { extractJson } from './json-extractor';
+export { retryFetch } from './retry';
+export type { RetryOptions } from './retry';
--- a/packages/shared-llm/src/utils/json-extractor.ts
+++ b/packages/shared-llm/src/utils/json-extractor.ts
@ -0,0 +1,94 @@
+/**
+ * Extract and parse JSON from LLM responses.
+ *
+ * LLMs often wrap JSON in markdown code fences or include extra text.
+ * This utility handles all common patterns:
+ * 1. Direct JSON parse
+ * 2. Markdown ```json ... ``` fences
+ * 3. First { ... } or [ ... ] block in text
+ */
+export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
+	const trimmed = text.trim();
+
+	// Step 1: Try direct parse
+	const direct = tryParse<T>(trimmed, validate);
+	if (direct !== undefined) return direct;
+
+	// Step 2: Strip markdown code fences
+	const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
+	if (fenceMatch) {
+		const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
+		if (fenced !== undefined) return fenced;
+	}
+
+	// Step 3: Find first JSON object
+	const objectStart = trimmed.indexOf('{');
+	if (objectStart !== -1) {
+		const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
+		if (objectStr) {
+			const obj = tryParse<T>(objectStr, validate);
+			if (obj !== undefined) return obj;
+		}
+	}
+
+	// Step 4: Find first JSON array
+	const arrayStart = trimmed.indexOf('[');
+	if (arrayStart !== -1) {
+		const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
+		if (arrayStr) {
+			const arr = tryParse<T>(arrayStr, validate);
+			if (arr !== undefined) return arr;
+		}
+	}
+
+	throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
+}
+
+function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
+	try {
+		const parsed = JSON.parse(text);
+		return validate ? validate(parsed) : parsed;
+	} catch {
+		return undefined;
+	}
+}
+
+/**
+ * Extract a balanced block starting from the given position.
+ * Handles nested braces/brackets but not strings with escaped delimiters.
+ */
+function extractBalanced(text: string, start: number, open: string, close: string): string | null {
+	let depth = 0;
+	let inString = false;
+	let escape = false;
+
+	for (let i = start; i < text.length; i++) {
+		const ch = text[i];
+
+		if (escape) {
+			escape = false;
+			continue;
+		}
+
+		if (ch === '\\') {
+			escape = true;
+			continue;
+		}
+
+		if (ch === '"') {
+			inString = !inString;
+			continue;
+		}
+
+		if (inString) continue;
+
+		if (ch === open) depth++;
+		if (ch === close) depth--;
+
+		if (depth === 0) {
+			return text.slice(start, i + 1);
+		}
+	}
+
+	return null;
+}
--- a/packages/shared-llm/src/utils/retry.ts
+++ b/packages/shared-llm/src/utils/retry.ts
@ -0,0 +1,51 @@
+/**
+ * Fetch wrapper with exponential backoff retry for transient failures.
+ *
+ * Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
+ * Does NOT retry on: 400, 401, 403, 404 (client errors).
+ */
+
+const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);
+
+export interface RetryOptions {
+	maxRetries: number;
+	/** Base delay in ms (doubles each retry). Default: 200 */
+	baseDelay?: number;
+}
+
+export async function retryFetch(
+	url: string,
+	init: RequestInit,
+	options: RetryOptions
+): Promise<Response> {
+	const { maxRetries, baseDelay = 200 } = options;
+	let lastError: Error | undefined;
+
+	for (let attempt = 0; attempt <= maxRetries; attempt++) {
+		try {
+			const response = await fetch(url, init);
+
+			if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
+				return response;
+			}
+
+			// Retryable status code
+			lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
+		} catch (error) {
+			// Network error (connection refused, timeout, etc.)
+			lastError = error instanceof Error ? error : new Error(String(error));
+		}
+
+		// Don't sleep after the last attempt
+		if (attempt < maxRetries) {
+			const delay = baseDelay * Math.pow(2, attempt);
+			await sleep(delay);
+		}
+	}
+
+	throw lastError ?? new Error('retryFetch exhausted all retries');
+}
+
+function sleep(ms: number): Promise<void> {
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
				`@ -0,0 +1 @@`
				`export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';`