Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-14 20:01:09 +02:00)
feat: add unified @manacore/shared-llm package and migrate all backends
Create a shared LLM client package that provides a unified interface to the mana-llm service, replacing 9 individual fetch-based integrations with consistent error handling, retry logic, and JSON extraction.

Package (@manacore/shared-llm):
- LlmModule with forRoot/forRootAsync (NestJS dynamic module)
- LlmClientService: chat, json, vision, visionJson, embed, stream
- LlmClient standalone class for non-NestJS consumers
- extractJson utility (consolidates 3 markdown-stripping implementations)
- retryFetch with exponential backoff (429, 5xx, network errors)
- 44 unit tests (json-extractor, retry, llm-client)

Migrated backends:
- mana-core-auth: raw fetch → llm.json()
- planta: raw fetch + vision → llm.visionJson()
- nutriphi: raw fetch + regex → llm.visionJson() + llm.json()
- chat: custom OllamaService (175 LOC) → llm.chatMessages()
- context: raw fetch → llm.chat() (keeps token tracking)
- traces: 2x raw fetch → llm.chat()
- manadeck: @google/genai SDK → llm.json() + llm.visionJson()
- bot-services: raw Ollama API → LlmClient standalone
- matrix-ollama-bot: raw fetch → llm.chatMessages() + llm.vision()

New credit operations:
- AI_PLANT_ANALYSIS (2 credits, planta)
- AI_GUIDE_GENERATION (5 credits, traces)
- AI_CONTEXT_GENERATION (2 credits, context)
- AI_BOT_CHAT (0.1 credits, matrix)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
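The retry behavior the message describes can be sketched as follows. Only the name retryFetch and the retried conditions (429, 5xx, network errors) come from the commit message; the signature, delay schedule, and defaults below are assumptions, not the package's actual implementation.

// Hypothetical sketch of retryFetch. Only the function name and the retried
// conditions (429, 5xx, network errors) come from the commit message; the
// backoff schedule, parameters, and defaults are illustrative assumptions.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

export async function retryFetch(
  url: string,
  init: RequestInit,
  maxRetries = 3,
  baseDelayMs = 500
): Promise<Response> {
  for (let attempt = 0; ; attempt++) {
    try {
      const res = await fetch(url, init);
      // Retry on rate limiting and server errors; return everything else.
      if ((res.status === 429 || res.status >= 500) && attempt < maxRetries) {
        await sleep(baseDelayMs * 2 ** attempt); // exponential backoff
        continue;
      }
      return res;
    } catch (err) {
      // Network errors are retried on the same schedule.
      if (attempt >= maxRetries) throw err;
      await sleep(baseDelayMs * 2 ** attempt);
    }
  }
}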
This commit is contained in: parent e7bf58c5b6, commit e2f144962c
48 changed files with 2476 additions and 1297 deletions
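For orientation before the file diffs, a minimal consumer sketch assembled from the app.module.ts and service hunks below. LlmModule.forRootAsync, LlmClientService, and the option names (manaLlmUrl, timeout, debug, temperature, validate) appear verbatim in this commit; the module and service shells around them are illustrative only.

// Illustrative consumer; ExampleService and summarize() are hypothetical.
import { Injectable, Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule, LlmClientService } from '@manacore/shared-llm';

@Injectable()
export class ExampleService {
  constructor(private readonly llm: LlmClientService) {}

  // json() parses and optionally validates model output, replacing the
  // hand-rolled fetch plus markdown-stripping the migrated services used.
  async summarize(text: string): Promise<string> {
    const { data } = await this.llm.json<{ summary: string }>(
      `Summarize in one sentence: ${text}`,
      {
        temperature: 0.3,
        validate: (raw) => {
          const obj = raw as { summary: string };
          if (!obj.summary) throw new Error('Response must contain "summary"');
          return obj;
        },
      }
    );
    return data.summary;
  }
}

@Module({
  imports: [
    ConfigModule.forRoot({ isGlobal: true }),
    // Async registration, mirroring the pattern used by the migrated backends.
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        timeout: config.get<number>('LLM_TIMEOUT', 120000),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
  ],
  providers: [ExampleService],
})
export class AppModule {}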
@@ -29,19 +29,20 @@
    "test:cov": "jest --coverage"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/credit-operations": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
    "@nestjs/common": "^10.4.15",
    "@nestjs/throttler": "^6.2.1",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
    "@nestjs/platform-express": "^10.4.15",
    "@nestjs/throttler": "^6.2.1",
    "class-transformer": "^0.5.1",
    "class-validator": "^0.14.1",
    "dotenv": "^16.4.7",

@@ -56,15 +57,15 @@
  "devDependencies": {
    "@nestjs/cli": "^10.4.9",
    "@nestjs/schematics": "^10.2.3",
    "@nestjs/testing": "^10.4.15",
    "@types/express": "^5.0.0",
    "@types/jest": "^30.0.0",
    "@types/node": "^22.10.2",
    "@typescript-eslint/eslint-plugin": "^8.18.1",
    "@typescript-eslint/parser": "^8.18.1",
    "eslint": "^9.17.0",
    "eslint-config-prettier": "^9.1.0",
    "eslint-plugin-prettier": "^5.2.1",
    "@nestjs/testing": "^10.4.15",
    "@types/jest": "^30.0.0",
    "jest": "^30.2.0",
    "prettier": "^3.4.2",
    "source-map-support": "^0.5.21",
@@ -1,6 +1,7 @@
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { LlmModule } from '@manacore/shared-llm';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { DatabaseModule } from './db/database.module';

@@ -20,6 +21,15 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
      envFilePath: '.env',
    }),
    ThrottlerModule.forRoot([{ ttl: 60000, limit: 100 }]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (configService: ConfigService) => ({
        manaLlmUrl: configService.get('MANA_LLM_URL'),
        timeout: configService.get<number>('LLM_TIMEOUT', 120000),
        debug: configService.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    ManaCoreModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (configService: ConfigService) => ({
@@ -1,5 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';

@@ -8,65 +8,33 @@ interface ChatMessage {
  content: string;
}

interface ChatCompletionResponse {
  id: string;
  model: string;
  choices: {
    message: { role: string; content: string };
    finish_reason: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

interface LlmModel {
  id: string;
  owned_by: string;
}

@Injectable()
export class OllamaService {
  private readonly logger = new Logger(OllamaService.name);
  private readonly baseUrl: string;
  private readonly timeout: number;
  private isConnected = false;

  constructor(private configService: ConfigService) {
    this.baseUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.timeout = this.configService.get<number>('LLM_TIMEOUT') || 120000;

    // Check connection on startup
  constructor(private readonly llm: LlmClientService) {
    this.checkConnection();
  }

  async checkConnection(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: AbortSignal.timeout(5000),
      });
      if (response.ok) {
        const data = await response.json();
        this.isConnected = data.status === 'healthy' || data.status === 'degraded';
        if (this.isConnected) {
          const providers = Object.keys(data.providers || {}).join(', ');
          this.logger.log(`mana-llm connected: ${data.status}, providers: ${providers}`);
        }
        return this.isConnected;
      const health = await this.llm.health();
      const isConnected = health.status === 'healthy' || health.status === 'degraded';
      if (isConnected) {
        const providers = Object.keys(health.providers || {}).join(', ');
        this.logger.log(`mana-llm connected: ${health.status}, providers: ${providers}`);
      }
      this.isConnected = false;
      return false;
    } catch (error) {
      this.isConnected = false;
      this.logger.warn(`mana-llm not available at ${this.baseUrl} - local models will not work`);
      return isConnected;
    } catch {
      this.logger.warn('mana-llm not available - local models will not work');
      return false;
    }
  }

  isAvailable(): boolean {
    return this.isConnected;
    // Perform a synchronous check based on last known state
    // The actual health is checked on-demand via checkConnection
    return true;
  }

  async createChatCompletion(

@@ -75,70 +43,33 @@ export class OllamaService {
    temperature?: number,
    maxTokens?: number
  ): AsyncResult<ChatCompletionResponseDto> {
    if (!this.isConnected) {
      // Try to reconnect
      await this.checkConnection();
      if (!this.isConnected) {
        return err(
          ServiceError.externalError('mana-llm', `mana-llm server not available at ${this.baseUrl}`)
        );
      }
    }

    // Normalize model name to include ollama/ prefix if it doesn't have a provider
    const normalizedModel = modelName.includes('/') ? modelName : `ollama/${modelName}`;
    this.logger.log(`Sending request to mana-llm model: ${normalizedModel}`);

    try {
      const requestBody: Record<string, unknown> = {
      const result = await this.llm.chatMessages(messages, {
        model: normalizedModel,
        messages,
        stream: false,
      };

      // Add optional parameters
      if (temperature !== undefined) {
        requestBody.temperature = temperature;
      }
      if (maxTokens !== undefined) {
        requestBody.max_tokens = maxTokens;
      }

      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(requestBody),
        signal: AbortSignal.timeout(this.timeout),
        temperature,
        maxTokens,
      });

      if (!response.ok) {
        const errorText = await response.text();
        this.logger.error(`mana-llm API error: ${response.status} - ${errorText}`);
        return err(ServiceError.externalError('mana-llm', `API error: ${response.status}`));
      }

      const data: ChatCompletionResponse = await response.json();

      if (!data.choices?.[0]?.message?.content) {
      if (!result.content) {
        this.logger.warn('No message content in mana-llm response');
        return err(ServiceError.generationFailed('mana-llm', 'No response generated'));
      }

      const usage = data.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

      // Log performance metrics
      if (usage.completion_tokens) {
      if (result.usage.completion_tokens) {
        this.logger.debug(
          `Generated ${usage.completion_tokens} tokens (total: ${usage.total_tokens})`
          `Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
        );
      }

      return ok({
        content: data.choices[0].message.content,
        content: result.content,
        usage: {
          prompt_tokens: usage.prompt_tokens,
          completion_tokens: usage.completion_tokens,
          total_tokens: usage.total_tokens,
          prompt_tokens: result.usage.prompt_tokens,
          completion_tokens: result.usage.completion_tokens,
          total_tokens: result.usage.total_tokens,
        },
      });
    } catch (error) {

@@ -160,14 +91,8 @@ export class OllamaService {

  async listModels(): Promise<string[]> {
    try {
      const response = await fetch(`${this.baseUrl}/v1/models`, {
        signal: AbortSignal.timeout(5000),
      });
      if (!response.ok) {
        return [];
      }
      const data = await response.json();
      return (data.data || []).map((m: LlmModel) => m.id);
      const models = await this.llm.listModels();
      return models.map((m) => m.id);
    } catch {
      return [];
    }
@@ -21,8 +21,9 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-drizzle-config": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
@@ -1,5 +1,5 @@
import { Injectable, BadRequestException, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import { TokenService } from '../token/token.service';

interface GenerateOptions {

@@ -19,14 +19,11 @@ function estimateTokens(text: string): number {
@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly manaLlmUrl: string;

  constructor(
    private configService: ConfigService,
    private readonly llm: LlmClientService,
    private tokenService: TokenService
  ) {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
  }
  ) {}

  async generate(userId: string, options: GenerateOptions) {
    const model = options.model || 'ollama/gemma3:4b';

@@ -51,11 +48,16 @@ export class AiService {
    }

    // Generate text via mana-llm
    const completionText = await this.generateWithManaLlm(fullPrompt, options, model);
    const result = await this.llm.chat(fullPrompt, {
      model,
      systemPrompt: 'You are a helpful assistant.',
      temperature: options.temperature || 0.7,
      maxTokens: options.maxTokens || 2000,
    });

    // Calculate actual cost and log
    const actualPromptTokens = estimateTokens(fullPrompt);
    const completionTokens = estimateTokens(completionText);
    // Use actual token counts from response when available, fall back to estimates
    const actualPromptTokens = result.usage.prompt_tokens || estimateTokens(fullPrompt);
    const completionTokens = result.usage.completion_tokens || estimateTokens(result.content);
    const { tokensUsed, remainingBalance } = await this.tokenService.logUsage(
      userId,
      model,

@@ -65,7 +67,7 @@ export class AiService {
    );

    return {
      text: completionText,
      text: result.content,
      tokenInfo: {
        promptTokens: actualPromptTokens,
        completionTokens,

@@ -110,34 +112,4 @@ export class AiService {
      balance,
    };
  }

  private async generateWithManaLlm(
    prompt: string,
    options: GenerateOptions,
    model: string
  ): Promise<string> {
    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: prompt },
        ],
        temperature: options.temperature || 0.7,
        max_tokens: options.maxTokens || 2000,
      }),
      signal: AbortSignal.timeout(120000),
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.logger.error(`mana-llm error: ${response.status} - ${errorText}`);
      throw new BadRequestException(`LLM generation failed: ${response.status}`);
    }

    const data = await response.json();
    return data.choices?.[0]?.message?.content || '';
  }
}
@@ -1,7 +1,8 @@
import { Module } from '@nestjs/common';
import { APP_FILTER } from '@nestjs/core';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { SpaceModule } from './space/space.module';

@@ -22,6 +23,14 @@ import { HttpExceptionFilter } from './common/http-exception.filter';
        limit: 100,
      },
    ]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'context-backend' }),
    SpaceModule,
@@ -21,11 +21,12 @@
    "test:e2e": "jest --config ./test/jest-e2e.json"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@google/genai": "^1.14.0",
    "@manacore/manadeck-database": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-errors": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/axios": "^4.0.1",
    "@nestjs/common": "^11.0.1",
    "@nestjs/config": "^4.0.2",
@@ -4,6 +4,7 @@ import { ClsModule } from 'nestjs-cls';
import { TerminusModule } from '@nestjs/terminus';
import { HttpModule } from '@nestjs/axios';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { LlmModule } from '@manacore/shared-llm';
import { AppController } from './app.controller';
import { AppService } from './app.service';
import { ApiController } from './controllers/api.controller';

@@ -50,6 +51,16 @@ import {
      inject: [ConfigService],
    }) as any,

    // LLM (via mana-llm service)
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),

    // Health checks
    TerminusModule,
    HttpModule,
@@ -1,6 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { GoogleGenAI, Type } from '@google/genai';
import { LlmClientService } from '@manacore/shared-llm';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';

export type CardType = 'text' | 'flashcard' | 'quiz' | 'mixed';

@@ -50,32 +49,16 @@ export interface DeckGenerationData {
@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly ai: GoogleGenAI | null;
  private readonly model = 'gemini-2.0-flash';

  constructor(private readonly configService: ConfigService) {
    const apiKey = this.configService.get<string>('GOOGLE_GENAI_API_KEY');

    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
      this.logger.log('Google Gemini AI initialized successfully');
    } else {
      this.ai = null;
      this.logger.warn('Google Gemini API key not configured - AI features disabled');
    }
  }
  constructor(private readonly llm: LlmClientService) {}

  isAvailable(): boolean {
    return this.ai !== null;
    return true;
  }

  async generateDeck(request: DeckGenerationRequest): AsyncResult<DeckGenerationData> {
    const startTime = Date.now();

    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    const {
      prompt,
      deckTitle,

@@ -96,28 +79,23 @@ export class AiService {
      cardTypes
    );

    const response = await this.ai.models.generateContent({
      model: this.model,
      contents: userPrompt,
      config: {
        systemInstruction: systemPrompt,
        responseMimeType: 'application/json',
        responseSchema: this.buildResponseSchema(cardTypes),
    const { data, usage } = await this.llm.json<{ cards: GeneratedCard[] }>(userPrompt, {
      systemPrompt,
      temperature: 0.7,
      validate: (raw) => {
        const obj = raw as { cards: GeneratedCard[] };
        if (!obj.cards || !Array.isArray(obj.cards)) {
          throw new Error('Response must contain a "cards" array');
        }
        return obj;
      },
    });

    const generationTime = Date.now() - startTime;
    const responseText = response.text?.trim();

    if (!responseText) {
      return err(ServiceError.generationFailed('Google Gemini', 'Empty response from AI'));
    }

    const parsed = JSON.parse(responseText);
    const cards: GeneratedCard[] = parsed.cards || [];
    const cards = data.cards;

    if (cards.length === 0) {
      return err(ServiceError.generationFailed('Google Gemini', 'No cards generated'));
      return err(ServiceError.generationFailed('mana-llm', 'No cards generated'));
    }

    this.logger.log(`Generated ${cards.length} cards in ${generationTime}ms`);

@@ -125,8 +103,8 @@ export class AiService {
    return ok({
      cards,
      metadata: {
        model: this.model,
        tokensUsed: response.usageMetadata?.totalTokenCount,
        model: 'mana-llm',
        tokensUsed: usage.total_tokens || undefined,
        generationTime,
      },
    });

@@ -135,7 +113,7 @@ export class AiService {

      return err(
        ServiceError.generationFailed(
          'Google Gemini',
          'mana-llm',
          error instanceof Error ? error.message : 'Unknown error occurred',
          error instanceof Error ? error : undefined
        )

@@ -176,7 +154,33 @@ QUALITY GUIDELINES:
4. For quiz: all 4 options should be plausible, avoid obviously wrong answers
5. Include helpful hints for difficult flashcards
6. Add explanations for quiz questions to reinforce learning
7. Progress from easier to harder cards when possible`;
7. Progress from easier to harder cards when possible

RESPONSE FORMAT:
You MUST respond with a valid JSON object containing a "cards" array. Each card has:
${this.buildJsonSchemaDescription(cardTypes)}`;
  }

  private buildJsonSchemaDescription(cardTypes: CardType[]): string {
    const schemas: string[] = [];

    if (cardTypes.includes('flashcard')) {
      schemas.push(
        `- Flashcard: { "cardType": "flashcard", "title": "optional title", "content": { "front": "question/term", "back": "answer/definition", "hint": "optional hint" } }`
      );
    }
    if (cardTypes.includes('quiz')) {
      schemas.push(
        `- Quiz: { "cardType": "quiz", "title": "optional title", "content": { "question": "the question", "options": ["A", "B", "C", "D"], "correctAnswer": 0, "explanation": "why this is correct" } }`
      );
    }
    if (cardTypes.includes('text')) {
      schemas.push(
        `- Text: { "cardType": "text", "title": "optional title", "content": { "text": "informational content" } }`
      );
    }

    return schemas.join('\n');
  }

  private buildUserPrompt(

@@ -200,7 +204,9 @@ CARD DISTRIBUTION:
${typeDistribution}

Generate exactly ${cardCount} cards that cover the topic comprehensively.
Ensure variety in the questions and good coverage of the subject matter.`;
Ensure variety in the questions and good coverage of the subject matter.

Respond ONLY with a JSON object: {"cards": [...]}`;
  }

  private suggestTypeDistribution(cardCount: number, cardTypes: CardType[]): string {

@@ -229,7 +235,7 @@ Ensure variety in the questions and good coverage of the subject matter.`;
  }

  /**
   * Generate cards from an image using Gemini Vision
   * Generate cards from an image using vision model
   */
  async generateFromImage(
    imageBase64: string,

@@ -238,59 +244,41 @@ Ensure variety in the questions and good coverage of the subject matter.`;
  ): AsyncResult<DeckGenerationData> {
    const startTime = Date.now();

    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    try {
      const prompt = `Analyze this image and create ${cardCount} educational flashcards based on its content.
${context ? `Context: ${context}` : ''}

For each concept, term, or important element you identify in the image, create a flashcard or quiz question.

Return the cards as a JSON object with a "cards" array containing objects with:
Return ONLY a JSON object: {"cards": [...]} where each card has:
- cardType: "flashcard" or "quiz"
- title: short title
- content: { front, back, hint } for flashcards OR { question, options, correctAnswer, explanation } for quiz`;

      const response = await this.ai.models.generateContent({
        model: this.model,
        contents: [
          {
            role: 'user',
            parts: [
              { text: prompt },
              {
                inlineData: {
                  mimeType: 'image/jpeg',
                  data: imageBase64,
                },
              },
            ],
      const { data, usage } = await this.llm.visionJson<{ cards: GeneratedCard[] }>(
        prompt,
        imageBase64,
        'image/jpeg',
        {
          validate: (raw) => {
            const obj = raw as { cards: GeneratedCard[] };
            if (!obj.cards || !Array.isArray(obj.cards)) {
              throw new Error('Response must contain a "cards" array');
            }
            return obj;
          },
        ],
        config: {
          responseMimeType: 'application/json',
        },
      });
        }
      );

      const generationTime = Date.now() - startTime;
      const responseText = response.text?.trim();

      if (!responseText) {
        return err(ServiceError.generationFailed('Google Gemini', 'Empty response from AI'));
      }

      const parsed = JSON.parse(responseText);
      const cards: GeneratedCard[] = parsed.cards || [];

      this.logger.log(`Generated ${cards.length} cards from image in ${generationTime}ms`);
      this.logger.log(`Generated ${data.cards.length} cards from image in ${generationTime}ms`);

      return ok({
        cards,
        cards: data.cards,
        metadata: {
          model: this.model,
          tokensUsed: response.usageMetadata?.totalTokenCount,
          model: 'mana-llm',
          tokensUsed: usage.total_tokens || undefined,
          generationTime,
        },
      });

@@ -298,7 +286,7 @@ Return the cards as a JSON object with a "cards" array containing objects with:
      this.logger.error('AI image generation failed:', error);
      return err(
        ServiceError.generationFailed(
          'Google Gemini',
          'mana-llm',
          error instanceof Error ? error.message : 'Unknown error'
        )
      );

@@ -312,109 +300,24 @@ Return the cards as a JSON object with a "cards" array containing objects with:
    content: string,
    cardType: string
  ): AsyncResult<{ enhancedContent: string }> {
    if (!this.ai) {
      return err(ServiceError.unavailable('AI (Google Gemini not configured)'));
    }

    try {
      const prompt = `Improve and enhance this ${cardType} card content. Make it clearer, more educational, and engaging.
      const result = await this.llm.chat(
        `Improve and enhance this ${cardType} card content. Make it clearer, more educational, and engaging.

Original content:
${content}

Return the enhanced content in the same JSON format as the input, but improved.`;
Return the enhanced content in the same JSON format as the input, but improved.`
      );

      const response = await this.ai.models.generateContent({
        model: this.model,
        contents: prompt,
        config: {
          responseMimeType: 'application/json',
        },
      });

      const responseText = response.text?.trim();
      if (!responseText) {
      if (!result.content) {
        return ok({ enhancedContent: content });
      }

      return ok({ enhancedContent: responseText });
      return ok({ enhancedContent: result.content });
    } catch (error) {
      this.logger.error('AI content enhancement failed:', error);
      return ok({ enhancedContent: content }); // Return original on failure
      return ok({ enhancedContent: content });
    }
  }

  private buildResponseSchema(cardTypes: CardType[]): any {
    const cardSchemas: any[] = [];

    if (cardTypes.includes('flashcard')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['flashcard'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              front: { type: Type.STRING },
              back: { type: Type.STRING },
              hint: { type: Type.STRING },
            },
            required: ['front', 'back'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    if (cardTypes.includes('quiz')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['quiz'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              question: { type: Type.STRING },
              options: { type: Type.ARRAY, items: { type: Type.STRING } },
              correctAnswer: { type: Type.NUMBER },
              explanation: { type: Type.STRING },
            },
            required: ['question', 'options', 'correctAnswer'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    if (cardTypes.includes('text')) {
      cardSchemas.push({
        type: Type.OBJECT,
        properties: {
          cardType: { type: Type.STRING, enum: ['text'] },
          title: { type: Type.STRING },
          content: {
            type: Type.OBJECT,
            properties: {
              text: { type: Type.STRING },
            },
            required: ['text'],
          },
        },
        required: ['cardType', 'content'],
      });
    }

    return {
      type: Type.OBJECT,
      properties: {
        cards: {
          type: Type.ARRAY,
          items: cardSchemas.length === 1 ? cardSchemas[0] : { anyOf: cardSchemas },
        },
      },
      required: ['cards'],
    };
  }
}
@@ -23,17 +23,18 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@google/generative-ai": "^0.21.0",
    "@manacore/shared-error-tracking": "workspace:*",
    "@nutriphi/shared": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
    "@manacore/shared-nestjs-setup": "workspace:*",
    "@google/generative-ai": "^0.21.0",
    "@nestjs/common": "^10.4.15",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
    "@nestjs/platform-express": "^10.4.15",
    "@nutriphi/shared": "workspace:*",
    "class-transformer": "^0.5.1",
    "class-validator": "^0.14.1",
    "dotenv": "^16.4.7",
@@ -1,5 +1,5 @@
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { Injectable, Logger } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';
import type { AIAnalysisResult } from '../types/nutrition.types';

const ANALYSIS_PROMPT = `Du bist ein Ernährungsexperte. Analysiere das Bild dieser Mahlzeit und liefere eine detaillierte Nährwertanalyse.

@@ -75,95 +75,28 @@ Antworte NUR mit einem validen JSON-Objekt im folgenden Format:
}`;

@Injectable()
export class GeminiService implements OnModuleInit {
export class GeminiService {
  private readonly logger = new Logger(GeminiService.name);
  private manaLlmUrl: string | null = null;
  private readonly visionModel = 'ollama/llava:7b';
  private readonly textModel = 'ollama/gemma3:4b';

  constructor(private configService: ConfigService) {}

  onModuleInit() {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.logger.log(`NutriPhi AI using mana-llm at ${this.manaLlmUrl}`);
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzeImage(imageBase64: string, mimeType = 'image/jpeg'): Promise<AIAnalysisResult> {
    if (!this.manaLlmUrl) {
      throw new Error('mana-llm not configured');
    }

    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.visionModel,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: ANALYSIS_PROMPT },
              {
                type: 'image_url',
                image_url: { url: `data:${mimeType};base64,${imageBase64}` },
              },
            ],
          },
        ],
        temperature: 0.3,
      }),
      signal: AbortSignal.timeout(120000),
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.logger.error(`mana-llm vision error: ${response.status} - ${errorText}`);
      throw new Error('Failed to analyze image');
    }

    const data = await response.json();
    const text = data.choices?.[0]?.message?.content || '';

    // Extract JSON from response
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('Failed to parse AI response');
    }

    return JSON.parse(jsonMatch[0]) as AIAnalysisResult;
    const { data } = await this.llm.visionJson<AIAnalysisResult>(
      ANALYSIS_PROMPT,
      imageBase64,
      mimeType,
      { temperature: 0.3 }
    );
    return data;
  }

  async analyzeText(description: string): Promise<AIAnalysisResult> {
    if (!this.manaLlmUrl) {
      throw new Error('mana-llm not configured');
    }

    const prompt = TEXT_ANALYSIS_PROMPT.replace('{INPUT}', description);

    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: this.textModel,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.3,
      }),
      signal: AbortSignal.timeout(60000),
    const { data } = await this.llm.json<AIAnalysisResult>(prompt, {
      temperature: 0.3,
      timeout: 60_000,
    });

    if (!response.ok) {
      throw new Error(`mana-llm error: ${response.status}`);
    }

    const data = await response.json();
    const text = data.choices?.[0]?.message?.content || '';

    // Extract JSON from response
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('Failed to parse AI response');
    }

    return JSON.parse(jsonMatch[0]) as AIAnalysisResult;
    return data;
  }
}
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';

@@ -16,6 +17,14 @@ import { RecommendationsModule } from './recommendations/recommendations.module'
      isGlobal: true,
      envFilePath: ['.env', '.env.development'],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'nutriphi-backend' }),
    MetricsModule.register({
@@ -18,8 +18,9 @@
    "db:seed": "tsx src/db/seed.ts"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@google/generative-ai": "^0.21.0",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
@@ -1,5 +1,5 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';
import type { AnalysisResult } from '@planta/shared';

const PLANT_ANALYSIS_PROMPT = `Du bist ein erfahrener Botaniker und Pflanzenexperte. Analysiere dieses Pflanzenfoto und erstelle einen detaillierten Steckbrief.

@@ -43,70 +43,32 @@ Falls du die Pflanze nicht identifizieren kannst, setze confidence auf 0 und sci
@Injectable()
export class VisionService {
  private readonly logger = new Logger(VisionService.name);
  private readonly manaLlmUrl: string;
  private readonly visionModel = 'ollama/llava:7b';

  constructor(private configService: ConfigService) {
    this.manaLlmUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
    this.logger.log(`Planta Vision using mana-llm at ${this.manaLlmUrl}`);
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzePlantImage(imageBuffer: Buffer, mimeType: string): Promise<AnalysisResult | null> {
    try {
      const base64 = imageBuffer.toString('base64');

      const result = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: this.visionModel,
          messages: [
            {
              role: 'user',
              content: [
                { type: 'text', text: PLANT_ANALYSIS_PROMPT },
                {
                  type: 'image_url',
                  image_url: { url: `data:${mimeType};base64,${base64}` },
                },
              ],
            },
          ],
      const { data } = await this.llm.visionJson<AnalysisResult>(
        PLANT_ANALYSIS_PROMPT,
        base64,
        mimeType,
        {
          temperature: 0.3,
        }),
        signal: AbortSignal.timeout(120000),
      });

      if (!result.ok) {
        const errorText = await result.text();
        this.logger.error(`mana-llm vision error: ${result.status} - ${errorText}`);
        return null;
      }

      const data = await result.json();
      const response = (data.choices?.[0]?.message?.content || '').trim();

      this.logger.debug(`Gemini raw response: ${response}`);

      // Parse JSON response - handle potential markdown code blocks
      let jsonStr = response;
      if (response.includes('```')) {
        const match = response.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (match) {
          jsonStr = match[1].trim();
          validate: (raw) => {
            const result = raw as AnalysisResult;
            this.validateAnalysisResult(result);
            return result;
          },
        }
      }

      const parsed = JSON.parse(jsonStr) as AnalysisResult;

      // Validate and sanitize response
      this.validateAnalysisResult(parsed);

      this.logger.log(
        `Plant identified: ${parsed.identification.scientificName} (${parsed.identification.confidence}% confidence)`
      );

      return parsed;
      this.logger.log(
        `Plant identified: ${data.identification.scientificName} (${data.identification.confidence}% confidence)`
      );

      return data;
    } catch (error) {
      this.logger.error(`Vision analysis failed: ${error}`);
      return null;

@@ -114,7 +76,6 @@ export class VisionService {
  }

  private validateAnalysisResult(result: AnalysisResult): void {
    // Validate identification
    if (!result.identification) {
      result.identification = {
        scientificName: 'Unbekannt',

@@ -123,13 +84,11 @@ export class VisionService {
      };
    }

    // Ensure confidence is within range
    if (typeof result.identification.confidence !== 'number') {
      result.identification.confidence = 0;
    }
    result.identification.confidence = Math.max(0, Math.min(100, result.identification.confidence));

    // Validate health
    if (!result.health) {
      result.health = {
        status: 'healthy',

@@ -143,7 +102,6 @@ export class VisionService {
      result.health.status = 'healthy';
    }

    // Validate care
    if (!result.care) {
      result.care = {
        light: 'medium',
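The markdown-fence stripping removed above, and the bare-object regex removed from the nutriphi service, are what the new extractJson utility consolidates. A minimal sketch, assuming behavior equivalent to the removed code; the actual utility in @manacore/shared-llm may differ in details:

// Illustrative extractJson; mirrors the regexes the migrated services used to
// carry (fenced ```json blocks, bare {...} spans). Not the package's actual code.
export function extractJson<T>(text: string): T {
  let jsonStr = text.trim();
  // Strip a markdown code fence if present.
  const fenced = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenced) {
    jsonStr = fenced[1].trim();
  } else {
    // Otherwise fall back to the first {...} span in the response.
    const bare = jsonStr.match(/\{[\s\S]*\}/);
    if (bare) jsonStr = bare[0];
  }
  return JSON.parse(jsonStr) as T;
}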
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { DatabaseModule } from './db/database.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';

@@ -14,6 +15,14 @@ import { WateringModule } from './watering/watering.module';
      isGlobal: true,
      envFilePath: '.env',
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    DatabaseModule,
    HealthModule.forRoot({ serviceName: 'planta-backend' }),
    MetricsModule.register({
@@ -17,8 +17,9 @@
    "db:studio": "drizzle-kit studio"
  },
  "dependencies": {
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/nestjs-integration": "workspace:*",
    "@manacore/shared-error-tracking": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-nestjs-auth": "workspace:*",
    "@manacore/shared-nestjs-health": "workspace:*",
    "@manacore/shared-nestjs-metrics": "workspace:*",
@@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { MetricsModule } from '@manacore/shared-nestjs-metrics';
import { ManaCoreModule } from '@manacore/nestjs-integration';
import { HealthModule } from '@manacore/shared-nestjs-health';

@@ -25,6 +26,14 @@ import { GuideModule } from './guide/guide.module';
      }),
      inject: [ConfigService],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    MetricsModule.register({
      prefix: 'traces_',
      excludePaths: ['/health'],
@@ -2,6 +2,7 @@ import { Injectable, Inject, NotFoundException, ForbiddenException, Logger } fro
import { ConfigService } from '@nestjs/config';
import { eq, and, desc } from 'drizzle-orm';
import { CreditClientService } from '@manacore/nestjs-integration';
import { LlmClientService } from '@manacore/shared-llm';
import { DATABASE_CONNECTION } from '../db/database.module';
import type { Database } from '../db/connection';
import { guides, guidePois, pois, cities } from '../db/schema';

@@ -18,7 +19,8 @@ export class GuideService {
    private readonly configService: ConfigService,
    private readonly cityService: CityService,
    private readonly poiService: PoiService,
    private readonly creditClient: CreditClientService
    private readonly creditClient: CreditClientService,
    private readonly llm: LlmClientService
  ) {}

  async generateGuide(userId: string, request: GenerateGuideRequest) {

@@ -135,35 +137,20 @@ export class GuideService {

    // Step 3: Enrich POIs with AI summaries
    this.logger.log(`[${guideId}] Step 3: Content enrichment`);
    if (manaLlmUrl) {
      for (const poi of nearbyPois) {
        if (!poi.aiSummary) {
          try {
            const prompt =
              language === 'de'
                ? `Schreibe eine 200-Wort-Zusammenfassung über "${poi.name}" in ${city.name}. Fokus auf Baugeschichte, Architekturstil und interessante Anekdoten.`
                : `Write a 200-word summary about "${poi.name}" in ${city.name}. Focus on architectural history, style, and interesting anecdotes.`;
    for (const poi of nearbyPois) {
      if (!poi.aiSummary) {
        try {
          const prompt =
            language === 'de'
              ? `Schreibe eine 200-Wort-Zusammenfassung über "${poi.name}" in ${city.name}. Fokus auf Baugeschichte, Architekturstil und interessante Anekdoten.`
              : `Write a 200-word summary about "${poi.name}" in ${city.name}. Focus on architectural history, style, and interesting anecdotes.`;

            const llmResponse = await fetch(`${manaLlmUrl}/api/v1/chat/completions`, {
              method: 'POST',
              headers: { 'Content-Type': 'application/json' },
              body: JSON.stringify({
                messages: [{ role: 'user', content: prompt }],
                model: 'default',
                max_tokens: 500,
              }),
            });

            if (llmResponse.ok) {
              const data = await llmResponse.json();
              const summary = data.choices?.[0]?.message?.content;
              if (summary) {
                await this.poiService.updateAiSummary(poi.id, summary, language);
              }
            }
          } catch (err) {
            this.logger.warn(`AI summary failed for POI ${poi.name}:`, err);
          const result = await this.llm.chat(prompt, { maxTokens: 500 });
          if (result.content) {
            await this.poiService.updateAiSummary(poi.id, result.content, language);
          }
        } catch (err) {
          this.logger.warn(`AI summary failed for POI ${poi.name}:`, err);
        }
      }
    }

@@ -197,43 +184,29 @@ export class GuideService {
      const poi = sortedPois[i];
      let narrative: string | null = null;

      if (manaLlmUrl) {
        try {
          const prevStation = i > 0 ? sortedPois[i - 1].name : 'Startpunkt';
          const distanceToPrev =
            i > 0
              ? Math.round(
                  this.haversineDistance(
                    sortedPois[i - 1].latitude,
                    sortedPois[i - 1].longitude,
                    poi.latitude,
                    poi.longitude
                  )
      try {
        const prevStation = i > 0 ? sortedPois[i - 1].name : 'Startpunkt';
        const distanceToPrev =
          i > 0
            ? Math.round(
                this.haversineDistance(
                  sortedPois[i - 1].latitude,
                  sortedPois[i - 1].longitude,
                  poi.latitude,
                  poi.longitude
                )
              : 0;
              )
            : 0;

          const prompt =
            language === 'de'
              ? `Du bist ein erfahrener Stadtführer in ${city.name}. Schreibe einen kurzen, lebendigen Stadtführer-Text (80-120 Wörter) über "${poi.name}" als Station ${i + 1} einer Stadtführung. ${i > 0 ? `Die vorherige Station war "${prevStation}" (${distanceToPrev}m entfernt).` : 'Dies ist die erste Station.'} Erwähne architektonische Details und eine interessante Anekdote.`
              : `You are an experienced city guide in ${city.name}. Write a short, vivid guide text (80-120 words) about "${poi.name}" as station ${i + 1} of a walking tour. ${i > 0 ? `The previous station was "${prevStation}" (${distanceToPrev}m away).` : 'This is the first station.'} Mention architectural details and an interesting anecdote.`;
        const prompt =
          language === 'de'
            ? `Du bist ein erfahrener Stadtführer in ${city.name}. Schreibe einen kurzen, lebendigen Stadtführer-Text (80-120 Wörter) über "${poi.name}" als Station ${i + 1} einer Stadtführung. ${i > 0 ? `Die vorherige Station war "${prevStation}" (${distanceToPrev}m entfernt).` : 'Dies ist die erste Station.'} Erwähne architektonische Details und eine interessante Anekdote.`
            : `You are an experienced city guide in ${city.name}. Write a short, vivid guide text (80-120 words) about "${poi.name}" as station ${i + 1} of a walking tour. ${i > 0 ? `The previous station was "${prevStation}" (${distanceToPrev}m away).` : 'This is the first station.'} Mention architectural details and an interesting anecdote.`;

          const llmResponse = await fetch(`${manaLlmUrl}/api/v1/chat/completions`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              messages: [{ role: 'user', content: prompt }],
              model: 'default',
              max_tokens: 300,
            }),
          });

          if (llmResponse.ok) {
            const data = await llmResponse.json();
            narrative = data.choices?.[0]?.message?.content || null;
          }
        } catch (err) {
          this.logger.warn(`Narrative generation failed for POI ${poi.name}:`, err);
        }
        const result = await this.llm.chat(prompt, { maxTokens: 300 });
        narrative = result.content || null;
      } catch (err) {
        this.logger.warn(`Narrative generation failed for POI ${poi.name}:`, err);
      }

      guidePoiRecords.push({
@@ -71,6 +71,7 @@
    "prepublishOnly": "pnpm build"
  },
  "dependencies": {
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/common": "^11.0.20",
    "@nestjs/config": "^4.0.2",
    "date-fns": "^4.1.0",
@ -1,4 +1,6 @@
|
|||
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
|
||||
import { LlmClient, resolveOptions } from '@manacore/shared-llm';
|
||||
import type { ModelInfo } from '@manacore/shared-llm';
|
||||
import {
|
||||
OllamaModel,
|
||||
ChatMessage,
|
||||
|
|
@ -9,23 +11,34 @@ import {
|
|||
SYSTEM_PROMPTS,
|
||||
VISION_MODELS,
|
||||
NON_CHAT_MODELS,
|
||||
OllamaVersionResponse,
|
||||
OllamaTagsResponse,
|
||||
OllamaChatResponse,
|
||||
} from './types';
|
||||
|
||||
@Injectable()
|
||||
export class AiService implements OnModuleInit {
|
||||
private readonly logger = new Logger(AiService.name);
|
||||
private readonly config: AiServiceConfig;
|
||||
private readonly llm: LlmClient;
|
||||
private sessions: Map<string, UserAiSession> = new Map();
|
||||
|
||||
constructor(config?: Partial<AiServiceConfig>) {
|
||||
this.config = {
|
||||
baseUrl: config?.baseUrl ?? process.env.OLLAMA_URL ?? 'http://localhost:11434',
|
||||
baseUrl:
|
||||
config?.baseUrl ??
|
||||
process.env.MANA_LLM_URL ??
|
||||
process.env.OLLAMA_URL ??
|
||||
'http://localhost:3025',
|
||||
defaultModel: config?.defaultModel ?? process.env.OLLAMA_MODEL ?? 'gemma3:4b',
|
||||
timeout: config?.timeout ?? parseInt(process.env.OLLAMA_TIMEOUT ?? '120000'),
|
||||
};
|
||||
|
||||
this.llm = new LlmClient(
|
||||
resolveOptions({
|
||||
manaLlmUrl: this.config.baseUrl,
|
||||
defaultModel: this.normalizeModel(this.config.defaultModel),
|
||||
timeout: this.config.timeout,
|
||||
maxRetries: 1,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
async onModuleInit() {
|
||||
|
|
@ -36,14 +49,15 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async checkConnection(): Promise<boolean> {
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/version`, {
|
||||
signal: AbortSignal.timeout(5000),
|
||||
});
|
||||
const data = (await response.json()) as OllamaVersionResponse;
|
||||
this.logger.log(`Ollama connected: v${data.version}`);
|
||||
return true;
|
||||
const health = await this.llm.health();
|
||||
const isConnected = health.status === 'healthy' || health.status === 'degraded';
|
||||
if (isConnected) {
|
||||
const providers = Object.keys(health.providers || {}).join(', ');
|
||||
this.logger.log(`mana-llm connected: ${health.status}, providers: ${providers}`);
|
||||
}
|
||||
return isConnected;
|
||||
} catch (error) {
|
||||
this.logger.error(`Failed to connect to Ollama at ${this.config.baseUrl}:`, error);
|
||||
this.logger.error(`Failed to connect to mana-llm at ${this.config.baseUrl}:`, error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -52,9 +66,12 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async listModels(): Promise<OllamaModel[]> {
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/tags`);
|
||||
const data = (await response.json()) as OllamaTagsResponse;
|
||||
return data.models || [];
|
||||
const models = await this.llm.listModels();
|
||||
return models.map((m: ModelInfo) => ({
|
||||
name: m.id,
|
||||
size: 0,
|
||||
modified_at: new Date(m.created * 1000).toISOString(),
|
||||
}));
|
||||
} catch (error) {
|
||||
this.logger.error('Failed to list models:', error);
|
||||
return [];
|
||||
|
|
@ -79,55 +96,38 @@ export class AiService implements OnModuleInit {
|
|||
|
||||
async chat(messages: ChatMessage[], options?: ChatOptions): Promise<ChatResult> {
|
||||
const model = options?.model ?? this.config.defaultModel;
|
||||
const normalizedModel = this.normalizeModel(model);
|
||||
|
||||
try {
|
||||
const response = await fetch(`${this.config.baseUrl}/api/chat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: options?.temperature,
|
||||
num_predict: options?.maxTokens,
|
||||
},
|
||||
}),
|
||||
signal: AbortSignal.timeout(this.config.timeout),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Ollama API error: ${response.status}`);
|
||||
const result = await this.llm.chatMessages(
|
||||
messages.map((m) => ({
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
})),
|
||||
{
|
||||
model: normalizedModel,
|
||||
temperature: options?.temperature,
|
||||
maxTokens: options?.maxTokens,
|
||||
}
|
||||
);
|
||||
|
||||
const data = (await response.json()) as OllamaChatResponse;
|
||||
const meta = {
|
||||
model,
|
||||
evalCount: result.usage.completion_tokens,
|
||||
evalDuration: undefined as number | undefined,
|
||||
tokensPerSecond: undefined as number | undefined,
|
||||
};
|
||||
|
||||
const meta = {
|
||||
model,
|
||||
evalCount: data.eval_count,
|
||||
evalDuration: data.eval_duration,
|
||||
tokensPerSecond:
|
||||
data.eval_count && data.eval_duration
|
||||
? (data.eval_count / data.eval_duration) * 1e9
|
||||
: undefined,
|
||||
};
|
||||
|
||||
if (meta.tokensPerSecond) {
|
||||
this.logger.debug(
|
||||
`Generated ${meta.evalCount} tokens at ${meta.tokensPerSecond.toFixed(1)} t/s`
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
content: data.message?.content || '',
|
||||
meta,
|
||||
};
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'TimeoutError') {
|
||||
throw new Error('Ollama Timeout - Antwort dauerte zu lange');
|
||||
}
|
||||
throw error;
|
||||
if (meta.evalCount && result.latencyMs > 0) {
|
||||
meta.tokensPerSecond = (meta.evalCount / result.latencyMs) * 1000;
|
||||
this.logger.debug(
|
||||
`Generated ${meta.evalCount} tokens at ${meta.tokensPerSecond.toFixed(1)} t/s`
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
content: result.content,
|
||||
meta,
|
||||
};
|
||||
}
|
||||
|
||||
async chatSimple(userId: string, message: string, options?: ChatOptions): Promise<string> {
|
||||
|
|
@@ -162,51 +162,27 @@ export class AiService implements OnModuleInit {

   async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<ChatResult> {
     const selectedModel = model ?? this.config.defaultModel;
+    const normalizedModel = this.normalizeModel(selectedModel);

-    try {
-      const response = await fetch(`${this.config.baseUrl}/api/chat`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({
-          model: selectedModel,
-          messages: [
-            {
-              role: 'user',
-              content: prompt,
-              images: [imageBase64],
-            },
-          ],
-          stream: false,
-        }),
-        signal: AbortSignal.timeout(this.config.timeout),
-      });
-
-      if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
-      }
-
-      const data = (await response.json()) as OllamaChatResponse;
-
-      const meta = {
-        model: selectedModel,
-        evalCount: data.eval_count,
-        evalDuration: data.eval_duration,
-        tokensPerSecond:
-          data.eval_count && data.eval_duration
-            ? (data.eval_count / data.eval_duration) * 1e9
-            : undefined,
-      };
-
-      return {
-        content: data.message?.content || '',
-        meta,
-      };
-    } catch (error) {
-      if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
-      }
-      throw error;
-    }
+    const result = await this.llm.vision(prompt, imageBase64, 'image/png', {
+      model: normalizedModel,
+    });
+
+    const meta = {
+      model: selectedModel,
+      evalCount: result.usage.completion_tokens,
+      evalDuration: undefined as number | undefined,
+      tokensPerSecond: undefined as number | undefined,
+    };
+
+    if (meta.evalCount && result.latencyMs > 0) {
+      meta.tokensPerSecond = (meta.evalCount / result.latencyMs) * 1000;
+    }
+
+    return {
+      content: result.content,
+      meta,
+    };
   }

   // ===== Compare Models =====
@@ -256,7 +232,7 @@ export class AiService implements OnModuleInit {
   setSessionModel(userId: string, model: string): void {
     const session = this.getSession(userId);
     session.model = model;
-    session.history = []; // Clear history when switching models
+    session.history = [];
   }

   setSessionSystemPrompt(userId: string, prompt: string): void {
@@ -303,4 +279,9 @@ export class AiService implements OnModuleInit {
     const entry = Object.entries(SYSTEM_PROMPTS).find(([_, v]) => v === session.systemPrompt);
     return entry ? entry[0] : 'custom';
   }
+
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) return model;
+    return `ollama/${model}`;
+  }
 }
@@ -44,6 +44,18 @@ export enum CreditOperationType {
   // Zitare - AI explanations
   AI_QUOTE_EXPLANATION = 'ai_quote_explanation',

+  // Planta - Plant analysis
+  AI_PLANT_ANALYSIS = 'ai_plant_analysis',
+
+  // Traces - City guide generation
+  AI_GUIDE_GENERATION = 'ai_guide_generation',
+
+  // Context - AI text generation
+  AI_CONTEXT_GENERATION = 'ai_context_generation',
+
+  // Matrix Bots - Bot chat
+  AI_BOT_CHAT = 'ai_bot_chat',
+
   // General AI features
   AI_SMART_SCHEDULING = 'ai_smart_scheduling',
   AI_SUGGESTIONS = 'ai_suggestions',
@@ -117,6 +129,11 @@ export const CREDIT_COSTS: Record<CreditOperationType, number> = {

   [CreditOperationType.AI_QUOTE_EXPLANATION]: 2,

+  [CreditOperationType.AI_PLANT_ANALYSIS]: 2,
+  [CreditOperationType.AI_GUIDE_GENERATION]: 5,
+  [CreditOperationType.AI_CONTEXT_GENERATION]: 2,
+  [CreditOperationType.AI_BOT_CHAT]: 0.1,
+
   [CreditOperationType.AI_SMART_SCHEDULING]: 2,
   [CreditOperationType.AI_SUGGESTIONS]: 2,
   [CreditOperationType.AI_ENRICHMENT]: 2,
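
The costs above compose additively per operation. As a sketch (assuming CREDIT_COSTS and the enum are the public exports of this credit-operations package), one plant analysis, one guide generation, and ten bot chat messages total 2 + 5 + 10 x 0.1 = 8 credits:

import { CREDIT_COSTS, CreditOperationType } from '@manacore/credit-operations';

// Hypothetical usage tally for one user session.
const session: Array<[CreditOperationType, number]> = [
  [CreditOperationType.AI_PLANT_ANALYSIS, 1],   // 2 credits
  [CreditOperationType.AI_GUIDE_GENERATION, 1], // 5 credits
  [CreditOperationType.AI_BOT_CHAT, 10],        // 10 x 0.1 = 1 credit
];

const total = session.reduce((sum, [op, n]) => sum + CREDIT_COSTS[op] * n, 0); // 8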
@@ -271,6 +288,38 @@ export const OPERATION_METADATA: Record<CreditOperationType, OperationMetadata>
     app: 'zitare',
   },

+  // Planta
+  [CreditOperationType.AI_PLANT_ANALYSIS]: {
+    name: 'Plant Analysis',
+    description: 'Identify and analyze a plant from a photo',
+    category: CreditCategory.AI,
+    app: 'planta',
+  },
+
+  // Traces
+  [CreditOperationType.AI_GUIDE_GENERATION]: {
+    name: 'City Guide Generation',
+    description: 'Generate an AI-powered city walking guide',
+    category: CreditCategory.AI,
+    app: 'traces',
+  },
+
+  // Context
+  [CreditOperationType.AI_CONTEXT_GENERATION]: {
+    name: 'AI Text Generation',
+    description: 'Generate or transform text with AI',
+    category: CreditCategory.AI,
+    app: 'context',
+  },
+
+  // Matrix Bots
+  [CreditOperationType.AI_BOT_CHAT]: {
+    name: 'Bot Chat Message',
+    description: 'Chat with AI via Matrix bot',
+    category: CreditCategory.AI,
+    app: 'matrix',
+  },
+
   // General AI
   [CreditOperationType.AI_SMART_SCHEDULING]: {
     name: 'Smart Scheduling',

packages/shared-llm/package.json (new file, 47 lines)
@@ -0,0 +1,47 @@
{
  "name": "@manacore/shared-llm",
  "version": "1.0.0",
  "private": true,
  "description": "Unified LLM client for all Mana backends via mana-llm service",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js",
      "require": "./dist/index.js"
    },
    "./standalone": {
      "types": "./dist/standalone.d.ts",
      "import": "./dist/standalone.js",
      "require": "./dist/standalone.js"
    }
  },
  "scripts": {
    "build": "tsc",
    "dev": "tsc --watch",
    "clean": "rm -rf dist",
    "type-check": "tsc --noEmit",
    "test": "vitest run"
  },
  "dependencies": {
    "@nestjs/common": "^10.0.0 || ^11.0.0",
    "@nestjs/config": "^3.0.0 || ^4.0.0",
    "@nestjs/core": "^10.0.0 || ^11.0.0",
    "reflect-metadata": "^0.1.13 || ^0.2.0",
    "rxjs": "^7.0.0"
  },
  "peerDependencies": {
    "@nestjs/common": "^10.0.0 || ^11.0.0",
    "@nestjs/config": "^3.0.0 || ^4.0.0",
    "@nestjs/core": "^10.0.0 || ^11.0.0"
  },
  "devDependencies": {
    "@types/node": "^20.0.0",
    "typescript": "^5.0.0",
    "vitest": "^2.0.0"
  },
  "files": [
    "dist"
  ]
}
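
The exports map above gives the package two entry points; a minimal sketch of the two import styles it enables:

// NestJS consumers use the root entry.
import { LlmModule, LlmClientService } from '@manacore/shared-llm';

// Non-NestJS consumers (e.g. bot-services) use the standalone subpath.
import { LlmClient } from '@manacore/shared-llm/standalone';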

packages/shared-llm/src/__tests__/json-extractor.spec.ts (new file, 119 lines)
@@ -0,0 +1,119 @@
import { describe, it, expect } from 'vitest';
import { extractJson } from '../utils/json-extractor';

describe('extractJson', () => {
  it('parses direct JSON object', () => {
    const result = extractJson('{"name": "test", "value": 42}');
    expect(result).toEqual({ name: 'test', value: 42 });
  });

  it('parses direct JSON array', () => {
    const result = extractJson('[1, 2, 3]');
    expect(result).toEqual([1, 2, 3]);
  });

  it('strips markdown json code fence', () => {
    const input = '```json\n{"category": "bug", "title": "Fix login"}\n```';
    const result = extractJson(input);
    expect(result).toEqual({ category: 'bug', title: 'Fix login' });
  });

  it('strips markdown code fence without json label', () => {
    const input = '```\n{"key": "value"}\n```';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'value' });
  });

  it('extracts JSON from surrounding text', () => {
    const input =
      'Here is the analysis:\n{"confidence": 0.95, "species": "Rose"}\nHope this helps!';
    const result = extractJson(input);
    expect(result).toEqual({ confidence: 0.95, species: 'Rose' });
  });

  it('extracts JSON array from surrounding text', () => {
    const input = 'The items are: [1, 2, 3] as requested.';
    const result = extractJson(input);
    expect(result).toEqual([1, 2, 3]);
  });

  it('handles nested JSON objects', () => {
    const input = '{"outer": {"inner": {"deep": true}}, "list": [1, 2]}';
    const result = extractJson(input);
    expect(result).toEqual({ outer: { inner: { deep: true } }, list: [1, 2] });
  });

  it('handles JSON with escaped quotes in strings', () => {
    const input = '{"text": "He said \\"hello\\""}';
    const result = extractJson(input);
    expect(result).toEqual({ text: 'He said "hello"' });
  });

  it('handles JSON with braces inside strings', () => {
    const input = 'Result: {"code": "if (x) { return }"}';
    const result = extractJson(input);
    expect(result).toEqual({ code: 'if (x) { return }' });
  });

  it('trims whitespace before parsing', () => {
    const input = '  \n  {"key": "value"}  \n  ';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'value' });
  });

  it('applies validation function on success', () => {
    const validate = (data: unknown) => {
      const obj = data as { name: string };
      if (!obj.name) throw new Error('missing name');
      return obj;
    };
    const result = extractJson('{"name": "test"}', validate);
    expect(result).toEqual({ name: 'test' });
  });

  it('throws when validation fails', () => {
    const validate = (data: unknown) => {
      const obj = data as { name?: string };
      if (!obj.name) throw new Error('missing name');
      return obj;
    };
    expect(() => extractJson('{"value": 123}', validate)).toThrow();
  });

  it('throws on completely invalid input', () => {
    expect(() => extractJson('This is just plain text with no JSON')).toThrow(
      'Failed to extract JSON'
    );
  });

  it('throws on empty input', () => {
    expect(() => extractJson('')).toThrow('Failed to extract JSON');
  });

  it('handles real-world LLM response with preamble', () => {
    const input = `Based on my analysis, here is the result:

\`\`\`json
{
  "foods": [
    {"name": "Apple", "calories": 95, "protein": 0.5}
  ],
  "totalCalories": 95,
  "confidence": 0.9
}
\`\`\`

This analysis is based on the image provided.`;

    const result = extractJson<{ foods: unknown[]; totalCalories: number }>(input);
    expect(result.totalCalories).toBe(95);
    expect(result.foods).toHaveLength(1);
  });

  it('prefers object over array when both exist', () => {
    // Direct parse fails, fence fails, tries object first
    const input = 'Some text {"key": "val"} and [1, 2, 3]';
    const result = extractJson(input);
    expect(result).toEqual({ key: 'val' });
  });
});

packages/shared-llm/src/__tests__/llm-client.spec.ts (new file, 277 lines)
@@ -0,0 +1,277 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { LlmClient } from '../llm-client';
import type { ResolvedLlmOptions } from '../interfaces/llm-options.interface';
import type { ChatCompletionResponse } from '../types/openai-compat.types';

const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);

const DEFAULT_OPTIONS: ResolvedLlmOptions = {
  manaLlmUrl: 'http://localhost:3025',
  defaultModel: 'ollama/gemma3:4b',
  defaultVisionModel: 'ollama/llava:7b',
  timeout: 30_000,
  maxRetries: 0, // No retries in tests for simplicity
  debug: false,
};

function mockCompletionResponse(
  content: string,
  model = 'ollama/gemma3:4b'
): ChatCompletionResponse {
  return {
    id: 'chatcmpl-test123',
    object: 'chat.completion',
    created: Date.now(),
    model,
    choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
  };
}

function mockFetchOk(body: unknown): void {
  mockFetch.mockResolvedValueOnce({
    ok: true,
    status: 200,
    json: () => Promise.resolve(body),
    text: () => Promise.resolve(JSON.stringify(body)),
  } as unknown as Response);
}

function mockFetchError(status: number, body = ''): void {
  mockFetch.mockResolvedValueOnce({
    ok: false,
    status,
    statusText: `Error ${status}`,
    json: () => Promise.resolve({}),
    text: () => Promise.resolve(body),
  } as unknown as Response);
}

describe('LlmClient', () => {
  let client: LlmClient;

  beforeEach(() => {
    vi.clearAllMocks();
    client = new LlmClient(DEFAULT_OPTIONS);
  });

  describe('chat', () => {
    it('sends correct request body', async () => {
      mockFetchOk(mockCompletionResponse('Hello!'));

      await client.chat('Hi there');

      expect(mockFetch).toHaveBeenCalledTimes(1);
      const [url, init] = mockFetch.mock.calls[0];
      expect(url).toBe('http://localhost:3025/v1/chat/completions');

      const body = JSON.parse(init.body);
      expect(body.model).toBe('ollama/gemma3:4b');
      expect(body.messages).toEqual([{ role: 'user', content: 'Hi there' }]);
      expect(body.stream).toBe(false);
    });

    it('includes system prompt when provided', async () => {
      mockFetchOk(mockCompletionResponse('Response'));

      await client.chat('Question', { systemPrompt: 'You are helpful.' });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.messages).toEqual([
        { role: 'system', content: 'You are helpful.' },
        { role: 'user', content: 'Question' },
      ]);
    });

    it('uses custom model and temperature', async () => {
      mockFetchOk(mockCompletionResponse('Response'));

      await client.chat('Prompt', { model: 'openrouter/gpt-4o', temperature: 0.3 });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('openrouter/gpt-4o');
      expect(body.temperature).toBe(0.3);
    });

    it('returns ChatResult with content and usage', async () => {
      mockFetchOk(mockCompletionResponse('Generated text'));

      const result = await client.chat('Prompt');

      expect(result.content).toBe('Generated text');
      expect(result.model).toBe('ollama/gemma3:4b');
      expect(result.usage.total_tokens).toBe(30);
      expect(result.latencyMs).toBeGreaterThanOrEqual(0);
    });

    it('throws on error response', async () => {
      mockFetchError(500, 'Internal Server Error');

      await expect(client.chat('Prompt')).rejects.toThrow('mana-llm error 500');
    });
  });

  describe('json', () => {
    it('extracts JSON from response', async () => {
      mockFetchOk(mockCompletionResponse('{"category": "bug", "title": "Fix it"}'));

      const result = await client.json<{ category: string; title: string }>('Analyze this');

      expect(result.data).toEqual({ category: 'bug', title: 'Fix it' });
      expect(result.content).toBe('{"category": "bug", "title": "Fix it"}');
    });

    it('extracts JSON from markdown-wrapped response', async () => {
      mockFetchOk(mockCompletionResponse('```json\n{"key": "value"}\n```'));

      const result = await client.json('Parse this');
      expect(result.data).toEqual({ key: 'value' });
    });

    it('applies validation function', async () => {
      mockFetchOk(mockCompletionResponse('{"name": "test"}'));

      const validate = (data: unknown) => {
        const obj = data as { name: string };
        if (typeof obj.name !== 'string') throw new Error('invalid');
        return obj;
      };

      const result = await client.json('Prompt', { validate });
      expect(result.data.name).toBe('test');
    });

    it('retries JSON extraction on parse failure', async () => {
      // First attempt returns bad JSON, second returns good
      mockFetchOk(mockCompletionResponse('not json at all'));
      mockFetchOk(mockCompletionResponse('{"valid": true}'));

      const result = await client.json('Prompt', { jsonRetries: 1 });
      expect(result.data).toEqual({ valid: true });
      expect(mockFetch).toHaveBeenCalledTimes(2);
    });
  });

  describe('vision', () => {
    it('builds multimodal message with base64 image', async () => {
      mockFetchOk(mockCompletionResponse('A rose'));

      await client.vision('What is this?', 'abc123base64', 'image/jpeg');

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('ollama/llava:7b');
      expect(body.messages[0].content).toEqual([
        { type: 'text', text: 'What is this?' },
        { type: 'image_url', image_url: { url: 'data:image/jpeg;base64,abc123base64' } },
      ]);
    });

    it('uses data URL as-is if already formatted', async () => {
      mockFetchOk(mockCompletionResponse('A cat'));

      await client.vision('What?', 'data:image/png;base64,xyz');

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      const imageUrl = body.messages[0].content[1].image_url.url;
      expect(imageUrl).toBe('data:image/png;base64,xyz');
    });

    it('uses custom vision model when specified', async () => {
      mockFetchOk(mockCompletionResponse('Result'));

      await client.vision('Prompt', 'img', 'image/jpeg', {
        visionModel: 'ollama/qwen3-vl:4b',
      });

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.model).toBe('ollama/qwen3-vl:4b');
    });
  });

  describe('visionJson', () => {
    it('extracts JSON from vision response', async () => {
      mockFetchOk(mockCompletionResponse('```json\n{"species": "Rose", "confidence": 0.95}\n```'));

      const result = await client.visionJson<{ species: string }>(
        'Identify plant',
        'imgdata',
        'image/jpeg'
      );

      expect(result.data.species).toBe('Rose');
    });
  });

  describe('health', () => {
    it('returns health status', async () => {
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        json: () =>
          Promise.resolve({
            status: 'healthy',
            providers: { ollama: { status: 'healthy' } },
          }),
      } as unknown as Response);

      const health = await client.health();
      expect(health.status).toBe('healthy');
    });

    it('returns unhealthy on network error', async () => {
      mockFetch.mockRejectedValueOnce(new Error('ECONNREFUSED'));

      const health = await client.health();
      expect(health.status).toBe('unhealthy');
    });
  });

  describe('listModels', () => {
    it('returns model list', async () => {
      mockFetch.mockResolvedValueOnce({
        ok: true,
        status: 200,
        json: () =>
          Promise.resolve({
            data: [{ id: 'ollama/gemma3:4b', object: 'model', created: 0, owned_by: 'ollama' }],
          }),
      } as unknown as Response);

      const models = await client.listModels();
      expect(models).toHaveLength(1);
      expect(models[0].id).toBe('ollama/gemma3:4b');
    });
  });

  describe('chatMessages', () => {
    it('sends full message history', async () => {
      mockFetchOk(mockCompletionResponse('Answer'));

      await client.chatMessages([
        { role: 'system', content: 'Be brief.' },
        { role: 'user', content: 'Hello' },
        { role: 'assistant', content: 'Hi!' },
        { role: 'user', content: 'How are you?' },
      ]);

      const body = JSON.parse(mockFetch.mock.calls[0][1].body);
      expect(body.messages).toHaveLength(4);
    });
  });

  describe('embed', () => {
    it('sends embedding request', async () => {
      mockFetchOk({
        object: 'list',
        data: [{ object: 'embedding', index: 0, embedding: [0.1, 0.2, 0.3] }],
        model: 'ollama/gemma3:4b',
        usage: { prompt_tokens: 5, completion_tokens: 0, total_tokens: 5 },
      });

      const result = await client.embed('Hello world');
      expect(result.embeddings).toHaveLength(1);
      expect(result.embeddings[0]).toEqual([0.1, 0.2, 0.3]);
    });
  });
});

packages/shared-llm/src/__tests__/retry.spec.ts (new file, 118 lines)
@@ -0,0 +1,118 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { retryFetch } from '../utils/retry';

// Mock global fetch
const mockFetch = vi.fn();
vi.stubGlobal('fetch', mockFetch);

function mockResponse(status: number, body = ''): Response {
  return {
    ok: status >= 200 && status < 300,
    status,
    statusText: `Status ${status}`,
    text: () => Promise.resolve(body),
    json: () => Promise.resolve(JSON.parse(body || '{}')),
    headers: new Headers(),
  } as unknown as Response;
}

describe('retryFetch', () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns on first successful attempt', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, '{"ok": true}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('retries on 503 and succeeds', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on 429 rate limit', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(429))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('retries on network error and succeeds', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockResolvedValueOnce(mockResponse(200, '{}'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.ok).toBe(true);
    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('does NOT retry on 400 client error', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(400, 'Bad Request'));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(400);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 401 unauthorized', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(401));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(401);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('does NOT retry on 404 not found', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(404));

    const response = await retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 });
    expect(response.status).toBe(404);
    expect(mockFetch).toHaveBeenCalledTimes(1);
  });

  it('throws after exhausting all retries', async () => {
    mockFetch
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503))
      .mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 2, baseDelay: 10 })).rejects.toThrow(
      'HTTP 503'
    );

    expect(mockFetch).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
  });

  it('throws after exhausting retries on network errors', async () => {
    mockFetch
      .mockRejectedValueOnce(new Error('ECONNREFUSED'))
      .mockRejectedValueOnce(new Error('ECONNREFUSED'));

    await expect(retryFetch('http://test', {}, { maxRetries: 1, baseDelay: 10 })).rejects.toThrow(
      'ECONNREFUSED'
    );

    expect(mockFetch).toHaveBeenCalledTimes(2);
  });

  it('works with maxRetries: 0 (no retries)', async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(503));

    await expect(retryFetch('http://test', {}, { maxRetries: 0, baseDelay: 10 })).rejects.toThrow();

    expect(mockFetch).toHaveBeenCalledTimes(1);
  });
});

packages/shared-llm/src/index.ts (new file, 35 lines)
@@ -0,0 +1,35 @@
// Module
export { LlmModule } from './llm.module';
export { LlmClientService } from './llm-client.service';
export { LLM_MODULE_OPTIONS } from './llm.constants';

// Core client (for advanced use cases)
export { LlmClient } from './llm-client';

// Interfaces
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './interfaces';
export { resolveOptions } from './interfaces';

// Types
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './types';

// Utilities
export { extractJson } from './utils';

packages/shared-llm/src/interfaces/index.ts (new file, 8 lines)
@@ -0,0 +1,8 @@
export type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
  ResolvedLlmOptions,
} from './llm-options.interface';

export { resolveOptions } from './llm-options.interface';

packages/shared-llm/src/interfaces/llm-options.interface.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
import type { ModuleMetadata, Type } from '@nestjs/common';

export interface LlmModuleOptions {
  /** mana-llm service URL (default: http://localhost:3025) */
  manaLlmUrl?: string;
  /** Default text model (default: ollama/gemma3:4b) */
  defaultModel?: string;
  /** Default vision model (default: ollama/llava:7b) */
  defaultVisionModel?: string;
  /** Request timeout in ms (default: 120000) */
  timeout?: number;
  /** Max retries on transient failures (default: 2) */
  maxRetries?: number;
  /** Enable debug logging (default: false) */
  debug?: boolean;
}

export interface LlmModuleAsyncOptions extends Pick<ModuleMetadata, 'imports'> {
  useExisting?: Type<LlmOptionsFactory>;
  useClass?: Type<LlmOptionsFactory>;
  useFactory?: (...args: any[]) => Promise<LlmModuleOptions> | LlmModuleOptions;
  inject?: any[];
}

export interface LlmOptionsFactory {
  createLlmOptions(): Promise<LlmModuleOptions> | LlmModuleOptions;
}

export interface ResolvedLlmOptions {
  manaLlmUrl: string;
  defaultModel: string;
  defaultVisionModel: string;
  timeout: number;
  maxRetries: number;
  debug: boolean;
}

export function resolveOptions(options: LlmModuleOptions): ResolvedLlmOptions {
  return {
    manaLlmUrl: options.manaLlmUrl ?? 'http://localhost:3025',
    defaultModel: options.defaultModel ?? 'ollama/gemma3:4b',
    defaultVisionModel: options.defaultVisionModel ?? 'ollama/llava:7b',
    timeout: options.timeout ?? 120_000,
    maxRetries: options.maxRetries ?? 2,
    debug: options.debug ?? false,
  };
}
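
A short sketch of how resolveOptions fills in the documented defaults; only overridden fields need to be passed (the service URL here is a hypothetical hostname):

import { resolveOptions } from '@manacore/shared-llm';

const resolved = resolveOptions({ manaLlmUrl: 'http://mana-llm:3025', debug: true });
// resolved.defaultModel       -> 'ollama/gemma3:4b'
// resolved.defaultVisionModel -> 'ollama/llava:7b'
// resolved.timeout            -> 120000, resolved.maxRetries -> 2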

packages/shared-llm/src/llm-client.service.ts (new file, 16 lines)
@@ -0,0 +1,16 @@
import { Inject, Injectable } from '@nestjs/common';
import { LlmClient } from './llm-client';
import { LLM_MODULE_OPTIONS } from './llm.constants';
import type { LlmModuleOptions } from './interfaces/llm-options.interface';
import { resolveOptions } from './interfaces/llm-options.interface';

/**
 * NestJS injectable wrapper around LlmClient.
 * All logic lives in the framework-agnostic LlmClient base class.
 */
@Injectable()
export class LlmClientService extends LlmClient {
  constructor(@Inject(LLM_MODULE_OPTIONS) options: LlmModuleOptions) {
    super(resolveOptions(options));
  }
}
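
Because LlmClientService extends LlmClient, consumers inject it and call the same surface directly. A minimal sketch; SummaryService is a hypothetical consumer, not part of this commit:

import { Injectable } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';

@Injectable()
export class SummaryService {
  constructor(private readonly llm: LlmClientService) {}

  async summarize(text: string): Promise<string> {
    // chat() returns a ChatResult; only the generated text is needed here.
    const result = await this.llm.chat(`Summarize in one sentence:\n${text}`);
    return result.content;
  }
}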

packages/shared-llm/src/llm-client.ts (new file, 350 lines)
@@ -0,0 +1,350 @@
/**
 * Framework-agnostic LLM client that communicates with the mana-llm service.
 *
 * This is the core implementation shared between the NestJS LlmClientService
 * and the standalone LlmClient export (for non-NestJS consumers like bot-services).
 */

import type { ResolvedLlmOptions } from './interfaces/llm-options.interface';
import type {
  ChatMessage,
  ChatOptions,
  ChatResult,
  JsonOptions,
  JsonResult,
  VisionOptions,
  TokenUsage,
  ModelInfo,
  HealthStatus,
} from './types/chat.types';
import type {
  ChatCompletionRequest,
  ChatCompletionResponse,
  EmbeddingResponse,
} from './types/openai-compat.types';
import { extractJson } from './utils/json-extractor';
import { retryFetch } from './utils/retry';

function createTimeoutSignal(ms: number): any {
  const controller = new AbortController();
  setTimeout(() => controller.abort(), ms);
  return controller.signal;
}

export class LlmClient {
  private readonly baseUrl: string;
  private readonly options: ResolvedLlmOptions;

  constructor(options: ResolvedLlmOptions) {
    this.options = options;
    this.baseUrl = options.manaLlmUrl.replace(/\/+$/, '');
  }

  // ---------------------------------------------------------------------------
  // Text Chat
  // ---------------------------------------------------------------------------

  /** Simple chat with a single prompt string. */
  async chat(prompt: string, opts?: ChatOptions): Promise<ChatResult> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    return this.chatMessages(messages, opts);
  }

  /** Chat with full message history. */
  async chatMessages(messages: ChatMessage[], opts?: ChatOptions): Promise<ChatResult> {
    const body = this.buildRequest(messages, opts, false);
    const start = Date.now();
    const response = await this.fetchCompletion(body, opts?.timeout);
    const latencyMs = Date.now() - start;

    return {
      content: response.choices[0]?.message?.content ?? '',
      model: response.model,
      usage: response.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
      latencyMs,
    };
  }

  // ---------------------------------------------------------------------------
  // Streaming
  // ---------------------------------------------------------------------------

  /** Streaming chat - returns an async iterable of text tokens. */
  async *chatStream(prompt: string, opts?: ChatOptions): AsyncIterable<string> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    yield* this.chatStreamMessages(messages, opts);
  }

  /** Streaming chat with full message history. */
  async *chatStreamMessages(messages: ChatMessage[], opts?: ChatOptions): AsyncIterable<string> {
    const body = this.buildRequest(messages, opts, true);
    const timeout = opts?.timeout ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm stream error ${response.status}: ${text}`);
    }

    if (!response.body) {
      throw new Error('mana-llm returned no response body for stream');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed || !trimmed.startsWith('data: ')) continue;

          const data = trimmed.slice(6);
          if (data === '[DONE]') return;

          try {
            const chunk = JSON.parse(data);
            const content = chunk.choices?.[0]?.delta?.content;
            if (content) yield content;
          } catch {
            // Skip unparseable chunks
          }
        }
      }
    } finally {
      reader.releaseLock();
    }
  }

  // ---------------------------------------------------------------------------
  // Structured JSON Output
  // ---------------------------------------------------------------------------

  /** Chat that extracts and parses JSON from the response. */
  async json<T = unknown>(prompt: string, opts?: JsonOptions<T>): Promise<JsonResult<T>> {
    const messages = this.buildMessages(prompt, opts?.systemPrompt);
    return this.jsonMessages<T>(messages, opts);
  }

  /** JSON extraction from full message history. */
  async jsonMessages<T = unknown>(
    messages: ChatMessage[],
    opts?: JsonOptions<T>
  ): Promise<JsonResult<T>> {
    const maxAttempts = (opts?.jsonRetries ?? 1) + 1;
    let lastError: Error | undefined;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const result = await this.chatMessages(messages, opts);

      try {
        const data = extractJson<T>(result.content, opts?.validate);
        return { ...result, data };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        if (this.options.debug) {
          console.warn(
            `[shared-llm] JSON extraction attempt ${attempt + 1}/${maxAttempts} failed:`,
            lastError.message
          );
        }
      }
    }

    throw lastError ?? new Error('JSON extraction failed');
  }

  // ---------------------------------------------------------------------------
  // Vision
  // ---------------------------------------------------------------------------

  /** Analyze an image with a text prompt. */
  async vision(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    opts?: VisionOptions
  ): Promise<ChatResult> {
    const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
    const model = opts?.visionModel ?? this.options.defaultVisionModel;
    return this.chatMessages(messages, { ...opts, model });
  }

  /** Vision + JSON extraction. */
  async visionJson<T = unknown>(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    opts?: VisionOptions & JsonOptions<T>
  ): Promise<JsonResult<T>> {
    const messages = this.buildVisionMessages(prompt, imageBase64, mimeType, opts?.systemPrompt);
    const model = opts?.visionModel ?? this.options.defaultVisionModel;
    return this.jsonMessages<T>(messages, { ...opts, model });
  }

  // ---------------------------------------------------------------------------
  // Embeddings
  // ---------------------------------------------------------------------------

  /** Generate embeddings for text input. */
  async embed(
    input: string | string[],
    model?: string
  ): Promise<{ embeddings: number[][]; usage: TokenUsage }> {
    const response = await retryFetch(
      `${this.baseUrl}/v1/embeddings`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: model ?? this.options.defaultModel,
          input,
        }),
        signal: createTimeoutSignal(this.options.timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm embeddings error ${response.status}: ${text}`);
    }

    const data = (await response.json()) as EmbeddingResponse;
    return {
      embeddings: data.data.map((d) => d.embedding),
      usage: data.usage,
    };
  }

  // ---------------------------------------------------------------------------
  // Health & Models
  // ---------------------------------------------------------------------------

  /** Check mana-llm health and provider status. */
  async health(): Promise<HealthStatus> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: createTimeoutSignal(5_000),
      });
      if (!response.ok) {
        return { status: 'unhealthy', providers: {} };
      }
      return (await response.json()) as HealthStatus;
    } catch {
      return { status: 'unhealthy', providers: {} };
    }
  }

  /** List available models from all providers. */
  async listModels(): Promise<ModelInfo[]> {
    const response = await fetch(`${this.baseUrl}/v1/models`, {
      signal: createTimeoutSignal(10_000),
    });

    if (!response.ok) {
      throw new Error(`mana-llm models error ${response.status}`);
    }

    const data = (await response.json()) as { data: ModelInfo[] };
    return data.data ?? [];
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  private buildMessages(prompt: string, systemPrompt?: string): ChatMessage[] {
    const messages: ChatMessage[] = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({ role: 'user', content: prompt });
    return messages;
  }

  private buildVisionMessages(
    prompt: string,
    imageBase64: string,
    mimeType?: string,
    systemPrompt?: string
  ): ChatMessage[] {
    const mime = mimeType ?? 'image/jpeg';
    const dataUrl = imageBase64.startsWith('data:')
      ? imageBase64
      : `data:${mime};base64,${imageBase64}`;

    const messages: ChatMessage[] = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: dataUrl } },
      ],
    });
    return messages;
  }

  private buildRequest(
    messages: ChatMessage[],
    opts: ChatOptions | undefined,
    stream: boolean
  ): ChatCompletionRequest {
    const request: ChatCompletionRequest = {
      model: opts?.model ?? this.options.defaultModel,
      messages,
      stream,
    };

    if (opts?.temperature !== undefined) request.temperature = opts.temperature;
    if (opts?.maxTokens !== undefined) request.max_tokens = opts.maxTokens;

    return request;
  }

  private async fetchCompletion(
    body: ChatCompletionRequest,
    timeoutOverride?: number
  ): Promise<ChatCompletionResponse> {
    const timeout = timeoutOverride ?? this.options.timeout;

    const response = await retryFetch(
      `${this.baseUrl}/v1/chat/completions`,
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(body),
        signal: createTimeoutSignal(timeout),
      },
      { maxRetries: this.options.maxRetries }
    );

    if (!response.ok) {
      const text = await response.text().catch(() => '');
      throw new Error(`mana-llm error ${response.status}: ${text}`);
    }

    return (await response.json()) as ChatCompletionResponse;
  }
}
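
The SSE loop in chatStreamMessages yields plain text tokens, so consumption is a single for await loop. A sketch; printHaiku and the prompt are hypothetical:

import { LlmClient } from '@manacore/shared-llm';

async function printHaiku(llm: LlmClient): Promise<void> {
  // Tokens arrive as they are decoded from the `data:` SSE lines above.
  for await (const token of llm.chatStream('Write a haiku about monorepos')) {
    process.stdout.write(token);
  }
}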

packages/shared-llm/src/llm.constants.ts (new file, 1 line)
@@ -0,0 +1 @@
export const LLM_MODULE_OPTIONS = 'LLM_MODULE_OPTIONS';

packages/shared-llm/src/llm.module.ts (new file, 80 lines)
@@ -0,0 +1,80 @@
import { DynamicModule, Module, Global, Provider } from '@nestjs/common';
import type {
  LlmModuleOptions,
  LlmModuleAsyncOptions,
  LlmOptionsFactory,
} from './interfaces/llm-options.interface';
import { LlmClientService } from './llm-client.service';
import { LLM_MODULE_OPTIONS } from './llm.constants';

@Global()
@Module({})
export class LlmModule {
  static forRoot(options: LlmModuleOptions): DynamicModule {
    return {
      module: LlmModule,
      providers: [
        {
          provide: LLM_MODULE_OPTIONS,
          useValue: options,
        },
        LlmClientService,
      ],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  static forRootAsync(options: LlmModuleAsyncOptions): DynamicModule {
    const asyncProviders = this.createAsyncProviders(options);

    return {
      module: LlmModule,
      imports: options.imports || [],
      providers: [...asyncProviders, LlmClientService],
      exports: [LLM_MODULE_OPTIONS, LlmClientService],
    };
  }

  private static createAsyncProviders(options: LlmModuleAsyncOptions): Provider[] {
    if (options.useFactory) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: options.useFactory,
          inject: options.inject || [],
        },
      ];
    }

    const useClass = options.useClass;
    const useExisting = options.useExisting;

    if (useClass) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useClass],
        },
        {
          provide: useClass,
          useClass,
        },
      ];
    }

    if (useExisting) {
      return [
        {
          provide: LLM_MODULE_OPTIONS,
          useFactory: async (optionsFactory: LlmOptionsFactory) =>
            await optionsFactory.createLlmOptions(),
          inject: [useExisting],
        },
      ];
    }

    return [];
  }
}
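
A sketch of wiring the module from environment config via forRootAsync. MANA_LLM_URL matches the variable the migrated services previously read; LLM_DEFAULT_MODEL is an assumed name for illustration:

import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';

@Module({
  imports: [
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      inject: [ConfigService],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get<string>('MANA_LLM_URL'),
        defaultModel: config.get<string>('LLM_DEFAULT_MODEL'), // assumed env var name
      }),
    }),
  ],
})
export class AppModule {}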

packages/shared-llm/src/standalone.ts (new file, 30 lines)
@@ -0,0 +1,30 @@
/**
 * Standalone exports for non-NestJS consumers (e.g. bot-services).
 *
 * Usage:
 *   import { LlmClient, resolveOptions } from '@manacore/shared-llm/standalone';
 *   const llm = new LlmClient(resolveOptions({ manaLlmUrl: 'http://localhost:3025' }));
 */

export { LlmClient } from './llm-client';
export { resolveOptions } from './interfaces/llm-options.interface';
export type { LlmModuleOptions, ResolvedLlmOptions } from './interfaces/llm-options.interface';

// Types
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './types';

// Utilities
export { extractJson } from './utils';
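
A slightly fuller standalone sketch than the docstring, assuming a local mana-llm on the default port; note that the LlmClient constructor takes ResolvedLlmOptions, so partial options go through resolveOptions first:

import { LlmClient, resolveOptions } from '@manacore/shared-llm/standalone';

async function main(): Promise<void> {
  const llm = new LlmClient(resolveOptions({ manaLlmUrl: 'http://localhost:3025' }));

  // health() never throws; it reports 'unhealthy' on any failure.
  const health = await llm.health();
  if (health.status === 'unhealthy') return;

  const reply = await llm.chat('Hello from a plain Node process');
  console.log(`${reply.content} (${reply.latencyMs} ms, ${reply.usage.total_tokens} tokens)`);
}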

packages/shared-llm/src/types/chat.types.ts (new file, 100 lines)
@@ -0,0 +1,100 @@
/**
 * Core chat types for the LLM client.
 * These are the high-level types that consumers interact with.
 */

// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------

export interface TextContentPart {
  type: 'text';
  text: string;
}

export interface ImageContentPart {
  type: 'image_url';
  image_url: { url: string };
}

export type ContentPart = TextContentPart | ImageContentPart;

export interface ChatMessage {
  role: 'system' | 'user' | 'assistant';
  content: string | ContentPart[];
}

// ---------------------------------------------------------------------------
// Options
// ---------------------------------------------------------------------------

export interface ChatOptions {
  /** Model to use (default from module config, e.g. "ollama/gemma3:4b") */
  model?: string;
  /** Sampling temperature 0.0-2.0 */
  temperature?: number;
  /** Max tokens to generate */
  maxTokens?: number;
  /** System prompt prepended to messages */
  systemPrompt?: string;
  /** Request timeout in ms (overrides module default) */
  timeout?: number;
}

export interface JsonOptions<T = unknown> extends ChatOptions {
  /** Validation function applied to parsed JSON. Should throw on invalid data. */
  validate?: (data: unknown) => T;
  /** Number of extraction retries on parse failure (default: 1) */
  jsonRetries?: number;
}

export interface VisionOptions extends ChatOptions {
  /** Vision model override (default from module config, e.g. "ollama/llava:7b") */
  visionModel?: string;
}

// ---------------------------------------------------------------------------
// Results
// ---------------------------------------------------------------------------

export interface TokenUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

export interface ChatResult {
  /** Generated text content */
  content: string;
  /** Model that was actually used */
  model: string;
  /** Token usage statistics */
  usage: TokenUsage;
  /** Request latency in milliseconds */
  latencyMs: number;
}

export interface JsonResult<T = unknown> extends ChatResult {
  /** Parsed and optionally validated data */
  data: T;
}

// ---------------------------------------------------------------------------
// Models
// ---------------------------------------------------------------------------

export interface ModelInfo {
  id: string;
  object: 'model';
  created: number;
  owned_by: string;
}

// ---------------------------------------------------------------------------
// Health
// ---------------------------------------------------------------------------

export interface HealthStatus {
  status: 'healthy' | 'degraded' | 'unhealthy';
  providers: Record<string, unknown>;
}
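
One worked example of the JsonOptions contract: validate narrows unknown to T and throws on bad data, and jsonMessages() re-prompts up to jsonRetries extra times when parsing or validation throws. FeedbackShape is a hypothetical target type:

import type { JsonOptions } from '@manacore/shared-llm';

interface FeedbackShape {
  title: string;
  category: string;
}

const opts: JsonOptions<FeedbackShape> = {
  jsonRetries: 2, // up to 3 total attempts
  validate: (data) => {
    const obj = data as Partial<FeedbackShape>;
    if (typeof obj.title !== 'string' || typeof obj.category !== 'string') {
      throw new Error('invalid FeedbackShape');
    }
    return obj as FeedbackShape;
  },
};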

packages/shared-llm/src/types/index.ts (new file, 26 lines)
@@ -0,0 +1,26 @@
export type {
  ChatMessage,
  ContentPart,
  TextContentPart,
  ImageContentPart,
  ChatOptions,
  JsonOptions,
  VisionOptions,
  TokenUsage,
  ChatResult,
  JsonResult,
  ModelInfo,
  HealthStatus,
} from './chat.types';

export type {
  ChatCompletionRequest,
  ChatCompletionResponse,
  ChatCompletionChoice,
  ChatCompletionStreamChunk,
  StreamChoice,
  EmbeddingRequest,
  EmbeddingResponse,
  EmbeddingData,
  ModelsListResponse,
} from './openai-compat.types';

packages/shared-llm/src/types/openai-compat.types.ts (new file, 97 lines)
@@ -0,0 +1,97 @@
/**
 * OpenAI-compatible wire format types matching the mana-llm API contract.
 * These are internal types used for HTTP communication - consumers should
 * use the high-level types from chat.types.ts instead.
 */

import type { ChatMessage, TokenUsage } from './chat.types';

// ---------------------------------------------------------------------------
// Request (POST /v1/chat/completions)
// ---------------------------------------------------------------------------

export interface ChatCompletionRequest {
  model: string;
  messages: ChatMessage[];
  stream?: boolean;
  temperature?: number;
  max_tokens?: number;
  top_p?: number;
  frequency_penalty?: number;
  presence_penalty?: number;
  stop?: string | string[];
}

// ---------------------------------------------------------------------------
// Response (non-streaming)
// ---------------------------------------------------------------------------

export interface ChatCompletionResponse {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  choices: ChatCompletionChoice[];
  usage: TokenUsage;
}

export interface ChatCompletionChoice {
  index: number;
  message: { role: 'assistant'; content: string };
  finish_reason: 'stop' | 'length' | 'content_filter' | null;
}

// ---------------------------------------------------------------------------
// Response (streaming)
// ---------------------------------------------------------------------------

export interface ChatCompletionStreamChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  choices: StreamChoice[];
}

export interface StreamChoice {
  index: number;
  delta: { role?: 'assistant'; content?: string };
  finish_reason: string | null;
}

// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------

export interface EmbeddingRequest {
  model: string;
  input: string | string[];
  encoding_format?: 'float' | 'base64';
}

export interface EmbeddingResponse {
  object: 'list';
  data: EmbeddingData[];
  model: string;
  usage: TokenUsage;
}

export interface EmbeddingData {
  object: 'embedding';
  index: number;
  embedding: number[];
}

// ---------------------------------------------------------------------------
// Models (GET /v1/models)
// ---------------------------------------------------------------------------

export interface ModelsListResponse {
  object: 'list';
  data: Array<{
    id: string;
    object: 'model';
    created: number;
    owned_by: string;
  }>;
}

packages/shared-llm/src/utils/index.ts (new file, 3 lines)
@@ -0,0 +1,3 @@
export { extractJson } from './json-extractor';
export { retryFetch } from './retry';
export type { RetryOptions } from './retry';

packages/shared-llm/src/utils/json-extractor.ts (new file, 94 lines)
@@ -0,0 +1,94 @@
/**
 * Extract and parse JSON from LLM responses.
 *
 * LLMs often wrap JSON in markdown code fences or include extra text.
 * This utility handles all common patterns:
 *   1. Direct JSON parse
 *   2. Markdown ```json ... ``` fences
 *   3. First { ... } or [ ... ] block in text
 */
export function extractJson<T = unknown>(text: string, validate?: (data: unknown) => T): T {
  const trimmed = text.trim();

  // Step 1: Try direct parse
  const direct = tryParse<T>(trimmed, validate);
  if (direct !== undefined) return direct;

  // Step 2: Strip markdown code fences
  const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenceMatch) {
    const fenced = tryParse<T>(fenceMatch[1].trim(), validate);
    if (fenced !== undefined) return fenced;
  }

  // Step 3: Find first JSON object
  const objectStart = trimmed.indexOf('{');
  if (objectStart !== -1) {
    const objectStr = extractBalanced(trimmed, objectStart, '{', '}');
    if (objectStr) {
      const obj = tryParse<T>(objectStr, validate);
      if (obj !== undefined) return obj;
    }
  }

  // Step 4: Find first JSON array
  const arrayStart = trimmed.indexOf('[');
  if (arrayStart !== -1) {
    const arrayStr = extractBalanced(trimmed, arrayStart, '[', ']');
    if (arrayStr) {
      const arr = tryParse<T>(arrayStr, validate);
      if (arr !== undefined) return arr;
    }
  }

  throw new Error(`Failed to extract JSON from LLM response: ${trimmed.slice(0, 200)}...`);
}

function tryParse<T>(text: string, validate?: (data: unknown) => T): T | undefined {
  try {
    const parsed = JSON.parse(text);
    return validate ? validate(parsed) : parsed;
  } catch {
    return undefined;
  }
}

/**
 * Extract a balanced block starting from the given position.
 * Tracks string literals and escape sequences so braces/brackets inside
 * strings (including escaped quotes) do not affect the nesting depth.
 */
function extractBalanced(text: string, start: number, open: string, close: string): string | null {
  let depth = 0;
  let inString = false;
  let escape = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (escape) {
      escape = false;
      continue;
    }

    if (ch === '\\') {
      escape = true;
      continue;
    }

    if (ch === '"') {
      inString = !inString;
      continue;
    }

    if (inString) continue;

    if (ch === open) depth++;
    if (ch === close) depth--;

    if (depth === 0) {
      return text.slice(start, i + 1);
    }
  }

  return null;
}
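
The four-step fallback above means inputs in any of these shapes resolve to the same value; a sketch:

import { extractJson } from '@manacore/shared-llm';

extractJson('{"ok": true}');                         // step 1: direct parse
extractJson('```json\n{"ok": true}\n```');           // step 2: fenced block
extractJson('Sure, here it is: {"ok": true} Done.'); // step 3: first balanced object
extractJson('Values: [1, 2, 3]');                    // step 4: first balanced array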
51
packages/shared-llm/src/utils/retry.ts
Normal file
51
packages/shared-llm/src/utils/retry.ts
Normal file
|
|
@ -0,0 +1,51 @@
/**
 * Fetch wrapper with exponential backoff retry for transient failures.
 *
 * Retries on: 429 (rate limit), 502, 503, 504 (server errors), network errors.
 * Does NOT retry on: 400, 401, 403, 404 (client errors).
 */

const RETRYABLE_STATUS_CODES = new Set([429, 502, 503, 504]);

export interface RetryOptions {
  maxRetries: number;
  /** Base delay in ms (doubles each retry). Default: 200 */
  baseDelay?: number;
}

export async function retryFetch(
  url: string,
  init: RequestInit,
  options: RetryOptions
): Promise<Response> {
  const { maxRetries, baseDelay = 200 } = options;
  let lastError: Error | undefined;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      const response = await fetch(url, init);

      if (response.ok || !RETRYABLE_STATUS_CODES.has(response.status)) {
        return response;
      }

      // Retryable status code
      lastError = new Error(`HTTP ${response.status}: ${response.statusText}`);
    } catch (error) {
      // Network error (connection refused, timeout, etc.)
      lastError = error instanceof Error ? error : new Error(String(error));
    }

    // Don't sleep after the last attempt
    if (attempt < maxRetries) {
      const delay = baseDelay * Math.pow(2, attempt);
      await sleep(delay);
    }
  }

  throw lastError ?? new Error('retryFetch exhausted all retries');
}

function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
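As a usage sketch (not part of this commit; the export path is assumed): with maxRetries: 3 and the default baseDelay of 200 ms, a failing request is attempted four times in total, sleeping 200 ms, 400 ms, then 800 ms between attempts (baseDelay * 2^attempt).

import { retryFetch } from '@manacore/shared-llm';

async function probeHealth(): Promise<Response> {
  // Retries transparently on 429/502/503/504 and network errors;
  // a 400/401/403/404 response is returned immediately without retrying.
  return retryFetch('http://localhost:3025/health', { method: 'GET' }, { maxRetries: 3 });
}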
21 packages/shared-llm/tsconfig.json Normal file
@ -0,0 +1,21 @@
{
  "compilerOptions": {
    "target": "ES2021",
    "module": "commonjs",
    "lib": ["ES2021"],
    "declaration": true,
    "declarationMap": true,
    "sourceMap": true,
    "outDir": "./dist",
    "rootDir": "./src",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "moduleResolution": "node",
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
}
972 pnpm-lock.yaml generated
File diff suppressed because it is too large
@ -26,6 +26,7 @@
  },
  "dependencies": {
    "@google/generative-ai": "^0.24.1",
    "@manacore/shared-llm": "workspace:^",
    "@manacore/shared-storage": "workspace:*",
    "@nestjs/axios": "^4.0.1",
    "@nestjs/common": "^10.4.15",
@ -1,10 +1,8 @@
import { Module, Global } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { AiService } from './ai.service';

@Global()
@Module({
  imports: [ConfigModule],
  providers: [AiService],
  exports: [AiService],
})
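Because AiModule is @Global(), feature modules can inject AiService without importing AiModule themselves. A hypothetical consumer (names invented for illustration):

import { Injectable } from '@nestjs/common';
import { AiService } from '../ai/ai.service';

@Injectable()
export class FeedbackTriageService {
  constructor(private readonly ai: AiService) {}

  async triage(text: string) {
    return this.ai.analyzeFeedback(text);
  }
}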
@ -1,32 +1,20 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { LlmClientService } from '@manacore/shared-llm';

export interface FeedbackAnalysis {
  title: string;
  category: 'bug' | 'feature' | 'improvement' | 'question' | 'other';
}

const VALID_CATEGORIES = ['bug', 'feature', 'improvement', 'question', 'other'] as const;

@Injectable()
export class AiService {
  private readonly logger = new Logger(AiService.name);
  private readonly manaLlmUrl: string | null = null;

  constructor(private configService: ConfigService) {
    const url = this.configService.get<string>('MANA_LLM_URL');
    if (url) {
      this.manaLlmUrl = url;
      this.logger.log(`AI service using mana-llm at ${url}`);
    } else {
      this.logger.warn('MANA_LLM_URL not configured - AI features disabled');
    }
  }
  constructor(private readonly llm: LlmClientService) {}

  async analyzeFeedback(feedbackText: string): Promise<FeedbackAnalysis> {
    // Fallback if AI not available
    if (!this.manaLlmUrl) {
      return this.fallbackAnalysis(feedbackText);
    }

    try {
      const prompt = `Analysiere dieses User-Feedback und generiere:
1. Einen kurzen, prägnanten deutschen Titel (max 60 Zeichen) der den Kern des Feedbacks zusammenfasst

@ -37,48 +25,24 @@ Feedback: "${feedbackText}"
Antworte NUR mit validem JSON in diesem Format (keine Markdown-Codeblocks, kein anderer Text):
{"title": "...", "category": "..."}`;

      const result = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: 'ollama/gemma3:4b',
          messages: [{ role: 'user', content: prompt }],
          temperature: 0.3,
        }),
        signal: AbortSignal.timeout(30000),
      const { data } = await this.llm.json<FeedbackAnalysis>(prompt, {
        temperature: 0.3,
        timeout: 30_000,
        validate: (raw) => {
          const obj = raw as FeedbackAnalysis;
          if (!obj.title || !obj.category) throw new Error('missing fields');
          if (!VALID_CATEGORIES.includes(obj.category as any)) {
            obj.category = 'other';
          }
          if (obj.title.length > 60) {
            obj.title = obj.title.substring(0, 57) + '...';
          }
          return obj;
        },
      });

      if (!result.ok) {
        throw new Error(`mana-llm error: ${result.status}`);
      }

      const data = await result.json();
      const response = (data.choices?.[0]?.message?.content || '').trim();

      // Parse JSON response - handle potential markdown code blocks
      let jsonStr = response;
      if (response.includes('```')) {
        const match = response.match(/```(?:json)?\s*([\s\S]*?)```/);
        if (match) {
          jsonStr = match[1].trim();
        }
      }

      const parsed = JSON.parse(jsonStr) as FeedbackAnalysis;

      // Validate category
      const validCategories = ['bug', 'feature', 'improvement', 'question', 'other'];
      if (!validCategories.includes(parsed.category)) {
        parsed.category = 'other';
      }

      // Ensure title is not too long
      if (parsed.title.length > 60) {
        parsed.title = parsed.title.substring(0, 57) + '...';
      }

      this.logger.debug(`AI analyzed feedback: ${JSON.stringify(parsed)}`);
      return parsed;
      this.logger.debug(`AI analyzed feedback: ${JSON.stringify(data)}`);
      return data;
    } catch (error) {
      this.logger.error(`AI analysis failed: ${error}`);
      return this.fallbackAnalysis(feedbackText);
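The validate callback shown above doubles as a normalizer: it coerces out-of-range values in place and throws to reject a parse candidate (assuming llm.json delegates to the extractJson utility, a throw makes extraction fall through to the next strategy). The same pattern applied to a different payload, as a hypothetical sketch:

import { LlmClientService } from '@manacore/shared-llm';

interface Sentiment {
  score: number; // expected in [-1, 1]
}

async function scoreSentiment(llm: LlmClientService, text: string): Promise<Sentiment> {
  const { data } = await llm.json<Sentiment>(`Rate the sentiment of: "${text}"`, {
    temperature: 0,
    validate: (raw) => {
      const obj = raw as Sentiment;
      if (typeof obj.score !== 'number') throw new Error('missing score');
      obj.score = Math.max(-1, Math.min(1, obj.score)); // clamp instead of rejecting
      return obj;
    },
  });
  return data;
}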
@ -1,7 +1,8 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { ThrottlerModule } from '@nestjs/throttler';
import { APP_FILTER } from '@nestjs/core';
import { LlmModule } from '@manacore/shared-llm';
import configuration from './config/configuration';
import { AdminModule } from './admin/admin.module';
import { AiModule } from './ai/ai.module';

@ -35,6 +36,14 @@ import { SecurityModule } from './security';
        limit: 100, // 100 requests per minute
      },
    ]),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('MANA_LLM_URL'),
        debug: config.get('NODE_ENV') === 'development',
      }),
      inject: [ConfigService],
    }),
    LoggerModule,
    SecurityModule,
    MetricsModule,
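If the module also exposes a synchronous forRoot taking the same options object (assumed here, not shown in this diff), services that read their environment directly could skip the async factory:

import { Module } from '@nestjs/common';
import { LlmModule } from '@manacore/shared-llm';

@Module({
  imports: [
    LlmModule.forRoot({
      manaLlmUrl: process.env.MANA_LLM_URL ?? 'http://localhost:3025',
      debug: process.env.NODE_ENV === 'development',
    }),
  ],
})
export class AppModule {}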
@ -29,6 +29,7 @@
  "dependencies": {
    "@manacore/bot-services": "workspace:*",
    "@manacore/matrix-bot-common": "workspace:*",
    "@manacore/shared-llm": "workspace:^",
    "@nestjs/common": "^10.4.15",
    "@nestjs/config": "^3.3.0",
    "@nestjs/core": "^10.4.15",
@ -1,5 +1,6 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { LlmModule } from '@manacore/shared-llm';
import { HealthController, createHealthProvider } from '@manacore/matrix-bot-common';
import { BotModule } from './bot/bot.module';
import configuration from './config/configuration';

@ -10,6 +11,15 @@ import configuration from './config/configuration';
      isGlobal: true,
      load: [configuration],
    }),
    LlmModule.forRootAsync({
      imports: [ConfigModule],
      useFactory: (config: ConfigService) => ({
        manaLlmUrl: config.get('llm.url') || 'http://localhost:3025',
        defaultModel: config.get('llm.model') || 'ollama/gemma3:4b',
        timeout: config.get<number>('llm.timeout') || 120000,
      }),
      inject: [ConfigService],
    }),
    BotModule,
  ],
  controllers: [HealthController],
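For the config.get('llm.*') lookups above to resolve, config/configuration.ts must expose a matching llm section. A sketch of that shape (the key names follow the factory above; the environment variable names are assumptions):

export default () => ({
  llm: {
    url: process.env.MANA_LLM_URL,
    model: process.env.LLM_MODEL,
    timeout: process.env.LLM_TIMEOUT ? parseInt(process.env.LLM_TIMEOUT, 10) : undefined,
  },
});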
@ -1,49 +1,17 @@
import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
import { LlmClientService } from '@manacore/shared-llm';
import { ConfigService } from '@nestjs/config';

interface LlmModel {
  id: string;
  name: string;
  size: number;
  owned_by: string;
}

interface ChatMessage {
  role: 'user' | 'assistant' | 'system';
  content: string | ContentPart[];
}

interface ContentPart {
  type: 'text' | 'image_url';
  text?: string;
  image_url?: { url: string };
}

interface ChatCompletionResponse {
  id: string;
  model: string;
  choices: {
    message: { role: string; content: string };
    finish_reason: string;
  }[];
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

@Injectable()
export class OllamaService implements OnModuleInit {
  private readonly logger = new Logger(OllamaService.name);
  private readonly baseUrl: string;
  private readonly defaultModel: string;
  private readonly timeout: number;

  constructor(private configService: ConfigService) {
    this.baseUrl = this.configService.get<string>('llm.url') || 'http://localhost:3025';
  constructor(
    private readonly llm: LlmClientService,
    private configService: ConfigService
  ) {
    this.defaultModel = this.configService.get<string>('llm.model') || 'ollama/gemma3:4b';
    this.timeout = this.configService.get<number>('llm.timeout') || 120000;
  }

  async onModuleInit() {
@ -52,27 +20,23 @@ export class OllamaService implements OnModuleInit {

  async checkConnection(): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/health`, {
        signal: AbortSignal.timeout(5000),
      });
      const data = await response.json();
      this.logger.log(`mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`);
      return data.status === 'healthy' || data.status === 'degraded';
      const health = await this.llm.health();
      this.logger.log(
        `mana-llm connected: ${health.status}, providers: ${Object.keys(health.providers || {}).join(', ')}`
      );
      return health.status === 'healthy' || health.status === 'degraded';
    } catch (error) {
      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
      this.logger.error('Failed to connect to mana-llm:', error);
      return false;
    }
  }

  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
    try {
      const response = await fetch(`${this.baseUrl}/v1/models`);
      const data = await response.json();

      // Convert OpenAI format to legacy Ollama format for compatibility
      return (data.data || []).map((m: LlmModel) => ({
      const models = await this.llm.listModels();
      return models.map((m) => ({
        name: m.id,
        size: 0, // mana-llm doesn't provide size
        size: 0,
        modified_at: new Date().toISOString(),
      }));
    } catch (error) {
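health() is assumed to surface the mana-llm /health payload unchanged, so a startup gate can apply the same policy as checkConnection(). A hypothetical helper:

import { LlmClientService } from '@manacore/shared-llm';

async function assertLlmReachable(llm: LlmClientService): Promise<void> {
  const health = await llm.health();
  // 'degraded' still counts as usable, matching checkConnection() above.
  if (health.status !== 'healthy' && health.status !== 'degraded') {
    throw new Error(`mana-llm unavailable: ${health.status}`);
  }
}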
@ -87,39 +51,15 @@ export class OllamaService implements OnModuleInit {
  ): Promise<string> {
    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

    try {
      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: selectedModel,
          messages,
          stream: false,
        }),
        signal: AbortSignal.timeout(this.timeout),
      });
      const result = await this.llm.chatMessages(messages, { model: selectedModel });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
      }

      const data: ChatCompletionResponse = await response.json();

      // Log performance metrics
      if (data.usage) {
        this.logger.debug(
          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
        );
      }

      return data.choices[0]?.message?.content || '';
    } catch (error) {
      if (error instanceof Error && error.name === 'TimeoutError') {
        throw new Error('LLM Timeout - Antwort dauerte zu lange');
      }
      throw error;
    if (result.usage.completion_tokens) {
      this.logger.debug(
        `Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
      );
    }

    return result.content;
  }

  getDefaultModel(): string {
@ -129,59 +69,19 @@ export class OllamaService implements OnModuleInit {
  async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

    try {
      // Use OpenAI vision format
      const messages: ChatMessage[] = [
        {
          role: 'user',
          content: [
            { type: 'text', text: prompt },
            {
              type: 'image_url',
              image_url: { url: `data:image/png;base64,${imageBase64}` },
            },
          ],
        },
      ];
    const result = await this.llm.vision(prompt, imageBase64, 'image/png', {
      model: selectedModel,
    });

      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: selectedModel,
          messages,
          stream: false,
        }),
        signal: AbortSignal.timeout(this.timeout),
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
      }

      const data: ChatCompletionResponse = await response.json();

      // Log performance metrics
      if (data.usage) {
        this.logger.debug(
          `Vision: Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
        );
      }

      return data.choices[0]?.message?.content || '';
    } catch (error) {
      if (error instanceof Error && error.name === 'TimeoutError') {
        throw new Error('LLM Timeout - Bildanalyse dauerte zu lange');
      }
      throw error;
    if (result.usage.completion_tokens) {
      this.logger.debug(
        `Vision: Generated ${result.usage.completion_tokens} tokens (total: ${result.usage.total_tokens})`
      );
    }

    return result.content;
  }

  /**
   * Normalize model name to include provider prefix if missing.
   * e.g., "gemma3:4b" -> "ollama/gemma3:4b"
   */
  private normalizeModel(model: string): string {
    if (model.includes('/')) {
      return model;
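The same chat surface is also available without NestJS via the standalone LlmClient used by the non-NestJS consumers; a minimal sketch (constructor option names are assumed to mirror the module options above):

import { LlmClient } from '@manacore/shared-llm';

async function main(): Promise<void> {
  const llm = new LlmClient({
    manaLlmUrl: process.env.MANA_LLM_URL ?? 'http://localhost:3025',
    defaultModel: 'ollama/gemma3:4b',
  });

  // normalizeModel() above adds the provider prefix for bare names;
  // here the prefix is passed explicitly.
  const result = await llm.chatMessages(
    [{ role: 'user', content: 'Say hello in one word.' }],
    { model: 'ollama/gemma3:4b' }
  );
  console.log(result.content, result.usage.total_tokens);
}

main().catch(console.error);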