feat(chat-backend): integrate Ollama for local LLM inference

- Add OllamaService for local model inference via Ollama API
- Update ChatService to route requests based on model provider
- Support both 'ollama' (local) and 'openrouter' (cloud) providers
- Add Gemma 3 4B as default model (free, runs on Mac Mini)
- Add SQL migration script for existing databases
- Update CLAUDE.md with Ollama configuration docs

Environment variables:
- OLLAMA_URL: Ollama server URL (default: http://localhost:11434)
- OLLAMA_TIMEOUT: Request timeout in ms (default: 120000)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Till-JS, 2026-01-26 16:03:03 +01:00
Commit: 6f51f1a24c (parent: 14aaf01fa3)
7 changed files with 280 additions and 16 deletions

CLAUDE.md

@@ -70,7 +70,7 @@ pnpm preview # Preview production build
- **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
- **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
- **Landing**: Astro 5.16, Tailwind CSS
- **Backend**: NestJS 10, OpenRouter AI, Drizzle ORM, PostgreSQL
- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
- **Auth**: Mana Core Auth (JWT)
- **Types**: TypeScript 5.x
@@ -94,9 +94,13 @@ pnpm preview # Preview production build
#### Backend (.env)
```env
# Required - All AI models via OpenRouter
# Cloud AI models via OpenRouter (optional if using only local models)
OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys
# Local AI via Ollama (optional, defaults to localhost:11434)
OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
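# Note (assumes the Ollama CLI is installed): pull the model once before
# first use, e.g. `ollama pull gemma3:4b`, or local requests will fail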
# Database (uses shared Docker PostgreSQL)
DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat
@@ -129,13 +133,19 @@ PUBLIC_BACKEND_URL=http://localhost:3002
- **Styling**: Tailwind CSS everywhere
- **Formatting**: 100 char line limit, 2 space tabs, single quotes
## AI Models Available (via OpenRouter)
## AI Models Available
Cloud models are accessed through OpenRouter (100+ models with a single API key); local models run free via Ollama.
### Local Models (Ollama - Free)
| Model ID | Name | Provider | Best For |
| -------- | ---- | -------- | -------- |
| ...440101 | Gemma 3 4B (Lokal) | ollama | Everyday tasks (default) - runs on Mac Mini |
### Cloud Models (OpenRouter - Paid)
| Model ID | Name | Price | Best For |
| -------- | ---- | ----- | -------- |
| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks (default) |
| ...440201 | Llama 3.1 8B | $0.05/M | Fast cloud alternative |
| ...440202 | Llama 3.1 70B | $0.35/M | Complex reasoning |
| ...440203 | DeepSeek V3 | $0.14/M | Reasoning at low cost |
| ...440204 | Mistral Small | $0.10/M | General tasks |
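
A minimal request sketch against the backend (assumptions: the route `POST /chat/completions` is a guess, since `ChatController` is not shown in this diff, and auth headers are omitted; the body fields mirror `ChatCompletionDto` as used in `ChatService`):

```typescript
// Hypothetical client call: the route path is an assumption; the body
// fields (modelId, messages, temperature, maxTokens) mirror ChatCompletionDto.
const res = await fetch('http://localhost:3002/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    modelId: '550e8400-e29b-41d4-a716-446655440101', // Gemma 3 4B (local)
    messages: [{ role: 'user', content: 'Hallo!' }],
    temperature: 0.7,
    maxTokens: 512,
  }),
});
const completion = await res.json(); // { content, usage: { prompt_tokens, ... } }
```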

package.json

@@ -16,6 +16,7 @@
"db:push": "drizzle-kit push",
"db:studio": "drizzle-kit studio",
"db:seed": "tsx src/db/seed.ts",
"db:add-ollama": "psql $DATABASE_URL -f src/db/migrations/add-ollama-model.sql",
"docker:build": "docker compose build",
"docker:up": "docker compose up -d",
"docker:down": "docker compose down",

src/chat/chat.module.ts

@@ -1,10 +1,11 @@
import { Module } from '@nestjs/common';
import { ChatController } from './chat.controller';
import { ChatService } from './chat.service';
import { OllamaService } from './ollama.service';
@Module({
controllers: [ChatController],
providers: [ChatService],
exports: [ChatService],
providers: [ChatService, OllamaService],
exports: [ChatService, OllamaService],
})
export class ChatModule {}

src/chat/chat.service.ts

@@ -9,18 +9,20 @@ import { models } from '../db/schema/models.schema';
import type { Model } from '../db/schema/models.schema';
import { ChatCompletionDto } from './dto/chat-completion.dto';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
import { OllamaService } from './ollama.service';
@Injectable()
export class ChatService {
private readonly logger = new Logger(ChatService.name);
// OpenRouter config (primary provider)
// OpenRouter config (cloud provider)
private readonly openRouterClient: OpenAI | null = null;
constructor(
private configService: ConfigService,
@Inject(DATABASE_CONNECTION) private readonly db: Database
@Inject(DATABASE_CONNECTION) private readonly db: Database,
private readonly ollamaService: OllamaService
) {
// OpenRouter setup (primary and only provider)
// OpenRouter setup (cloud provider)
const openRouterApiKey = this.configService.get<string>('OPENROUTER_API_KEY');
if (openRouterApiKey) {
this.openRouterClient = new OpenAI({
@@ -33,7 +35,7 @@ export class ChatService {
});
this.logger.log('OpenRouter client initialized');
} else {
this.logger.error('OPENROUTER_API_KEY is not set - Chat will not work!');
this.logger.warn('OPENROUTER_API_KEY not set - only local Ollama models will work');
}
}
@@ -69,11 +71,46 @@ export class ChatService {
// Log user context for tracking (optional)
if (userId) {
this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
this.logger.log(
`User ${userId} creating chat completion with model ${dto.modelId} (${model.provider})`
);
}
// All models go through OpenRouter
return this.createOpenRouterCompletion(model, dto);
// Route to appropriate provider based on model configuration
switch (model.provider) {
case 'ollama':
return this.createOllamaCompletion(model, dto);
case 'openrouter':
default:
return this.createOpenRouterCompletion(model, dto);
}
}
private async createOllamaCompletion(
model: Model,
dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
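// Per-model defaults come from the model's "parameters" record
// (seeded in src/db/seed.ts); values from the request DTO take precedence.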
const params = model.parameters as {
model?: string;
temperature?: number;
max_tokens?: number;
} | null;
const modelName = params?.model || 'gemma3:4b';
const temperature = dto.temperature ?? params?.temperature ?? 0.7;
const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 4096;
this.logger.log(`Sending request to Ollama model: ${modelName}`);
return this.ollamaService.createChatCompletion(
modelName,
dto.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: msg.content,
})),
temperature,
maxTokens
);
}
private async createOpenRouterCompletion(

src/chat/ollama.service.ts

@@ -0,0 +1,169 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
interface OllamaChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
interface OllamaChatResponse {
model: string;
message: {
role: string;
content: string;
};
done: boolean;
total_duration?: number;
eval_count?: number;
eval_duration?: number;
prompt_eval_count?: number;
}
@Injectable()
export class OllamaService {
private readonly logger = new Logger(OllamaService.name);
private readonly baseUrl: string;
private readonly timeout: number;
private isConnected = false;
constructor(private configService: ConfigService) {
this.baseUrl = this.configService.get<string>('OLLAMA_URL') || 'http://localhost:11434';
this.timeout = this.configService.get<number>('OLLAMA_TIMEOUT') || 120000;
// Check connection on startup (fire-and-forget; a failure is logged, not thrown)
this.checkConnection();
}
async checkConnection(): Promise<boolean> {
try {
const response = await fetch(`${this.baseUrl}/api/version`, {
signal: AbortSignal.timeout(5000),
});
if (response.ok) {
const data = await response.json();
this.isConnected = true;
this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
return true;
}
this.isConnected = false;
return false;
} catch (error) {
this.isConnected = false;
this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
return false;
}
}
isAvailable(): boolean {
return this.isConnected;
}
async createChatCompletion(
modelName: string,
messages: OllamaChatMessage[],
temperature?: number,
maxTokens?: number
): AsyncResult<ChatCompletionResponseDto> {
if (!this.isConnected) {
// Try to reconnect
await this.checkConnection();
if (!this.isConnected) {
return err(
ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
);
}
}
this.logger.log(`Sending request to Ollama model: ${modelName}`);
try {
const requestBody: Record<string, unknown> = {
model: modelName,
messages,
stream: false,
};
// Add options if provided
const options: Record<string, unknown> = {};
if (temperature !== undefined) {
options.temperature = temperature;
}
if (maxTokens !== undefined) {
options.num_predict = maxTokens;
}
if (Object.keys(options).length > 0) {
requestBody.options = options;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(this.timeout),
});
if (!response.ok) {
const errorText = await response.text();
this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
}
const data: OllamaChatResponse = await response.json();
if (!data.message?.content) {
this.logger.warn('No message content in Ollama response');
return err(ServiceError.generationFailed('Ollama', 'No response generated'));
}
// Calculate token usage from Ollama metrics
const promptTokens = data.prompt_eval_count || 0;
const completionTokens = data.eval_count || 0;
// Log performance metrics (Ollama reports durations in nanoseconds)
if (data.eval_count && data.eval_duration) {
const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
}
return ok({
content: data.message.content,
usage: {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: promptTokens + completionTokens,
},
});
} catch (error) {
if (error instanceof Error && error.name === 'TimeoutError') {
this.logger.error('Ollama request timed out');
return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
}
this.logger.error('Error calling Ollama API', error);
return err(
ServiceError.generationFailed(
'Ollama',
error instanceof Error ? error.message : 'Unknown error',
error instanceof Error ? error : undefined
)
);
}
}
async listModels(): Promise<string[]> {
try {
const response = await fetch(`${this.baseUrl}/api/tags`, {
signal: AbortSignal.timeout(5000),
});
if (!response.ok) {
return [];
}
const data = await response.json();
return (data.models || []).map((m: { name: string }) => m.name);
} catch {
return [];
}
}
}
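
A quick smoke test for the new service, shown as hypothetical usage rather than part of this commit (assumes `app` is the `INestApplication` created during bootstrap in main.ts):

```typescript
// Hypothetical verification snippet (not in this commit): resolve the
// service from the Nest application context and list the pulled models.
import { OllamaService } from './chat/ollama.service';

const ollama = app.get(OllamaService);
if (ollama.isAvailable()) {
  console.log('Ollama models:', await ollama.listModels()); // e.g. ['gemma3:4b']
} else {
  console.warn('Ollama offline - only OpenRouter (cloud) models will work');
}
```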

src/db/migrations/add-ollama-model.sql

@@ -0,0 +1,30 @@
-- Migration: Add Ollama Gemma 3 4B model
-- Run this on existing databases to add the local Ollama model
-- Insert Ollama model if it doesn't exist
INSERT INTO models (id, name, description, provider, parameters, is_active, is_default, created_at, updated_at)
VALUES (
'550e8400-e29b-41d4-a716-446655440101',
'Gemma 3 4B (Lokal)',
'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
'ollama',
'{"model": "gemma3:4b", "temperature": 0.7, "max_tokens": 4096}',
true,
true,
NOW(),
NOW()
)
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
description = EXCLUDED.description,
provider = EXCLUDED.provider,
parameters = EXCLUDED.parameters,
is_active = EXCLUDED.is_active,
updated_at = NOW();
-- Set the new Ollama model as default and unset others
UPDATE models SET is_default = false WHERE id != '550e8400-e29b-41d4-a716-446655440101';
UPDATE models SET is_default = true WHERE id = '550e8400-e29b-41d4-a716-446655440101';
-- Verify
SELECT id, name, provider, is_default FROM models ORDER BY is_default DESC, name;
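-- Apply with: pnpm db:add-ollama (script registered in package.json above)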

src/db/seed.ts

@@ -33,7 +33,23 @@ async function seed() {
const modelData = [
// ============================================
// OpenRouter Models (All models via OpenRouter)
// Local Ollama Models (Free, runs on Mac Mini)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440101',
name: 'Gemma 3 4B (Lokal)',
description: 'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
provider: 'ollama',
parameters: {
model: 'gemma3:4b',
temperature: 0.7,
max_tokens: 4096,
},
isActive: true,
isDefault: true, // Default model - free and local
},
// ============================================
// OpenRouter Models (Cloud, paid)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440201',
@@ -46,7 +62,7 @@ async function seed() {
max_tokens: 4096,
},
isActive: true,
isDefault: true, // Default model - fast and cost-effective
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440202',