feat(chat-backend): integrate Ollama for local LLM inference

- Add OllamaService for local model inference via Ollama API
- Update ChatService to route requests based on model provider
- Support both 'ollama' (local) and 'openrouter' (cloud) providers
- Add Gemma 3 4B as default model (free, runs on Mac Mini)
- Add SQL migration script for existing databases
- Update CLAUDE.md with Ollama configuration docs

Environment variables:
- OLLAMA_URL: Ollama server URL (default: http://localhost:11434)
- OLLAMA_TIMEOUT: Request timeout in ms (default: 120000)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Till-JS, 2026-01-26 16:03:03 +01:00
Commit: 6f51f1a24c (parent: 14aaf01fa3)
7 changed files with 280 additions and 16 deletions

CLAUDE.md

@@ -70,7 +70,7 @@ pnpm preview # Preview production build
- **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
- **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
- **Landing**: Astro 5.16, Tailwind CSS
- **Backend**: NestJS 10, OpenRouter AI, Drizzle ORM, PostgreSQL
- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
- **Auth**: Mana Core Auth (JWT)
- **Types**: TypeScript 5.x
@@ -94,9 +94,13 @@ pnpm preview # Preview production build
#### Backend (.env)
```env
# Required - All AI models via OpenRouter
# Cloud AI models via OpenRouter (optional if using only local models)
OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys
# Local AI via Ollama (optional, defaults to localhost:11434)
OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
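# Note (assumes the Ollama CLI is installed): pull the model once before
# first use, e.g. `ollama pull gemma3:4b`, or local requests will fail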
# Database (uses shared Docker PostgreSQL)
DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat
@@ -129,13 +133,19 @@ PUBLIC_BACKEND_URL=http://localhost:3002
- **Styling**: Tailwind CSS everywhere
- **Formatting**: 100 char line limit, 2 space tabs, single quotes
## AI Models Available (via OpenRouter)
## AI Models Available
Cloud models are accessed through OpenRouter (100+ models with a single API key); local models run free via Ollama.
### Local Models (Ollama - Free)
| Model ID | Name | Provider | Best For |
| -------- | ---- | -------- | -------- |
| ...440101 | Gemma 3 4B (Lokal) | ollama | Everyday tasks (default) - runs on Mac Mini |
### Cloud Models (OpenRouter - Paid)
| Model ID | Name | Price | Best For |
| -------- | ---- | ----- | -------- |
| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks (default) |
| ...440201 | Llama 3.1 8B | $0.05/M | Fast cloud alternative |
| ...440202 | Llama 3.1 70B | $0.35/M | Complex reasoning |
| ...440203 | DeepSeek V3 | $0.14/M | Reasoning at low cost |
| ...440204 | Mistral Small | $0.10/M | General tasks |
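
A minimal request sketch against the backend (assumptions: the route `POST /chat/completions` is a guess, since `ChatController` is not shown in this diff, and auth headers are omitted; the body fields mirror `ChatCompletionDto` as used in `ChatService`):

```typescript
// Hypothetical client call: the route path is an assumption; the body
// fields (modelId, messages, temperature, maxTokens) mirror ChatCompletionDto.
const res = await fetch('http://localhost:3002/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    modelId: '550e8400-e29b-41d4-a716-446655440101', // Gemma 3 4B (local)
    messages: [{ role: 'user', content: 'Hallo!' }],
    temperature: 0.7,
    maxTokens: 512,
  }),
});
const completion = await res.json(); // { content, usage: { prompt_tokens, ... } }
```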

package.json

@@ -16,6 +16,7 @@
"db:push": "drizzle-kit push",
"db:studio": "drizzle-kit studio",
"db:seed": "tsx src/db/seed.ts",
"db:add-ollama": "psql $DATABASE_URL -f src/db/migrations/add-ollama-model.sql",
"docker:build": "docker compose build",
"docker:up": "docker compose up -d",
"docker:down": "docker compose down",

src/chat/chat.module.ts

@@ -1,10 +1,11 @@
import { Module } from '@nestjs/common';
import { ChatController } from './chat.controller';
import { ChatService } from './chat.service';
import { OllamaService } from './ollama.service';
@Module({
controllers: [ChatController],
providers: [ChatService],
exports: [ChatService],
providers: [ChatService, OllamaService],
exports: [ChatService, OllamaService],
})
export class ChatModule {}

src/chat/chat.service.ts

@@ -9,18 +9,20 @@ import { models } from '../db/schema/models.schema';
import type { Model } from '../db/schema/models.schema';
import { ChatCompletionDto } from './dto/chat-completion.dto';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
import { OllamaService } from './ollama.service';
@Injectable()
export class ChatService {
private readonly logger = new Logger(ChatService.name);
// OpenRouter config (primary provider)
// OpenRouter config (cloud provider)
private readonly openRouterClient: OpenAI | null = null;
constructor(
private configService: ConfigService,
@Inject(DATABASE_CONNECTION) private readonly db: Database
@Inject(DATABASE_CONNECTION) private readonly db: Database,
private readonly ollamaService: OllamaService
) {
// OpenRouter setup (primary and only provider)
// OpenRouter setup (cloud provider)
const openRouterApiKey = this.configService.get<string>('OPENROUTER_API_KEY');
if (openRouterApiKey) {
this.openRouterClient = new OpenAI({
@@ -33,7 +35,7 @@ export class ChatService {
});
this.logger.log('OpenRouter client initialized');
} else {
this.logger.error('OPENROUTER_API_KEY is not set - Chat will not work!');
this.logger.warn('OPENROUTER_API_KEY not set - only local Ollama models will work');
}
}
@@ -69,11 +71,46 @@ export class ChatService {
// Log user context for tracking (optional)
if (userId) {
this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
this.logger.log(
`User ${userId} creating chat completion with model ${dto.modelId} (${model.provider})`
);
}
// All models go through OpenRouter
return this.createOpenRouterCompletion(model, dto);
// Route to appropriate provider based on model configuration
switch (model.provider) {
case 'ollama':
return this.createOllamaCompletion(model, dto);
case 'openrouter':
default:
return this.createOpenRouterCompletion(model, dto);
}
}
private async createOllamaCompletion(
model: Model,
dto: ChatCompletionDto
): AsyncResult<ChatCompletionResponseDto> {
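// Per-model defaults come from the model's "parameters" record
// (seeded in src/db/seed.ts); values from the request DTO take precedence.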
const params = model.parameters as {
model?: string;
temperature?: number;
max_tokens?: number;
} | null;
const modelName = params?.model || 'gemma3:4b';
const temperature = dto.temperature ?? params?.temperature ?? 0.7;
const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 4096;
this.logger.log(`Sending request to Ollama model: ${modelName}`);
return this.ollamaService.createChatCompletion(
modelName,
dto.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: msg.content,
})),
temperature,
maxTokens
);
}
private async createOpenRouterCompletion(

src/chat/ollama.service.ts

@@ -0,0 +1,169 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
interface OllamaChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
interface OllamaChatResponse {
model: string;
message: {
role: string;
content: string;
};
done: boolean;
total_duration?: number;
eval_count?: number;
eval_duration?: number;
prompt_eval_count?: number;
}
@Injectable()
export class OllamaService {
private readonly logger = new Logger(OllamaService.name);
private readonly baseUrl: string;
private readonly timeout: number;
private isConnected = false;
constructor(private configService: ConfigService) {
this.baseUrl = this.configService.get<string>('OLLAMA_URL') || 'http://localhost:11434';
this.timeout = this.configService.get<number>('OLLAMA_TIMEOUT') || 120000;
// Check connection on startup (fire-and-forget; a failure is logged, not thrown)
this.checkConnection();
}
async checkConnection(): Promise<boolean> {
try {
const response = await fetch(`${this.baseUrl}/api/version`, {
signal: AbortSignal.timeout(5000),
});
if (response.ok) {
const data = await response.json();
this.isConnected = true;
this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
return true;
}
this.isConnected = false;
return false;
} catch (error) {
this.isConnected = false;
this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
return false;
}
}
isAvailable(): boolean {
return this.isConnected;
}
async createChatCompletion(
modelName: string,
messages: OllamaChatMessage[],
temperature?: number,
maxTokens?: number
): AsyncResult<ChatCompletionResponseDto> {
if (!this.isConnected) {
// Try to reconnect
await this.checkConnection();
if (!this.isConnected) {
return err(
ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
);
}
}
this.logger.log(`Sending request to Ollama model: ${modelName}`);
try {
const requestBody: Record<string, unknown> = {
model: modelName,
messages,
stream: false,
};
// Add options if provided
const options: Record<string, unknown> = {};
if (temperature !== undefined) {
options.temperature = temperature;
}
if (maxTokens !== undefined) {
options.num_predict = maxTokens;
}
if (Object.keys(options).length > 0) {
requestBody.options = options;
}
const response = await fetch(`${this.baseUrl}/api/chat`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(requestBody),
signal: AbortSignal.timeout(this.timeout),
});
if (!response.ok) {
const errorText = await response.text();
this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
}
const data: OllamaChatResponse = await response.json();
if (!data.message?.content) {
this.logger.warn('No message content in Ollama response');
return err(ServiceError.generationFailed('Ollama', 'No response generated'));
}
// Calculate token usage from Ollama metrics
const promptTokens = data.prompt_eval_count || 0;
const completionTokens = data.eval_count || 0;
// Log performance metrics (Ollama reports durations in nanoseconds)
if (data.eval_count && data.eval_duration) {
const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
}
return ok({
content: data.message.content,
usage: {
prompt_tokens: promptTokens,
completion_tokens: completionTokens,
total_tokens: promptTokens + completionTokens,
},
});
} catch (error) {
if (error instanceof Error && error.name === 'TimeoutError') {
this.logger.error('Ollama request timed out');
return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
}
this.logger.error('Error calling Ollama API', error);
return err(
ServiceError.generationFailed(
'Ollama',
error instanceof Error ? error.message : 'Unknown error',
error instanceof Error ? error : undefined
)
);
}
}
async listModels(): Promise<string[]> {
try {
const response = await fetch(`${this.baseUrl}/api/tags`, {
signal: AbortSignal.timeout(5000),
});
if (!response.ok) {
return [];
}
const data = await response.json();
return (data.models || []).map((m: { name: string }) => m.name);
} catch {
return [];
}
}
}
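
A quick smoke test for the new service, shown as hypothetical usage rather than part of this commit (assumes `app` is the `INestApplication` created during bootstrap in main.ts):

```typescript
// Hypothetical verification snippet (not in this commit): resolve the
// service from the Nest application context and list the pulled models.
import { OllamaService } from './chat/ollama.service';

const ollama = app.get(OllamaService);
if (ollama.isAvailable()) {
  console.log('Ollama models:', await ollama.listModels()); // e.g. ['gemma3:4b']
} else {
  console.warn('Ollama offline - only OpenRouter (cloud) models will work');
}
```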

src/db/migrations/add-ollama-model.sql

@@ -0,0 +1,30 @@
-- Migration: Add Ollama Gemma 3 4B model
-- Run this on existing databases to add the local Ollama model
-- Insert Ollama model if it doesn't exist
INSERT INTO models (id, name, description, provider, parameters, is_active, is_default, created_at, updated_at)
VALUES (
'550e8400-e29b-41d4-a716-446655440101',
'Gemma 3 4B (Lokal)',
'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
'ollama',
'{"model": "gemma3:4b", "temperature": 0.7, "max_tokens": 4096}',
true,
true,
NOW(),
NOW()
)
ON CONFLICT (id) DO UPDATE SET
name = EXCLUDED.name,
description = EXCLUDED.description,
provider = EXCLUDED.provider,
parameters = EXCLUDED.parameters,
is_active = EXCLUDED.is_active,
updated_at = NOW();
-- Set the new Ollama model as default and unset others
UPDATE models SET is_default = false WHERE id != '550e8400-e29b-41d4-a716-446655440101';
UPDATE models SET is_default = true WHERE id = '550e8400-e29b-41d4-a716-446655440101';
-- Verify
SELECT id, name, provider, is_default FROM models ORDER BY is_default DESC, name;
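-- Apply with: pnpm db:add-ollama (script registered in package.json above)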

src/db/seed.ts

@@ -33,7 +33,23 @@ async function seed() {
const modelData = [
// ============================================
// OpenRouter Models (All models via OpenRouter)
// Local Ollama Models (Free, runs on Mac Mini)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440101',
name: 'Gemma 3 4B (Lokal)',
description: 'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
provider: 'ollama',
parameters: {
model: 'gemma3:4b',
temperature: 0.7,
max_tokens: 4096,
},
isActive: true,
isDefault: true, // Default model - free and local
},
// ============================================
// OpenRouter Models (Cloud, paid)
// ============================================
{
id: '550e8400-e29b-41d4-a716-446655440201',
@@ -46,7 +62,7 @@ async function seed() {
max_tokens: 4096,
},
isActive: true,
isDefault: true, // Default model - fast and cost-effective
isDefault: false,
},
{
id: '550e8400-e29b-41d4-a716-446655440202',