feat(chat-backend): integrate Ollama for local LLM inference
- Add OllamaService for local model inference via Ollama API
- Update ChatService to route requests based on model provider
- Support both 'ollama' (local) and 'openrouter' (cloud) providers
- Add Gemma 3 4B as default model (free, runs on Mac Mini)
- Add SQL migration script for existing databases
- Update CLAUDE.md with Ollama configuration docs

Environment variables:

- OLLAMA_URL: Ollama server URL (default: http://localhost:11434)
- OLLAMA_TIMEOUT: Request timeout in ms (default: 120000)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
14aaf01fa3
commit
6f51f1a24c
7 changed files with 280 additions and 16 deletions
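Before the file changes, the endpoint contract the new OllamaService builds on is Ollama's `/api/chat`. A minimal smoke test of that endpoint (not part of this commit; assumes a local Ollama server with `gemma3:4b` pulled, and Node 18+ for the global `fetch`):

```ts
// Standalone probe of the Ollama chat endpoint that OllamaService wraps.
// Assumptions: Ollama runs at OLLAMA_URL (default localhost:11434) and
// `ollama pull gemma3:4b` has already been run.
const OLLAMA_URL = process.env.OLLAMA_URL ?? 'http://localhost:11434';

async function main(): Promise<void> {
  const response = await fetch(`${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'gemma3:4b',
      messages: [{ role: 'user', content: 'Say hello in one sentence.' }],
      stream: false, // one JSON object back instead of a token stream
    }),
  });
  if (!response.ok) throw new Error(`Ollama returned ${response.status}`);
  const data = await response.json();
  console.log(data.message.content);
}

main().catch(console.error);
```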
CLAUDE.md

@@ -70,7 +70,7 @@ pnpm preview # Preview production build
 - **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
 - **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
 - **Landing**: Astro 5.16, Tailwind CSS
-- **Backend**: NestJS 10, OpenRouter AI, Drizzle ORM, PostgreSQL
+- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
 - **Auth**: Mana Core Auth (JWT)
 - **Types**: TypeScript 5.x
 
@@ -94,9 +94,13 @@ pnpm preview # Preview production build
 #### Backend (.env)
 
 ```env
-# Required - All AI models via OpenRouter
+# Cloud AI models via OpenRouter (optional if using only local models)
 OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys
+
+# Local AI via Ollama (optional, defaults to localhost:11434)
+OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
+OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
 
 # Database (uses shared Docker PostgreSQL)
 DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat
 
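A caveat on the two Ollama variables above: everything in process.env is a string, so OLLAMA_TIMEOUT arrives as '120000' unless the config layer converts it. Whether ConfigService.get<number>() actually returns a number depends on how the ConfigModule is set up; a hedged sketch of an explicit coercion helper (illustrative only, not in this commit):

```ts
// Illustrative helper: coerce an env var to a number with a fallback,
// since process.env values are always strings.
function numberFromEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  const parsed = raw === undefined ? NaN : Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

const ollamaTimeoutMs = numberFromEnv('OLLAMA_TIMEOUT', 120_000); // 120s default
```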
@@ -129,13 +133,19 @@ PUBLIC_BACKEND_URL=http://localhost:3002
 - **Styling**: Tailwind CSS everywhere
 - **Formatting**: 100 char line limit, 2 space tabs, single quotes
 
-## AI Models Available (via OpenRouter)
+## AI Models Available
 
-All models are accessed through OpenRouter, providing access to 100+ models with a single API key.
+### Local Models (Ollama - Free)
+
+| Model ID | Name | Provider | Best For |
+| -------- | ---- | -------- | -------- |
+| ...440101 | Gemma 3 4B (Lokal) | ollama | Everyday tasks (default) - runs on Mac Mini |
+
+### Cloud Models (OpenRouter - Paid)
 
 | Model ID | Name | Price | Best For |
 | -------- | ---- | ----- | -------- |
-| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks (default) |
+| ...440201 | Llama 3.1 8B | $0.05/M | Fast cloud alternative |
 | ...440202 | Llama 3.1 70B | $0.35/M | Complex reasoning |
 | ...440203 | DeepSeek V3 | $0.14/M | Reasoning at low cost |
 | ...440204 | Mistral Small | $0.10/M | General tasks |

apps/chat/apps/backend/package.json
@@ -16,6 +16,7 @@
     "db:push": "drizzle-kit push",
     "db:studio": "drizzle-kit studio",
     "db:seed": "tsx src/db/seed.ts",
+    "db:add-ollama": "psql $DATABASE_URL -f src/db/migrations/add-ollama-model.sql",
     "docker:build": "docker compose build",
    "docker:up": "docker compose up -d",
    "docker:down": "docker compose down",

apps/chat/apps/backend/src/chat/chat.module.ts
@@ -1,10 +1,11 @@
 import { Module } from '@nestjs/common';
 import { ChatController } from './chat.controller';
 import { ChatService } from './chat.service';
+import { OllamaService } from './ollama.service';
 
 @Module({
   controllers: [ChatController],
-  providers: [ChatService],
-  exports: [ChatService],
+  providers: [ChatService, OllamaService],
+  exports: [ChatService, OllamaService],
 })
 export class ChatModule {}

apps/chat/apps/backend/src/chat/chat.service.ts
@@ -9,18 +9,20 @@ import { models } from '../db/schema/models.schema';
 import type { Model } from '../db/schema/models.schema';
 import { ChatCompletionDto } from './dto/chat-completion.dto';
 import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
+import { OllamaService } from './ollama.service';
 
 @Injectable()
 export class ChatService {
   private readonly logger = new Logger(ChatService.name);
-  // OpenRouter config (primary provider)
+  // OpenRouter config (cloud provider)
   private readonly openRouterClient: OpenAI | null = null;
 
   constructor(
     private configService: ConfigService,
-    @Inject(DATABASE_CONNECTION) private readonly db: Database
+    @Inject(DATABASE_CONNECTION) private readonly db: Database,
+    private readonly ollamaService: OllamaService
   ) {
-    // OpenRouter setup (primary and only provider)
+    // OpenRouter setup (cloud provider)
     const openRouterApiKey = this.configService.get<string>('OPENROUTER_API_KEY');
     if (openRouterApiKey) {
       this.openRouterClient = new OpenAI({
@@ -33,7 +35,7 @@ export class ChatService {
       });
       this.logger.log('OpenRouter client initialized');
     } else {
-      this.logger.error('OPENROUTER_API_KEY is not set - Chat will not work!');
+      this.logger.warn('OPENROUTER_API_KEY not set - only local Ollama models will work');
     }
   }
 
@@ -69,11 +71,46 @@ export class ChatService {
 
     // Log user context for tracking (optional)
     if (userId) {
-      this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
+      this.logger.log(
+        `User ${userId} creating chat completion with model ${dto.modelId} (${model.provider})`
+      );
     }
 
-    // All models go through OpenRouter
-    return this.createOpenRouterCompletion(model, dto);
+    // Route to appropriate provider based on model configuration
+    switch (model.provider) {
+      case 'ollama':
+        return this.createOllamaCompletion(model, dto);
+      case 'openrouter':
+      default:
+        return this.createOpenRouterCompletion(model, dto);
+    }
   }
+
+  private async createOllamaCompletion(
+    model: Model,
+    dto: ChatCompletionDto
+  ): AsyncResult<ChatCompletionResponseDto> {
+    const params = model.parameters as {
+      model?: string;
+      temperature?: number;
+      max_tokens?: number;
+    } | null;
+
+    const modelName = params?.model || 'gemma3:4b';
+    const temperature = dto.temperature ?? params?.temperature ?? 0.7;
+    const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 4096;
+
+    this.logger.log(`Sending request to Ollama model: ${modelName}`);
+
+    return this.ollamaService.createChatCompletion(
+      modelName,
+      dto.messages.map((msg) => ({
+        role: msg.role as 'system' | 'user' | 'assistant',
+        content: msg.content,
+      })),
+      temperature,
+      maxTokens
+    );
+  }
 
   private async createOpenRouterCompletion(
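The parameter handling in createOllamaCompletion above is a three-level cascade: the per-request DTO value wins, then the per-model defaults stored in the parameters JSON column, then a hard-coded fallback. A standalone restatement of the pattern (illustrative; only the names mirror the code above):

```ts
// The cascade used for temperature/maxTokens: request ?? model default ?? fallback.
interface ModelParams {
  model?: string;
  temperature?: number;
  max_tokens?: number;
}

function resolveTemperature(
  requestTemperature: number | undefined,
  params: ModelParams | null
): number {
  // `??` falls through only on null/undefined, so an explicit 0 from the
  // request survives instead of being swallowed by the default.
  return requestTemperature ?? params?.temperature ?? 0.7;
}

console.log(resolveTemperature(undefined, { temperature: 0.2 })); // 0.2
console.log(resolveTemperature(0, { temperature: 0.2 }));         // 0
console.log(resolveTemperature(undefined, null));                 // 0.7
```

Note that modelName uses `||` rather than `??`, which is reasonable for strings since an empty model name should fall back anyway.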

169 apps/chat/apps/backend/src/chat/ollama.service.ts Normal file
@@ -0,0 +1,169 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
+import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
+
+interface OllamaChatMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+interface OllamaChatResponse {
+  model: string;
+  message: {
+    role: string;
+    content: string;
+  };
+  done: boolean;
+  total_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  prompt_eval_count?: number;
+}
+
+@Injectable()
+export class OllamaService {
+  private readonly logger = new Logger(OllamaService.name);
+  private readonly baseUrl: string;
+  private readonly timeout: number;
+  private isConnected = false;
+
+  constructor(private configService: ConfigService) {
+    this.baseUrl = this.configService.get<string>('OLLAMA_URL') || 'http://localhost:11434';
+    this.timeout = this.configService.get<number>('OLLAMA_TIMEOUT') || 120000;
+
+    // Check connection on startup
+    this.checkConnection();
+  }
+
+  async checkConnection(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.baseUrl}/api/version`, {
+        signal: AbortSignal.timeout(5000),
+      });
+      if (response.ok) {
+        const data = await response.json();
+        this.isConnected = true;
+        this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
+        return true;
+      }
+      this.isConnected = false;
+      return false;
+    } catch (error) {
+      this.isConnected = false;
+      this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
+      return false;
+    }
+  }
+
+  isAvailable(): boolean {
+    return this.isConnected;
+  }
+
+  async createChatCompletion(
+    modelName: string,
+    messages: OllamaChatMessage[],
+    temperature?: number,
+    maxTokens?: number
+  ): AsyncResult<ChatCompletionResponseDto> {
+    if (!this.isConnected) {
+      // Try to reconnect
+      await this.checkConnection();
+      if (!this.isConnected) {
+        return err(
+          ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
+        );
+      }
+    }
+
+    this.logger.log(`Sending request to Ollama model: ${modelName}`);
+
+    try {
+      const requestBody: Record<string, unknown> = {
+        model: modelName,
+        messages,
+        stream: false,
+      };
+
+      // Add options if provided
+      const options: Record<string, unknown> = {};
+      if (temperature !== undefined) {
+        options.temperature = temperature;
+      }
+      if (maxTokens !== undefined) {
+        options.num_predict = maxTokens;
+      }
+      if (Object.keys(options).length > 0) {
+        requestBody.options = options;
+      }
+
+      const response = await fetch(`${this.baseUrl}/api/chat`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(requestBody),
+        signal: AbortSignal.timeout(this.timeout),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
+        return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
+      }
+
+      const data: OllamaChatResponse = await response.json();
+
+      if (!data.message?.content) {
+        this.logger.warn('No message content in Ollama response');
+        return err(ServiceError.generationFailed('Ollama', 'No response generated'));
+      }
+
+      // Calculate token usage from Ollama metrics
+      const promptTokens = data.prompt_eval_count || 0;
+      const completionTokens = data.eval_count || 0;
+
+      // Log performance metrics
+      if (data.eval_count && data.eval_duration) {
+        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
+        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      }
+
+      return ok({
+        content: data.message.content,
+        usage: {
+          prompt_tokens: promptTokens,
+          completion_tokens: completionTokens,
+          total_tokens: promptTokens + completionTokens,
+        },
+      });
+    } catch (error) {
+      if (error instanceof Error && error.name === 'TimeoutError') {
+        this.logger.error('Ollama request timed out');
+        return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
+      }
+
+      this.logger.error('Error calling Ollama API', error);
+      return err(
+        ServiceError.generationFailed(
+          'Ollama',
+          error instanceof Error ? error.message : 'Unknown error',
+          error instanceof Error ? error : undefined
+        )
+      );
+    }
+  }
+
+  async listModels(): Promise<string[]> {
+    try {
+      const response = await fetch(`${this.baseUrl}/api/tags`, {
+        signal: AbortSignal.timeout(5000),
+      });
+      if (!response.ok) {
+        return [];
+      }
+      const data = await response.json();
+      return (data.models || []).map((m: { name: string }) => m.name);
+    } catch {
+      return [];
+    }
+  }
+}
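Two details worth noting in the service above: Ollama reports eval_duration in nanoseconds, hence the `* 1e9` when deriving tokens per second; and the constructor's checkConnection() is deliberately fire-and-forget, so isAvailable() may still report false shortly after boot (createChatCompletion compensates by retrying the connection on demand). A possible consumer of the listModels()/isAvailable() helpers, e.g. a startup sanity check (a sketch, not part of this commit; the class would still need to be registered as a provider):

```ts
// Sketch: warn at startup if the seeded default model is not pulled.
// Builds only on OllamaService.isAvailable() and listModels() above.
import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common';
import { OllamaService } from './ollama.service';

@Injectable()
export class OllamaStartupCheck implements OnApplicationBootstrap {
  private readonly logger = new Logger(OllamaStartupCheck.name);

  constructor(private readonly ollamaService: OllamaService) {}

  async onApplicationBootstrap(): Promise<void> {
    if (!this.ollamaService.isAvailable()) {
      this.logger.warn('Skipping model check - Ollama not reachable');
      return;
    }
    const names = await this.ollamaService.listModels();
    if (!names.some((name) => name.startsWith('gemma3:4b'))) {
      this.logger.warn('gemma3:4b is not pulled - run `ollama pull gemma3:4b`');
    }
  }
}
```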

30 apps/chat/apps/backend/src/db/migrations/add-ollama-model.sql Normal file
@@ -0,0 +1,30 @@
+-- Migration: Add Ollama Gemma 3 4B model
+-- Run this on existing databases to add the local Ollama model
+
+-- Insert Ollama model if it doesn't exist
+INSERT INTO models (id, name, description, provider, parameters, is_active, is_default, created_at, updated_at)
+VALUES (
+  '550e8400-e29b-41d4-a716-446655440101',
+  'Gemma 3 4B (Lokal)',
+  'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
+  'ollama',
+  '{"model": "gemma3:4b", "temperature": 0.7, "max_tokens": 4096}',
+  true,
+  true,
+  NOW(),
+  NOW()
+)
+ON CONFLICT (id) DO UPDATE SET
+  name = EXCLUDED.name,
+  description = EXCLUDED.description,
+  provider = EXCLUDED.provider,
+  parameters = EXCLUDED.parameters,
+  is_active = EXCLUDED.is_active,
+  updated_at = NOW();
+
+-- Set the new Ollama model as default and unset others
+UPDATE models SET is_default = false WHERE id != '550e8400-e29b-41d4-a716-446655440101';
+UPDATE models SET is_default = true WHERE id = '550e8400-e29b-41d4-a716-446655440101';
+
+-- Verify
+SELECT id, name, provider, is_default FROM models ORDER BY is_default DESC, name;
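On an existing database the migration is wired to the new package.json script: `pnpm db:add-ollama` shells out to psql, so DATABASE_URL must be exported and the psql client installed. Fresh databases get the same row from the updated seed below. Also note that the ON CONFLICT branch deliberately does not touch is_default; the two UPDATE statements afterwards are what actually enforce a single default when the row already existed.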

apps/chat/apps/backend/src/db/seed.ts
@@ -33,7 +33,23 @@ async function seed() {
 
   const modelData = [
     // ============================================
-    // OpenRouter Models (All models via OpenRouter)
+    // Local Ollama Models (Free, runs on Mac Mini)
     // ============================================
+    {
+      id: '550e8400-e29b-41d4-a716-446655440101',
+      name: 'Gemma 3 4B (Lokal)',
+      description: 'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
+      provider: 'ollama',
+      parameters: {
+        model: 'gemma3:4b',
+        temperature: 0.7,
+        max_tokens: 4096,
+      },
+      isActive: true,
+      isDefault: true, // Default model - free and local
+    },
+    // ============================================
+    // OpenRouter Models (Cloud, paid)
+    // ============================================
     {
       id: '550e8400-e29b-41d4-a716-446655440201',
@@ -46,7 +62,7 @@ async function seed() {
         max_tokens: 4096,
       },
       isActive: true,
-      isDefault: true, // Default model - fast and cost-effective
+      isDefault: false,
     },
     {
       id: '550e8400-e29b-41d4-a716-446655440202',