diff --git a/apps/chat/CLAUDE.md b/apps/chat/CLAUDE.md
index 0632271fd..a21ab5b66 100644
--- a/apps/chat/CLAUDE.md
+++ b/apps/chat/CLAUDE.md
@@ -70,7 +70,7 @@ pnpm preview # Preview production build
 
 - **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
 - **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
 - **Landing**: Astro 5.16, Tailwind CSS
-- **Backend**: NestJS 10, OpenRouter AI, Drizzle ORM, PostgreSQL
+- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
 - **Auth**: Mana Core Auth (JWT)
 - **Types**: TypeScript 5.x
@@ -94,9 +94,13 @@ pnpm preview # Preview production build
 #### Backend (.env)
 
 ```env
-# Required - All AI models via OpenRouter
+# Cloud AI models via OpenRouter (optional if using only local models)
 OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys
 
+# Local AI via Ollama (optional, defaults to localhost:11434)
+OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
+OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
+
 # Database (uses shared Docker PostgreSQL)
 DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat
 
@@ -129,13 +133,19 @@ PUBLIC_BACKEND_URL=http://localhost:3002
 - **Styling**: Tailwind CSS everywhere
 - **Formatting**: 100 char line limit, 2 space tabs, single quotes
 
-## AI Models Available (via OpenRouter)
+## AI Models Available
 
-All models are accessed through OpenRouter, providing access to 100+ models with a single API key.
+### Local Models (Ollama - Free)
+
+| Model ID | Name | Provider | Best For |
+| -------- | ---- | -------- | -------- |
+| ...440101 | Gemma 3 4B (Lokal) | ollama | Everyday tasks (default) - runs on Mac Mini |
+
+### Cloud Models (OpenRouter - Paid)
 
 | Model ID | Name | Price | Best For |
 | -------- | ---- | ----- | -------- |
-| ...440201 | Llama 3.1 8B | $0.05/M | Everyday tasks (default) |
+| ...440201 | Llama 3.1 8B | $0.05/M | Fast cloud alternative |
 | ...440202 | Llama 3.1 70B | $0.35/M | Complex reasoning |
 | ...440203 | DeepSeek V3 | $0.14/M | Reasoning at low cost |
 | ...440204 | Mistral Small | $0.10/M | General tasks |
diff --git a/apps/chat/apps/backend/package.json b/apps/chat/apps/backend/package.json
index d3a50cd6b..dd2c2e087 100644
--- a/apps/chat/apps/backend/package.json
+++ b/apps/chat/apps/backend/package.json
@@ -16,6 +16,7 @@
     "db:push": "drizzle-kit push",
     "db:studio": "drizzle-kit studio",
     "db:seed": "tsx src/db/seed.ts",
+    "db:add-ollama": "psql $DATABASE_URL -f src/db/migrations/add-ollama-model.sql",
     "docker:build": "docker compose build",
     "docker:up": "docker compose up -d",
     "docker:down": "docker compose down",
diff --git a/apps/chat/apps/backend/src/chat/chat.module.ts b/apps/chat/apps/backend/src/chat/chat.module.ts
index 277a17dff..096e796c5 100644
--- a/apps/chat/apps/backend/src/chat/chat.module.ts
+++ b/apps/chat/apps/backend/src/chat/chat.module.ts
@@ -1,10 +1,11 @@
 import { Module } from '@nestjs/common';
 import { ChatController } from './chat.controller';
 import { ChatService } from './chat.service';
+import { OllamaService } from './ollama.service';
 
 @Module({
   controllers: [ChatController],
-  providers: [ChatService],
-  exports: [ChatService],
+  providers: [ChatService, OllamaService],
+  exports: [ChatService, OllamaService],
 })
 export class ChatModule {}
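A quick way to verify the new `OLLAMA_URL` / `OLLAMA_TIMEOUT` settings before booting the backend is a standalone script like the sketch below (not part of this diff; assumes Node 18+ for the global `fetch`, and uses Ollama's documented `/api/version` and `/api/tags` endpoints):

```ts
// check-ollama.ts - hypothetical sanity check, run with: npx tsx check-ollama.ts
const baseUrl = process.env.OLLAMA_URL ?? 'http://localhost:11434';

async function main(): Promise<void> {
  // /api/version answers almost instantly when the server is up
  const version = (await (
    await fetch(`${baseUrl}/api/version`, { signal: AbortSignal.timeout(5000) })
  ).json()) as { version: string };
  console.log(`Ollama v${version.version} reachable at ${baseUrl}`);

  // /api/tags lists pulled models; the seeded default needs gemma3:4b
  const tags = (await (await fetch(`${baseUrl}/api/tags`)).json()) as {
    models?: Array<{ name: string }>;
  };
  const names = (tags.models ?? []).map((m) => m.name);
  console.log(names.includes('gemma3:4b') ? 'gemma3:4b is pulled' : 'run: ollama pull gemma3:4b');
}

main().catch((e) => console.error(`Ollama not reachable at ${baseUrl}:`, e));
```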
diff --git a/apps/chat/apps/backend/src/chat/chat.service.ts b/apps/chat/apps/backend/src/chat/chat.service.ts
index 20a569d59..db5075de4 100644
--- a/apps/chat/apps/backend/src/chat/chat.service.ts
+++ b/apps/chat/apps/backend/src/chat/chat.service.ts
@@ -9,18 +9,20 @@
 import { models } from '../db/schema/models.schema';
 import type { Model } from '../db/schema/models.schema';
 import { ChatCompletionDto } from './dto/chat-completion.dto';
 import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
+import { OllamaService } from './ollama.service';
 
 @Injectable()
 export class ChatService {
   private readonly logger = new Logger(ChatService.name);
-  // OpenRouter config (primary provider)
+  // OpenRouter config (cloud provider)
   private readonly openRouterClient: OpenAI | null = null;
   constructor(
     private configService: ConfigService,
-    @Inject(DATABASE_CONNECTION) private readonly db: Database
+    @Inject(DATABASE_CONNECTION) private readonly db: Database,
+    private readonly ollamaService: OllamaService
   ) {
-    // OpenRouter setup (primary and only provider)
+    // OpenRouter setup (cloud provider)
     const openRouterApiKey = this.configService.get<string>('OPENROUTER_API_KEY');
     if (openRouterApiKey) {
       this.openRouterClient = new OpenAI({
@@ -33,7 +35,7 @@
       });
       this.logger.log('OpenRouter client initialized');
     } else {
-      this.logger.error('OPENROUTER_API_KEY is not set - Chat will not work!');
+      this.logger.warn('OPENROUTER_API_KEY not set - only local Ollama models will work');
     }
   }
 
@@ -69,11 +71,46 @@
 
     // Log user context for tracking (optional)
     if (userId) {
-      this.logger.log(`User ${userId} creating chat completion with model ${dto.modelId}`);
+      this.logger.log(
+        `User ${userId} creating chat completion with model ${dto.modelId} (${model.provider})`
+      );
     }
 
-    // All models go through OpenRouter
-    return this.createOpenRouterCompletion(model, dto);
+    // Route to the appropriate provider based on model configuration
+    switch (model.provider) {
+      case 'ollama':
+        return this.createOllamaCompletion(model, dto);
+      case 'openrouter':
+      default:
+        return this.createOpenRouterCompletion(model, dto);
+    }
+  }
+
+  private async createOllamaCompletion(
+    model: Model,
+    dto: ChatCompletionDto
+  ): AsyncResult<ChatCompletionResponseDto> {
+    const params = model.parameters as {
+      model?: string;
+      temperature?: number;
+      max_tokens?: number;
+    } | null;
+
+    const modelName = params?.model || 'gemma3:4b';
+    const temperature = dto.temperature ?? params?.temperature ?? 0.7;
+    const maxTokens = dto.maxTokens ?? params?.max_tokens ?? 4096;
+
+    this.logger.log(`Routing request to Ollama model: ${modelName}`);
+
+    return this.ollamaService.createChatCompletion(
+      modelName,
+      dto.messages.map((msg) => ({
+        role: msg.role as 'system' | 'user' | 'assistant',
+        content: msg.content,
+      })),
+      temperature,
+      maxTokens
+    );
   }
 
   private async createOpenRouterCompletion(
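The parameter cascade in `createOllamaCompletion` above resolves per-request values first, then the model's `parameters` JSON column, then hard-coded fallbacks. A self-contained illustration of that resolution order (values invented):

```ts
// Same precedence as createOllamaCompletion: request -> model.parameters -> built-in default
type OllamaParams = { model?: string; temperature?: number; max_tokens?: number } | null;

function resolveOptions(dto: { temperature?: number; maxTokens?: number }, params: OllamaParams) {
  return {
    model: params?.model ?? 'gemma3:4b',
    temperature: dto.temperature ?? params?.temperature ?? 0.7,
    maxTokens: dto.maxTokens ?? params?.max_tokens ?? 4096,
  };
}

// Request overrides temperature, model JSON supplies max_tokens, model name falls back:
console.log(resolveOptions({ temperature: 0.2 }, { max_tokens: 1024 }));
// -> { model: 'gemma3:4b', temperature: 0.2, maxTokens: 1024 }
```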
diff --git a/apps/chat/apps/backend/src/chat/ollama.service.ts b/apps/chat/apps/backend/src/chat/ollama.service.ts
new file mode 100644
index 000000000..31692b9cd
--- /dev/null
+++ b/apps/chat/apps/backend/src/chat/ollama.service.ts
@@ -0,0 +1,169 @@
+import { Injectable, Logger } from '@nestjs/common';
+import { ConfigService } from '@nestjs/config';
+import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
+import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';
+
+interface OllamaChatMessage {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+interface OllamaChatResponse {
+  model: string;
+  message: {
+    role: string;
+    content: string;
+  };
+  done: boolean;
+  total_duration?: number;
+  eval_count?: number;
+  eval_duration?: number;
+  prompt_eval_count?: number;
+}
+
+@Injectable()
+export class OllamaService {
+  private readonly logger = new Logger(OllamaService.name);
+  private readonly baseUrl: string;
+  private readonly timeout: number;
+  private isConnected = false;
+
+  constructor(private configService: ConfigService) {
+    this.baseUrl = this.configService.get<string>('OLLAMA_URL') || 'http://localhost:11434';
+    this.timeout = Number(this.configService.get('OLLAMA_TIMEOUT')) || 120000; // env values are strings
+
+    // Check connection on startup (fire and forget)
+    void this.checkConnection();
+  }
+
+  async checkConnection(): Promise<boolean> {
+    try {
+      const response = await fetch(`${this.baseUrl}/api/version`, {
+        signal: AbortSignal.timeout(5000),
+      });
+      if (response.ok) {
+        const data = (await response.json()) as { version: string };
+        this.isConnected = true;
+        this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
+        return true;
+      }
+      this.isConnected = false;
+      return false;
+    } catch {
+      this.isConnected = false;
+      this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
+      return false;
+    }
+  }
+
+  isAvailable(): boolean {
+    return this.isConnected;
+  }
+
+  async createChatCompletion(
+    modelName: string,
+    messages: OllamaChatMessage[],
+    temperature?: number,
+    maxTokens?: number
+  ): AsyncResult<ChatCompletionResponseDto> {
+    if (!this.isConnected) {
+      // Try to reconnect
+      await this.checkConnection();
+      if (!this.isConnected) {
+        return err(
+          ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
+        );
+      }
+    }
+
+    this.logger.log(`Sending request to Ollama model: ${modelName}`);
+
+    try {
+      const requestBody: Record<string, unknown> = {
+        model: modelName,
+        messages,
+        stream: false,
+      };
+
+      // Add options if provided
+      const options: Record<string, unknown> = {};
+      if (temperature !== undefined) {
+        options.temperature = temperature;
+      }
+      if (maxTokens !== undefined) {
+        options.num_predict = maxTokens;
+      }
+      if (Object.keys(options).length > 0) {
+        requestBody.options = options;
+      }
+
+      const response = await fetch(`${this.baseUrl}/api/chat`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(requestBody),
+        signal: AbortSignal.timeout(this.timeout),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
+        return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
+      }
+
+      const data = (await response.json()) as OllamaChatResponse;
+
+      if (!data.message?.content) {
+        this.logger.warn('No message content in Ollama response');
+        return err(ServiceError.generationFailed('Ollama', 'No response generated'));
+      }
+
+      // Calculate token usage from Ollama metrics
+      const promptTokens = data.prompt_eval_count || 0;
+      const completionTokens = data.eval_count || 0;
+
+      // Log performance metrics (eval_duration is in nanoseconds)
+      if (data.eval_count && data.eval_duration) {
+        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
+        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      }
+
+      return ok({
+        content: data.message.content,
+        usage: {
+          prompt_tokens: promptTokens,
+          completion_tokens: completionTokens,
+          total_tokens: promptTokens + completionTokens,
+        },
+      });
+    } catch (error) {
+      if (error instanceof Error && error.name === 'TimeoutError') {
+        this.logger.error('Ollama request timed out');
+        return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
+      }
+
+      this.logger.error('Error calling Ollama API', error);
+      return err(
+        ServiceError.generationFailed(
+          'Ollama',
+          error instanceof Error ? error.message : 'Unknown error',
+          error instanceof Error ? error : undefined
+        )
+      );
+    }
+  }
+
+  async listModels(): Promise<string[]> {
+    try {
+      const response = await fetch(`${this.baseUrl}/api/tags`, {
+        signal: AbortSignal.timeout(5000),
+      });
+      if (!response.ok) {
+        return [];
+      }
+      const data = (await response.json()) as { models?: Array<{ name: string }> };
+      return (data.models ?? []).map((m) => m.name);
+    } catch {
+      return [];
+    }
+  }
+}
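For reference, a non-streaming `/api/chat` exchange has roughly this shape (field names match the `OllamaChatResponse` interface above; all values invented). The duration fields are nanoseconds, which is why the throughput log multiplies by 1e9:

```ts
// Illustrative request/response pair for POST {OLLAMA_URL}/api/chat
const request = {
  model: 'gemma3:4b',
  messages: [{ role: 'user', content: 'Hello!' }],
  stream: false, // single JSON response instead of NDJSON chunks
  options: { temperature: 0.7, num_predict: 4096 },
};

const response = {
  model: 'gemma3:4b',
  message: { role: 'assistant', content: 'Hi! How can I help?' },
  done: true,
  prompt_eval_count: 12, // -> usage.prompt_tokens
  eval_count: 7, // -> usage.completion_tokens
  eval_duration: 140_000_000, // ns; (7 / 140e6) * 1e9 = 50 tokens/s in the debug log
};
```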
diff --git a/apps/chat/apps/backend/src/db/migrations/add-ollama-model.sql b/apps/chat/apps/backend/src/db/migrations/add-ollama-model.sql
new file mode 100644
index 000000000..f1f5c3330
--- /dev/null
+++ b/apps/chat/apps/backend/src/db/migrations/add-ollama-model.sql
@@ -0,0 +1,30 @@
+-- Migration: Add Ollama Gemma 3 4B model
+-- Run this on existing databases to add the local Ollama model
+
+-- Upsert the Ollama model (updates the row in place if the id already exists)
+INSERT INTO models (id, name, description, provider, parameters, is_active, is_default, created_at, updated_at)
+VALUES (
+  '550e8400-e29b-41d4-a716-446655440101',
+  'Gemma 3 4B (Lokal)',
+  'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
+  'ollama',
+  '{"model": "gemma3:4b", "temperature": 0.7, "max_tokens": 4096}',
+  true,
+  true,
+  NOW(),
+  NOW()
+)
+ON CONFLICT (id) DO UPDATE SET
+  name = EXCLUDED.name,
+  description = EXCLUDED.description,
+  provider = EXCLUDED.provider,
+  parameters = EXCLUDED.parameters,
+  is_active = EXCLUDED.is_active,
+  updated_at = NOW();
+
+-- Set the new Ollama model as default and unset all others
+UPDATE models SET is_default = false WHERE id != '550e8400-e29b-41d4-a716-446655440101';
+UPDATE models SET is_default = true WHERE id = '550e8400-e29b-41d4-a716-446655440101';
+
+-- Verify
+SELECT id, name, provider, is_default FROM models ORDER BY is_default DESC, name;
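After running `pnpm db:add-ollama`, the default flag can be double-checked from code. A hedged sketch (assumes the repo's Drizzle `models` schema export and a `db` handle wired up as elsewhere in the backend; the import path for `db` is hypothetical, and the `isDefault`/`provider` column names follow the seed data):

```ts
import { eq } from 'drizzle-orm';
import { models } from './schema/models.schema';
import { db } from './connection'; // hypothetical path to this repo's Drizzle handle

async function verifyDefaultModel(): Promise<void> {
  // Exactly one row should be the default, and it should be the Ollama model
  const defaults = await db.select().from(models).where(eq(models.isDefault, true));
  if (defaults.length !== 1 || defaults[0].provider !== 'ollama') {
    throw new Error(`Unexpected default models: ${JSON.stringify(defaults)}`);
  }
}
```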
diff --git a/apps/chat/apps/backend/src/db/seed.ts b/apps/chat/apps/backend/src/db/seed.ts
index 109c9b3f3..04fefe793 100644
--- a/apps/chat/apps/backend/src/db/seed.ts
+++ b/apps/chat/apps/backend/src/db/seed.ts
@@ -33,7 +33,23 @@
 
   const modelData = [
     // ============================================
-    // OpenRouter Models (All models via OpenRouter)
+    // Local Ollama Models (Free, runs on Mac Mini)
+    // ============================================
+    {
+      id: '550e8400-e29b-41d4-a716-446655440101',
+      name: 'Gemma 3 4B (Lokal)',
+      description: 'Schnelles lokales Modell - kostenlos, läuft auf Mac Mini',
+      provider: 'ollama',
+      parameters: {
+        model: 'gemma3:4b',
+        temperature: 0.7,
+        max_tokens: 4096,
+      },
+      isActive: true,
+      isDefault: true, // Default model - free and local
+    },
+    // ============================================
+    // OpenRouter Models (Cloud, paid)
     // ============================================
     {
       id: '550e8400-e29b-41d4-a716-446655440201',
@@ -46,7 +62,7 @@
         max_tokens: 4096,
       },
       isActive: true,
-      isDefault: true, // Default model - fast and cost-effective
+      isDefault: false,
     },
     {
       id: '550e8400-e29b-41d4-a716-446655440202',
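Because the seed now flips `isDefault` from the 440201 OpenRouter model to the new Ollama entry, a small guard in `seed.ts` (a hypothetical addition, not part of this diff) would catch a future edit that leaves two defaults enabled:

```ts
// Guard: the seed data must declare exactly one default model
function assertSingleDefault(modelData: Array<{ name: string; isDefault: boolean }>): void {
  const defaults = modelData.filter((m) => m.isDefault).map((m) => m.name);
  if (defaults.length !== 1) {
    throw new Error(`Expected exactly one default model, got: ${defaults.join(', ') || 'none'}`);
  }
}
```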