diff --git a/apps/chat/CLAUDE.md b/apps/chat/CLAUDE.md
index a21ab5b66..ed05b2284 100644
--- a/apps/chat/CLAUDE.md
+++ b/apps/chat/CLAUDE.md
@@ -70,7 +70,7 @@ pnpm preview # Preview production build
 - **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
 - **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
 - **Landing**: Astro 5.16, Tailwind CSS
-- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
+- **Backend**: NestJS 10, OpenRouter AI + mana-llm (local), Drizzle ORM, PostgreSQL
 - **Auth**: Mana Core Auth (JWT)
 - **Types**: TypeScript 5.x

@@ -97,9 +97,9 @@ pnpm preview # Preview production build
 # Cloud AI models via OpenRouter (optional if using only local models)
 OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys

-# Local AI via Ollama (optional, defaults to localhost:11434)
-OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
-OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
+# Local AI via mana-llm service
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_TIMEOUT=120000 # Timeout in ms (default: 120s)

 # Database (uses shared Docker PostgreSQL)
 DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat
diff --git a/apps/chat/apps/backend/src/chat/ollama.service.ts b/apps/chat/apps/backend/src/chat/ollama.service.ts
index 31692b9cd..dc8d476b5 100644
--- a/apps/chat/apps/backend/src/chat/ollama.service.ts
+++ b/apps/chat/apps/backend/src/chat/ollama.service.ts
@@ -3,22 +3,28 @@ import { ConfigService } from '@nestjs/config';
 import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
 import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';

-interface OllamaChatMessage {
+interface ChatMessage {
   role: 'system' | 'user' | 'assistant';
   content: string;
 }

-interface OllamaChatResponse {
+interface ChatCompletionResponse {
+  id: string;
   model: string;
-  message: {
-    role: string;
-    content: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
   };
-  done: boolean;
-  total_duration?: number;
-  eval_count?: number;
-  eval_duration?: number;
-  prompt_eval_count?: number;
+}
+
+interface LlmModel {
+  id: string;
+  owned_by: string;
 }

 @Injectable()
@@ -29,8 +35,8 @@ export class OllamaService {
   private isConnected = false;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get('OLLAMA_URL') || 'http://localhost:11434';
-    this.timeout = this.configService.get('OLLAMA_TIMEOUT') || 120000;
+    this.baseUrl = this.configService.get('MANA_LLM_URL') || 'http://localhost:3025';
+    this.timeout = this.configService.get('LLM_TIMEOUT') || 120000;
     // Check connection on startup
     this.checkConnection();
   }
@@ -38,20 +44,23 @@ export class OllamaService {
   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });

       if (response.ok) {
         const data = await response.json();
-        this.isConnected = true;
-        this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
-        return true;
+        this.isConnected = data.status === 'healthy' || data.status === 'degraded';
+        if (this.isConnected) {
+          const providers = Object.keys(data.providers || {}).join(', ');
+          this.logger.log(`mana-llm connected: ${data.status}, providers: ${providers}`);
+        }
+        return this.isConnected;
       }
       this.isConnected = false;
       return false;
     } catch (error) {
       this.isConnected = false;
-      this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
+      this.logger.warn(`mana-llm not available at ${this.baseUrl} - local models will not work`);
       return false;
     }
   }
@@ -62,7 +71,7 @@ export class OllamaService {
   async createChatCompletion(
     modelName: string,
-    messages: OllamaChatMessage[],
+    messages: ChatMessage[],
     temperature?: number,
     maxTokens?: number
   ): AsyncResult<ChatCompletionResponseDto> {
@@ -71,33 +80,31 @@ export class OllamaService {
       await this.checkConnection();
       if (!this.isConnected) {
         return err(
-          ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
+          ServiceError.externalError('mana-llm', `mana-llm server not available at ${this.baseUrl}`)
         );
       }
     }

-    this.logger.log(`Sending request to Ollama model: ${modelName}`);
+    // Normalize model name to include ollama/ prefix if it doesn't have a provider
+    const normalizedModel = modelName.includes('/') ? modelName : `ollama/${modelName}`;
+    this.logger.log(`Sending request to mana-llm model: ${normalizedModel}`);

     try {
       const requestBody: Record<string, unknown> = {
-        model: modelName,
+        model: normalizedModel,
         messages,
         stream: false,
       };

-      // Add options if provided
-      const options: Record<string, unknown> = {};
+      // Add optional parameters
       if (temperature !== undefined) {
-        options.temperature = temperature;
+        requestBody.temperature = temperature;
       }
       if (maxTokens !== undefined) {
-        options.num_predict = maxTokens;
-      }
-      if (Object.keys(options).length > 0) {
-        requestBody.options = options;
+        requestBody.max_tokens = maxTokens;
       }

-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify(requestBody),
@@ -106,45 +113,44 @@
       if (!response.ok) {
         const errorText = await response.text();
-        this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
-        return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
+        this.logger.error(`mana-llm API error: ${response.status} - ${errorText}`);
+        return err(ServiceError.externalError('mana-llm', `API error: ${response.status}`));
       }

-      const data: OllamaChatResponse = await response.json();
+      const data: ChatCompletionResponse = await response.json();

-      if (!data.message?.content) {
-        this.logger.warn('No message content in Ollama response');
-        return err(ServiceError.generationFailed('Ollama', 'No response generated'));
+      if (!data.choices?.[0]?.message?.content) {
+        this.logger.warn('No message content in mana-llm response');
+        return err(ServiceError.generationFailed('mana-llm', 'No response generated'));
       }

-      // Calculate token usage from Ollama metrics
-      const promptTokens = data.prompt_eval_count || 0;
-      const completionTokens = data.eval_count || 0;
+      const usage = data.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

       // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      if (usage.completion_tokens) {
+        this.logger.debug(
+          `Generated ${usage.completion_tokens} tokens (total: ${usage.total_tokens})`
+        );
       }

       return ok({
-        content: data.message.content,
+        content: data.choices[0].message.content,
         usage: {
-          prompt_tokens: promptTokens,
-          completion_tokens: completionTokens,
-          total_tokens: promptTokens + completionTokens,
+          prompt_tokens: usage.prompt_tokens,
+          completion_tokens: usage.completion_tokens,
+          total_tokens: usage.total_tokens,
         },
       });
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        this.logger.error('Ollama request timed out');
-        return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
+        this.logger.error('mana-llm request timed out');
+        return err(ServiceError.generationFailed('mana-llm', 'Request timed out'));
       }
-      this.logger.error('Error calling Ollama API', error);
+      this.logger.error('Error calling mana-llm API', error);
       return err(
         ServiceError.generationFailed(
-          'Ollama',
+          'mana-llm',
           error instanceof Error ? error.message : 'Unknown error',
           error instanceof Error ? error : undefined
         )
@@ -154,14 +160,14 @@ export class OllamaService {
   async listModels(): Promise<string[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`, {
+      const response = await fetch(`${this.baseUrl}/v1/models`, {
         signal: AbortSignal.timeout(5000),
       });
       if (!response.ok) {
         return [];
       }
       const data = await response.json();
-      return (data.models || []).map((m: { name: string }) => m.name);
+      return (data.data || []).map((m: LlmModel) => m.id);
     } catch {
       return [];
     }
diff --git a/services/matrix-ollama-bot/CLAUDE.md b/services/matrix-ollama-bot/CLAUDE.md
index a6c35830c..db02699d1 100644
--- a/services/matrix-ollama-bot/CLAUDE.md
+++ b/services/matrix-ollama-bot/CLAUDE.md
@@ -8,7 +8,7 @@ Matrix Ollama Bot provides a GDPR-compliant chat interface to local LLM inferenc

 - **Framework**: NestJS 10
 - **Matrix**: matrix-bot-sdk
-- **LLM**: Ollama (local inference)
+- **LLM**: mana-llm service (supports Ollama + cloud providers)

 ## Commands
@@ -77,10 +77,10 @@ MATRIX_ACCESS_TOKEN=syt_xxx
 MATRIX_ALLOWED_ROOMS=#ollama-bot:mana.how
 MATRIX_STORAGE_PATH=./data/bot-storage.json

-# Ollama
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=gemma3:4b
-OLLAMA_TIMEOUT=120000
+# LLM (via mana-llm service)
+MANA_LLM_URL=http://localhost:3025
+LLM_MODEL=ollama/gemma3:4b
+LLM_TIMEOUT=120000
 ```

 ## Docker
@@ -93,7 +93,7 @@ docker build -f services/matrix-ollama-bot/Dockerfile -t matrix-ollama-bot servi
 docker run -p 3311:3311 \
   -e MATRIX_HOMESERVER_URL=http://synapse:8008 \
   -e MATRIX_ACCESS_TOKEN=syt_xxx \
-  -e OLLAMA_URL=http://host.docker.internal:11434 \
+  -e MANA_LLM_URL=http://mana-llm:3025 \
   -v matrix-ollama-bot-data:/app/data \
   matrix-ollama-bot
 ```
diff --git a/services/matrix-ollama-bot/src/config/configuration.ts b/services/matrix-ollama-bot/src/config/configuration.ts
index 72d1af582..97f9d5d46 100644
--- a/services/matrix-ollama-bot/src/config/configuration.ts
+++ b/services/matrix-ollama-bot/src/config/configuration.ts
@@ -6,10 +6,10 @@ export default () => ({
     allowedRooms: process.env.MATRIX_ALLOWED_ROOMS?.split(',').filter(Boolean) || [],
     storagePath: process.env.MATRIX_STORAGE_PATH || './data/bot-storage.json',
   },
-  ollama: {
-    url: process.env.OLLAMA_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'gemma3:4b',
-    timeout: parseInt(process.env.OLLAMA_TIMEOUT || '120000', 10),
+  llm: {
+    url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+    model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
+    timeout: parseInt(process.env.LLM_TIMEOUT || '120000', 10),
   },
 });
diff --git a/services/matrix-ollama-bot/src/ollama/ollama.service.ts b/services/matrix-ollama-bot/src/ollama/ollama.service.ts
index d43590a2f..320549994 100644
--- a/services/matrix-ollama-bot/src/ollama/ollama.service.ts
+++ b/services/matrix-ollama-bot/src/ollama/ollama.service.ts
@@ -1,10 +1,36 @@
 import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';

-interface OllamaModel {
+interface LlmModel {
+  id: string;
   name: string;
   size: number;
-  modified_at: string;
+  owned_by: string;
+}
+
+interface ChatMessage {
+  role: 'user' | 'assistant' | 'system';
+  content: string | ContentPart[];
+}
+
+interface ContentPart {
+  type: 'text' | 'image_url';
+  text?: string;
+  image_url?: { url: string };
+}
+
+interface ChatCompletionResponse {
+  id: string;
+  model: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
 }

 @Injectable()
@@ -15,9 +41,9 @@ export class OllamaService implements OnModuleInit {
   private readonly timeout: number;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get('ollama.url') || 'http://localhost:11434';
-    this.defaultModel = this.configService.get('ollama.model') || 'gemma3:4b';
-    this.timeout = this.configService.get('ollama.timeout') || 120000;
+    this.baseUrl = this.configService.get('llm.url') || 'http://localhost:3025';
+    this.defaultModel = this.configService.get('llm.model') || 'ollama/gemma3:4b';
+    this.timeout = this.configService.get('llm.timeout') || 120000;
   }

   async onModuleInit() {
@@ -26,23 +52,29 @@

   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });
       const data = await response.json();
-      this.logger.log(`Ollama connected: v${data.version}`);
-      return true;
+      this.logger.log(`mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`);
+      return data.status === 'healthy' || data.status === 'degraded';
     } catch (error) {
-      this.logger.error(`Failed to connect to Ollama at ${this.baseUrl}:`, error);
+      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
       return false;
     }
   }

-  async listModels(): Promise<OllamaModel[]> {
+  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`);
+      const response = await fetch(`${this.baseUrl}/v1/models`);
       const data = await response.json();
-      return data.models || [];
+
+      // Convert OpenAI format to legacy Ollama format for compatibility
+      return (data.data || []).map((m: LlmModel) => ({
+        name: m.id,
+        size: 0, // mana-llm doesn't provide size
+        modified_at: new Date().toISOString(),
+      }));
     } catch (error) {
       this.logger.error('Failed to list models:', error);
       return [];
@@ -53,10 +85,10 @@
     messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
     model?: string
   ): Promise<string> {
-    const selectedModel = model || this.defaultModel;
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

     try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
@@ -68,21 +100,23 @@
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
+      const data: ChatCompletionResponse = await response.json();

       // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
       }

-      return data.message?.content || '';
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
       }
       throw error;
     }
@@ -93,46 +127,65 @@
   }

   async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
-    const selectedModel = model || this.defaultModel;
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

     try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      // Use OpenAI vision format
+      const messages: ChatMessage[] = [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: prompt },
+            {
+              type: 'image_url',
+              image_url: { url: `data:image/png;base64,${imageBase64}` },
+            },
+          ],
+        },
+      ];
+
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
           model: selectedModel,
-          messages: [
-            {
-              role: 'user',
-              content: prompt,
-              images: [imageBase64],
-            },
-          ],
+          messages,
           stream: false,
         }),
         signal: AbortSignal.timeout(this.timeout),
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
+      const data: ChatCompletionResponse = await response.json();

       // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
+      if (data.usage) {
         this.logger.debug(
-          `Vision: Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`
+          `Vision: Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
         );
       }

-      return data.message?.content || '';
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
+        throw new Error('LLM Timeout - Bildanalyse dauerte zu lange');
       }
       throw error;
     }
   }
+
+  /**
+   * Normalize model name to include provider prefix if missing.
+   * e.g., "gemma3:4b" -> "ollama/gemma3:4b"
+   */
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) {
+      return model;
+    }
+    return `ollama/${model}`;
+  }
 }
diff --git a/services/telegram-ollama-bot/CLAUDE.md b/services/telegram-ollama-bot/CLAUDE.md
index df13eed23..37b537fe1 100644
--- a/services/telegram-ollama-bot/CLAUDE.md
+++ b/services/telegram-ollama-bot/CLAUDE.md
@@ -6,7 +6,7 @@ Telegram Bot für lokale LLM-Inferenz via Ollama auf dem Mac Mini Server.

 - **Framework**: NestJS 10
 - **Telegram**: nestjs-telegraf + Telegraf
-- **LLM**: Ollama API (Gemma 3 4B)
+- **LLM**: mana-llm service (supports Ollama + cloud providers)

 ## Commands
@@ -53,10 +53,10 @@ PORT=3301
 TELEGRAM_BOT_TOKEN=xxx # Bot Token von @BotFather
 TELEGRAM_ALLOWED_USERS=123,456 # Optional: Nur diese User IDs erlauben

-# Ollama
-OLLAMA_URL=http://localhost:11434 # Ollama API URL
-OLLAMA_MODEL=gemma3:4b # Standard-Modell
-OLLAMA_TIMEOUT=120000 # Timeout in ms
+# LLM (via mana-llm service)
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_MODEL=ollama/gemma3:4b # Standard-Modell (provider/model format)
+LLM_TIMEOUT=120000 # Timeout in ms
 ```

 ## Projekt-Struktur
@@ -91,20 +91,20 @@ telegram-ollama-bot:
   environment:
     PORT: 3301
     TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN}
-    OLLAMA_URL: http://host.docker.internal:11434
-    OLLAMA_MODEL: gemma3:4b
+    MANA_LLM_URL: http://mana-llm:3025
+    LLM_MODEL: ollama/gemma3:4b
   ports:
     - "3301:3301"
 ```

-### Option 2: Nativ (empfohlen für beste Ollama-Performance)
+### Option 2: Nativ

 ```bash
 # Auf dem Mac Mini
 cd ~/projects/manacore-monorepo/services/telegram-ollama-bot
 pnpm install
 pnpm build
-TELEGRAM_BOT_TOKEN=xxx OLLAMA_URL=http://localhost:11434 pnpm start:prod
+TELEGRAM_BOT_TOKEN=xxx MANA_LLM_URL=http://localhost:3025 pnpm start:prod
 ```

 ## Neuen Bot erstellen
diff --git a/services/telegram-ollama-bot/src/config/configuration.ts b/services/telegram-ollama-bot/src/config/configuration.ts
index 667b1ca75..be2728189 100644
--- a/services/telegram-ollama-bot/src/config/configuration.ts
+++ b/services/telegram-ollama-bot/src/config/configuration.ts
@@ -5,10 +5,10 @@ export default () => ({
     allowedUsers:
       process.env.TELEGRAM_ALLOWED_USERS?.split(',').map((id) => parseInt(id, 10)) || [],
   },
-  ollama: {
-    url: process.env.OLLAMA_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'gemma3:4b',
-    timeout: parseInt(process.env.OLLAMA_TIMEOUT || '120000', 10),
+  llm: {
+    url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+    model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
+    timeout: parseInt(process.env.LLM_TIMEOUT || '120000', 10),
   },
 });
diff --git a/services/telegram-ollama-bot/src/ollama/ollama.service.ts b/services/telegram-ollama-bot/src/ollama/ollama.service.ts
index ebb38c7df..751378d7c 100644
--- a/services/telegram-ollama-bot/src/ollama/ollama.service.ts
+++ b/services/telegram-ollama-bot/src/ollama/ollama.service.ts
@@ -1,19 +1,23 @@
 import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';

-interface OllamaGenerateResponse {
-  model: string;
-  response: string;
-  done: boolean;
-  total_duration?: number;
-  eval_count?: number;
-  eval_duration?: number;
+interface LlmModel {
+  id: string;
+  owned_by: string;
 }

-interface OllamaModel {
-  name: string;
-  size: number;
-  modified_at: string;
+interface ChatCompletionResponse {
+  id: string;
+  model: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
 }

 @Injectable()
@@ -24,9 +28,9 @@ export class OllamaService implements OnModuleInit {
   private readonly timeout: number;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get('ollama.url') || 'http://localhost:11434';
-    this.defaultModel = this.configService.get('ollama.model') || 'gemma3:4b';
-    this.timeout = this.configService.get('ollama.timeout') || 120000;
+    this.baseUrl = this.configService.get('llm.url') || 'http://localhost:3025';
+    this.defaultModel = this.configService.get('llm.model') || 'ollama/gemma3:4b';
+    this.timeout = this.configService.get('llm.timeout') || 120000;
   }

   async onModuleInit() {
@@ -35,23 +39,31 @@

   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });
       const data = await response.json();
-      this.logger.log(`Ollama connected: v${data.version}`);
-      return true;
+      this.logger.log(
+        `mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`
+      );
+      return data.status === 'healthy' || data.status === 'degraded';
     } catch (error) {
-      this.logger.error(`Failed to connect to Ollama at ${this.baseUrl}:`, error);
+      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
       return false;
     }
   }

-  async listModels(): Promise<OllamaModel[]> {
+  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`);
+      const response = await fetch(`${this.baseUrl}/v1/models`);
       const data = await response.json();
-      return data.models || [];
+
+      // Convert OpenAI format to legacy Ollama format for compatibility
+      return (data.data || []).map((m: LlmModel) => ({
+        name: m.id,
+        size: 0,
+        modified_at: new Date().toISOString(),
+      }));
     } catch (error) {
       this.logger.error('Failed to list models:', error);
       return [];
@@ -59,55 +71,17 @@
   }

   async generate(prompt: string, systemPrompt?: string, model?: string): Promise<string> {
-    const selectedModel = model || this.defaultModel;
-
-    const body: Record<string, unknown> = {
-      model: selectedModel,
-      prompt,
-      stream: false,
-    };
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;
+    // Convert generate to chat format
+    const messages: { role: 'user' | 'assistant' | 'system'; content: string }[] = [];
     if (systemPrompt) {
-      body.system = systemPrompt;
+      messages.push({ role: 'system', content: systemPrompt });
     }
+    messages.push({ role: 'user', content: prompt });

     try {
-      const response = await fetch(`${this.baseUrl}/api/generate`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify(body),
-        signal: AbortSignal.timeout(this.timeout),
-      });
-
-      if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
-      }
-
-      const data: OllamaGenerateResponse = await response.json();
-
-      // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
-      }
-
-      return data.response;
-    } catch (error) {
-      if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
-      }
-      throw error;
-    }
-  }
-
-  async chat(
-    messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
-    model?: string
-  ): Promise<string> {
-    const selectedModel = model || this.defaultModel;
-
-    try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
@@ -119,14 +93,63 @@
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
-      return data.message?.content || '';
+      const data: ChatCompletionResponse = await response.json();
+
+      // Log performance metrics
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
+      }
+
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
+      }
+      throw error;
+    }
+  }
+
+  async chat(
+    messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
+    model?: string
+  ): Promise<string> {
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;
+
+    try {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          model: selectedModel,
+          messages,
+          stream: false,
+        }),
+        signal: AbortSignal.timeout(this.timeout),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
+      }
+
+      const data: ChatCompletionResponse = await response.json();
+
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
+      }
+
+      return data.choices[0]?.message?.content || '';
+    } catch (error) {
+      if (error instanceof Error && error.name === 'TimeoutError') {
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
       }
       throw error;
     }
@@ -135,4 +158,14 @@
   getDefaultModel(): string {
     return this.defaultModel;
   }
+
+  /**
+   * Normalize model name to include provider prefix if missing.
+   */
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) {
+      return model;
+    }
+    return `ollama/${model}`;
+  }
 }
diff --git a/services/telegram-project-doc-bot/CLAUDE.md b/services/telegram-project-doc-bot/CLAUDE.md
index 60c57331c..625ec6efd 100644
--- a/services/telegram-project-doc-bot/CLAUDE.md
+++ b/services/telegram-project-doc-bot/CLAUDE.md
@@ -9,7 +9,7 @@ Telegram Bot zum Sammeln von Projektdokumentation (Fotos, Sprachnotizen, Text) u
 - **Database**: PostgreSQL + Drizzle ORM
 - **Storage**: S3 (MinIO lokal, Hetzner in Produktion)
 - **AI - Transcription**: OpenAI Whisper
-- **AI - Generation**: Ollama (lokal) oder OpenAI GPT
+- **AI - Generation**: mana-llm service oder OpenAI GPT

 ## Commands
@@ -90,9 +90,9 @@ S3_BUCKET=projectdoc-storage
 OPENAI_API_KEY=sk-xxx

 # AI - Generation
-LLM_PROVIDER=ollama # ollama oder openai
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=gemma3:4b
+LLM_PROVIDER=mana-llm # mana-llm oder openai
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_MODEL=ollama/gemma3:4b # Model with provider prefix
 ```

 ## Projekt-Struktur
diff --git a/services/telegram-project-doc-bot/src/config/configuration.ts b/services/telegram-project-doc-bot/src/config/configuration.ts
index 467903802..d59aa34a1 100644
--- a/services/telegram-project-doc-bot/src/config/configuration.ts
+++ b/services/telegram-project-doc-bot/src/config/configuration.ts
@@ -26,10 +26,10 @@
     model: process.env.STT_MODEL || 'whisper', // 'whisper' or 'voxtral'
   },
   llm: {
-    provider: process.env.LLM_PROVIDER || 'ollama',
-    ollama: {
-      url: process.env.OLLAMA_URL || 'http://localhost:11434',
-      model: process.env.OLLAMA_MODEL || 'gemma3:4b',
+    provider: process.env.LLM_PROVIDER || 'mana-llm',
+    manaLlm: {
+      url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+      model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
     },
   },
 });
diff --git a/services/telegram-project-doc-bot/src/generation/generation.service.ts b/services/telegram-project-doc-bot/src/generation/generation.service.ts
index 3cd293b0e..9536e59c7 100644
--- a/services/telegram-project-doc-bot/src/generation/generation.service.ts
+++ b/services/telegram-project-doc-bot/src/generation/generation.service.ts
@@ -14,8 +14,8 @@ type BlogStyle = keyof typeof BLOG_STYLES;
 export class GenerationService {
   private readonly logger = new Logger(GenerationService.name);
   private readonly llmProvider: string;
-  private readonly ollamaUrl: string;
-  private readonly ollamaModel: string;
+  private readonly manaLlmUrl: string;
+  private readonly manaLlmModel: string;
   private readonly openai: OpenAI | null;

   constructor(
@@ -23,9 +23,10 @@
     private db: PostgresJsDatabase,
     private configService: ConfigService
   ) {
-    this.llmProvider = this.configService.get('llm.provider') || 'ollama';
-    this.ollamaUrl = this.configService.get('llm.ollama.url') || 'http://localhost:11434';
-    this.ollamaModel = this.configService.get('llm.ollama.model') || 'gemma3:4b';
+    this.llmProvider = this.configService.get('llm.provider') || 'mana-llm';
+    this.manaLlmUrl = this.configService.get('llm.manaLlm.url') || 'http://localhost:3025';
+    this.manaLlmModel =
+      this.configService.get('llm.manaLlm.model') || 'ollama/gemma3:4b';

     const apiKey = this.configService.get('openai.apiKey');
     this.openai = apiKey ? new OpenAI({ apiKey }) : null;
@@ -148,7 +149,7 @@ Beginne direkt mit dem Blogbeitrag (ohne Einleitung wie "Hier ist der Blogbeitra
       return this.callOpenAI(prompt);
     }

-    return this.callOllama(prompt);
+    return this.callManaLlm(prompt);
   }

   private async callOpenAI(prompt: string): Promise<string> {
@@ -166,24 +167,27 @@
     return response.choices[0]?.message?.content || '';
   }

-  private async callOllama(prompt: string): Promise<string> {
-    const response = await fetch(`${this.ollamaUrl}/api/generate`, {
+  private async callManaLlm(prompt: string): Promise<string> {
+    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({
-        model: this.ollamaModel,
-        prompt,
+        model: this.manaLlmModel,
+        messages: [{ role: 'user', content: prompt }],
+        temperature: 0.7,
+        max_tokens: 4000,
         stream: false,
       }),
       signal: AbortSignal.timeout(180000), // 3 minutes timeout
     });

     if (!response.ok) {
-      throw new Error(`Ollama API error: ${response.status}`);
+      const errorText = await response.text();
+      throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
     }

     const data = await response.json();
-    return data.response || '';
+    return data.choices?.[0]?.message?.content || '';
   }

   async getLatestGeneration(projectId: string): Promise {
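
For reference, a minimal standalone sketch of the OpenAI-compatible call the services above now make against mana-llm. The /v1/chat/completions path, the MANA_LLM_URL default of http://localhost:3025, the ollama/gemma3:4b model name, and the response shape are taken from the patch; the helper name, the trimmed interface, and the 120s timeout value are illustrative assumptions, not part of the patch.

// Sketch only: exercises the mana-llm chat-completions endpoint assumed by this migration.
interface SketchChatCompletionResponse {
  choices: { message: { role: string; content: string } }[];
  usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
}

async function completeChat(prompt: string): Promise<string> {
  // Same default as the services' configuration fallbacks.
  const baseUrl = process.env.MANA_LLM_URL || 'http://localhost:3025';
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      // provider/model format; the services prefix bare names with "ollama/" via normalizeModel()
      model: 'ollama/gemma3:4b',
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    }),
    signal: AbortSignal.timeout(120000),
  });
  if (!response.ok) {
    throw new Error(`mana-llm API error: ${response.status} - ${await response.text()}`);
  }
  const data: SketchChatCompletionResponse = await response.json();
  return data.choices[0]?.message?.content ?? '';
}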