Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-15 05:41:09 +02:00)
♻️ refactor: migrate bots and chat-backend to mana-llm
Migrate all LLM consumers from direct Ollama calls to centralized mana-llm service with OpenAI-compatible API.

Migrated services:
- matrix-ollama-bot
- telegram-ollama-bot
- chat-backend
- telegram-project-doc-bot

New env vars: MANA_LLM_URL, LLM_MODEL, LLM_TIMEOUT
Replaces: OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
This commit is contained in:
parent 7b2ac78032
commit 384244fe50
11 changed files with 300 additions and 204 deletions
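The diff below replaces direct Ollama HTTP calls with the OpenAI-compatible endpoints exposed by mana-llm. For orientation, here is a minimal sketch of what a consumer call looks like after the migration, assembled from the endpoints, env vars, and response shape used in this commit (the helper itself is not part of the diff):

```typescript
// Sketch only: mirrors the request/response shape the migrated services use.
const baseUrl = process.env.MANA_LLM_URL || 'http://localhost:3025';
const model = process.env.LLM_MODEL || 'ollama/gemma3:4b'; // provider/model format
const timeout = parseInt(process.env.LLM_TIMEOUT || '120000', 10);

async function complete(prompt: string): Promise<string> {
  // mana-llm exposes an OpenAI-compatible chat completions endpoint
  const response = await fetch(`${baseUrl}/v1/chat/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    }),
    signal: AbortSignal.timeout(timeout),
  });
  if (!response.ok) {
    throw new Error(`mana-llm API error: ${response.status}`);
  }
  const data = await response.json();
  // OpenAI-style response: choices[0].message.content plus a usage block
  return data.choices?.[0]?.message?.content ?? '';
}
```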
@@ -70,7 +70,7 @@ pnpm preview # Preview production build
 - **Mobile**: React Native 0.76.7 + Expo SDK 52, NativeWind, Expo Router
 - **Web**: SvelteKit 2.x, Svelte 5, Tailwind CSS 4
 - **Landing**: Astro 5.16, Tailwind CSS
-- **Backend**: NestJS 10, OpenRouter AI + Ollama (local), Drizzle ORM, PostgreSQL
+- **Backend**: NestJS 10, OpenRouter AI + mana-llm (local), Drizzle ORM, PostgreSQL
 - **Auth**: Mana Core Auth (JWT)
 - **Types**: TypeScript 5.x
@@ -97,9 +97,9 @@ pnpm preview # Preview production build
 # Cloud AI models via OpenRouter (optional if using only local models)
 OPENROUTER_API_KEY=sk-or-v1-xxx # Get at https://openrouter.ai/keys

-# Local AI via Ollama (optional, defaults to localhost:11434)
-OLLAMA_URL=http://localhost:11434 # Or http://host.docker.internal:11434 in Docker
-OLLAMA_TIMEOUT=120000 # Timeout in ms (default: 120s)
+# Local AI via mana-llm service
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_TIMEOUT=120000 # Timeout in ms (default: 120s)

 # Database (uses shared Docker PostgreSQL)
 DATABASE_URL=postgresql://manacore:devpassword@localhost:5432/chat

@@ -3,22 +3,28 @@ import { ConfigService } from '@nestjs/config';
 import { AsyncResult, ok, err, ServiceError } from '@manacore/shared-errors';
 import type { ChatCompletionResponseDto } from './dto/chat-completion.dto';

-interface OllamaChatMessage {
+interface ChatMessage {
   role: 'system' | 'user' | 'assistant';
   content: string;
 }

-interface OllamaChatResponse {
+interface ChatCompletionResponse {
+  id: string;
   model: string;
-  message: {
-    role: string;
-    content: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
   };
-  done: boolean;
-  total_duration?: number;
-  eval_count?: number;
-  eval_duration?: number;
-  prompt_eval_count?: number;
 }

+interface LlmModel {
+  id: string;
+  owned_by: string;
+}
+
 @Injectable()
@@ -29,8 +35,8 @@ export class OllamaService {
   private isConnected = false;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get<string>('OLLAMA_URL') || 'http://localhost:11434';
-    this.timeout = this.configService.get<number>('OLLAMA_TIMEOUT') || 120000;
+    this.baseUrl = this.configService.get<string>('MANA_LLM_URL') || 'http://localhost:3025';
+    this.timeout = this.configService.get<number>('LLM_TIMEOUT') || 120000;

     // Check connection on startup
     this.checkConnection();
@@ -38,20 +44,23 @@ export class OllamaService {

   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });
       if (response.ok) {
         const data = await response.json();
-        this.isConnected = true;
-        this.logger.log(`Ollama connected: v${data.version} at ${this.baseUrl}`);
-        return true;
+        this.isConnected = data.status === 'healthy' || data.status === 'degraded';
+        if (this.isConnected) {
+          const providers = Object.keys(data.providers || {}).join(', ');
+          this.logger.log(`mana-llm connected: ${data.status}, providers: ${providers}`);
+        }
+        return this.isConnected;
       }
       this.isConnected = false;
       return false;
     } catch (error) {
       this.isConnected = false;
-      this.logger.warn(`Ollama not available at ${this.baseUrl} - local models will not work`);
+      this.logger.warn(`mana-llm not available at ${this.baseUrl} - local models will not work`);
       return false;
     }
   }
@@ -62,7 +71,7 @@ export class OllamaService {

   async createChatCompletion(
     modelName: string,
-    messages: OllamaChatMessage[],
+    messages: ChatMessage[],
     temperature?: number,
     maxTokens?: number
   ): AsyncResult<ChatCompletionResponseDto> {
@@ -71,33 +80,31 @@ export class OllamaService {
      await this.checkConnection();
      if (!this.isConnected) {
        return err(
-          ServiceError.externalError('Ollama', `Ollama server not available at ${this.baseUrl}`)
+          ServiceError.externalError('mana-llm', `mana-llm server not available at ${this.baseUrl}`)
        );
      }
    }

-    this.logger.log(`Sending request to Ollama model: ${modelName}`);
+    // Normalize model name to include ollama/ prefix if it doesn't have a provider
+    const normalizedModel = modelName.includes('/') ? modelName : `ollama/${modelName}`;
+    this.logger.log(`Sending request to mana-llm model: ${normalizedModel}`);

    try {
      const requestBody: Record<string, unknown> = {
-        model: modelName,
+        model: normalizedModel,
        messages,
        stream: false,
      };

-      // Add options if provided
-      const options: Record<string, unknown> = {};
+      // Add optional parameters
      if (temperature !== undefined) {
-        options.temperature = temperature;
+        requestBody.temperature = temperature;
      }
      if (maxTokens !== undefined) {
-        options.num_predict = maxTokens;
-      }
-      if (Object.keys(options).length > 0) {
-        requestBody.options = options;
+        requestBody.max_tokens = maxTokens;
      }

-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(requestBody),
@@ -106,45 +113,44 @@ export class OllamaService {

      if (!response.ok) {
        const errorText = await response.text();
-        this.logger.error(`Ollama API error: ${response.status} - ${errorText}`);
-        return err(ServiceError.externalError('Ollama', `API error: ${response.status}`));
+        this.logger.error(`mana-llm API error: ${response.status} - ${errorText}`);
+        return err(ServiceError.externalError('mana-llm', `API error: ${response.status}`));
      }

-      const data: OllamaChatResponse = await response.json();
+      const data: ChatCompletionResponse = await response.json();

-      if (!data.message?.content) {
-        this.logger.warn('No message content in Ollama response');
-        return err(ServiceError.generationFailed('Ollama', 'No response generated'));
+      if (!data.choices?.[0]?.message?.content) {
+        this.logger.warn('No message content in mana-llm response');
+        return err(ServiceError.generationFailed('mana-llm', 'No response generated'));
      }

-      // Calculate token usage from Ollama metrics
-      const promptTokens = data.prompt_eval_count || 0;
-      const completionTokens = data.eval_count || 0;
+      const usage = data.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };

      // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      if (usage.completion_tokens) {
+        this.logger.debug(
+          `Generated ${usage.completion_tokens} tokens (total: ${usage.total_tokens})`
+        );
      }

      return ok({
-        content: data.message.content,
+        content: data.choices[0].message.content,
        usage: {
-          prompt_tokens: promptTokens,
-          completion_tokens: completionTokens,
-          total_tokens: promptTokens + completionTokens,
+          prompt_tokens: usage.prompt_tokens,
+          completion_tokens: usage.completion_tokens,
+          total_tokens: usage.total_tokens,
        },
      });
    } catch (error) {
      if (error instanceof Error && error.name === 'TimeoutError') {
-        this.logger.error('Ollama request timed out');
-        return err(ServiceError.generationFailed('Ollama', 'Request timed out'));
+        this.logger.error('mana-llm request timed out');
+        return err(ServiceError.generationFailed('mana-llm', 'Request timed out'));
      }

-      this.logger.error('Error calling Ollama API', error);
+      this.logger.error('Error calling mana-llm API', error);
      return err(
        ServiceError.generationFailed(
-          'Ollama',
+          'mana-llm',
          error instanceof Error ? error.message : 'Unknown error',
          error instanceof Error ? error : undefined
        )
@@ -154,14 +160,14 @@ export class OllamaService {

   async listModels(): Promise<string[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`, {
+      const response = await fetch(`${this.baseUrl}/v1/models`, {
         signal: AbortSignal.timeout(5000),
       });
       if (!response.ok) {
         return [];
       }
       const data = await response.json();
-      return (data.models || []).map((m: { name: string }) => m.name);
+      return (data.data || []).map((m: LlmModel) => m.id);
     } catch {
       return [];
     }

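Note that checkConnection() now targets the service's /health endpoint instead of Ollama's /api/version and derives connectivity from the status field. A small sketch of the response shape the code above assumes (the actual mana-llm payload is not part of this diff and may contain more fields):

```typescript
// Assumed /health payload, inferred from how checkConnection() reads it (sketch, not the service's spec).
interface ManaLlmHealth {
  status: 'healthy' | 'degraded' | string; // any other value is treated as not connected
  providers?: Record<string, unknown>;     // keys are only used for the log line
}

async function isManaLlmUp(baseUrl = 'http://localhost:3025'): Promise<boolean> {
  const res = await fetch(`${baseUrl}/health`, { signal: AbortSignal.timeout(5000) });
  const health: ManaLlmHealth = await res.json();
  return health.status === 'healthy' || health.status === 'degraded';
}
```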
@@ -8,7 +8,7 @@ Matrix Ollama Bot provides a GDPR-compliant chat interface to local LLM inferenc

 - **Framework**: NestJS 10
 - **Matrix**: matrix-bot-sdk
-- **LLM**: Ollama (local inference)
+- **LLM**: mana-llm service (supports Ollama + cloud providers)

 ## Commands

@@ -77,10 +77,10 @@ MATRIX_ACCESS_TOKEN=syt_xxx
 MATRIX_ALLOWED_ROOMS=#ollama-bot:mana.how
 MATRIX_STORAGE_PATH=./data/bot-storage.json

-# Ollama
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=gemma3:4b
-OLLAMA_TIMEOUT=120000
+# LLM (via mana-llm service)
+MANA_LLM_URL=http://localhost:3025
+LLM_MODEL=ollama/gemma3:4b
+LLM_TIMEOUT=120000
 ```

 ## Docker
@@ -93,7 +93,7 @@ docker build -f services/matrix-ollama-bot/Dockerfile -t matrix-ollama-bot servi
 docker run -p 3311:3311 \
   -e MATRIX_HOMESERVER_URL=http://synapse:8008 \
   -e MATRIX_ACCESS_TOKEN=syt_xxx \
-  -e OLLAMA_URL=http://host.docker.internal:11434 \
+  -e MANA_LLM_URL=http://mana-llm:3025 \
   -v matrix-ollama-bot-data:/app/data \
   matrix-ollama-bot
 ```

@@ -6,10 +6,10 @@ export default () => ({
     allowedRooms: process.env.MATRIX_ALLOWED_ROOMS?.split(',').filter(Boolean) || [],
     storagePath: process.env.MATRIX_STORAGE_PATH || './data/bot-storage.json',
   },
-  ollama: {
-    url: process.env.OLLAMA_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'gemma3:4b',
-    timeout: parseInt(process.env.OLLAMA_TIMEOUT || '120000', 10),
+  llm: {
+    url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+    model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
+    timeout: parseInt(process.env.LLM_TIMEOUT || '120000', 10),
   },
 });

@@ -1,10 +1,36 @@
 import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';

-interface OllamaModel {
-  name: string;
-  size: number;
-  modified_at: string;
+interface LlmModel {
+  id: string;
+  owned_by: string;
 }

+interface ChatMessage {
+  role: 'user' | 'assistant' | 'system';
+  content: string | ContentPart[];
+}
+
+interface ContentPart {
+  type: 'text' | 'image_url';
+  text?: string;
+  image_url?: { url: string };
+}
+
+interface ChatCompletionResponse {
+  id: string;
+  model: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
 @Injectable()
@@ -15,9 +41,9 @@ export class OllamaService implements OnModuleInit {
   private readonly timeout: number;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get<string>('ollama.url') || 'http://localhost:11434';
-    this.defaultModel = this.configService.get<string>('ollama.model') || 'gemma3:4b';
-    this.timeout = this.configService.get<number>('ollama.timeout') || 120000;
+    this.baseUrl = this.configService.get<string>('llm.url') || 'http://localhost:3025';
+    this.defaultModel = this.configService.get<string>('llm.model') || 'ollama/gemma3:4b';
+    this.timeout = this.configService.get<number>('llm.timeout') || 120000;
   }

   async onModuleInit() {
@@ -26,23 +52,29 @@ export class OllamaService implements OnModuleInit {

   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });
       const data = await response.json();
-      this.logger.log(`Ollama connected: v${data.version}`);
-      return true;
+      this.logger.log(`mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`);
+      return data.status === 'healthy' || data.status === 'degraded';
     } catch (error) {
-      this.logger.error(`Failed to connect to Ollama at ${this.baseUrl}:`, error);
+      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
       return false;
     }
   }

-  async listModels(): Promise<OllamaModel[]> {
+  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`);
+      const response = await fetch(`${this.baseUrl}/v1/models`);
       const data = await response.json();
-      return data.models || [];
+
+      // Convert OpenAI format to legacy Ollama format for compatibility
+      return (data.data || []).map((m: LlmModel) => ({
+        name: m.id,
+        size: 0, // mana-llm doesn't provide size
+        modified_at: new Date().toISOString(),
+      }));
     } catch (error) {
       this.logger.error('Failed to list models:', error);
       return [];
@@ -53,10 +85,10 @@ export class OllamaService implements OnModuleInit {
     messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
     model?: string
   ): Promise<string> {
-    const selectedModel = model || this.defaultModel;
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

     try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
@@ -68,21 +100,23 @@ export class OllamaService implements OnModuleInit {
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
+      const data: ChatCompletionResponse = await response.json();

       // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
       }

-      return data.message?.content || '';
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
       }
       throw error;
     }
@@ -93,46 +127,65 @@ export class OllamaService implements OnModuleInit {
   }

   async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
-    const selectedModel = model || this.defaultModel;
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;

     try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      // Use OpenAI vision format
+      const messages: ChatMessage[] = [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: prompt },
+            {
+              type: 'image_url',
+              image_url: { url: `data:image/png;base64,${imageBase64}` },
+            },
+          ],
+        },
+      ];
+
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
           model: selectedModel,
-          messages: [
-            {
-              role: 'user',
-              content: prompt,
-              images: [imageBase64],
-            },
-          ],
+          messages,
           stream: false,
         }),
         signal: AbortSignal.timeout(this.timeout),
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
+      const data: ChatCompletionResponse = await response.json();

       // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
+      if (data.usage) {
         this.logger.debug(
-          `Vision: Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`
+          `Vision: Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
         );
       }

-      return data.message?.content || '';
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
+        throw new Error('LLM Timeout - Bildanalyse dauerte zu lange');
       }
       throw error;
     }
   }
+
+  /**
+   * Normalize model name to include provider prefix if missing.
+   * e.g., "gemma3:4b" -> "ollama/gemma3:4b"
+   */
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) {
+      return model;
+    }
+    return `ollama/${model}`;
+  }
 }

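The bot now uses the provider/model naming scheme; the private normalizeModel helper added above maps bare Ollama model names onto it. A standalone copy of that mapping, shown only to document the behaviour:

```typescript
// Same mapping as the private normalizeModel() helper in the diff above.
const normalize = (model: string): string => (model.includes('/') ? model : `ollama/${model}`);

normalize('gemma3:4b');        // -> 'ollama/gemma3:4b' (bare name: defaults to the ollama provider)
normalize('ollama/gemma3:4b'); // -> 'ollama/gemma3:4b' (already prefixed: returned unchanged)
```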
@@ -6,7 +6,7 @@ Telegram Bot für lokale LLM-Inferenz via Ollama auf dem Mac Mini Server.

 - **Framework**: NestJS 10
 - **Telegram**: nestjs-telegraf + Telegraf
-- **LLM**: Ollama API (Gemma 3 4B)
+- **LLM**: mana-llm service (supports Ollama + cloud providers)

 ## Commands

@@ -53,10 +53,10 @@ PORT=3301
 TELEGRAM_BOT_TOKEN=xxx # Bot Token von @BotFather
 TELEGRAM_ALLOWED_USERS=123,456 # Optional: Nur diese User IDs erlauben

-# Ollama
-OLLAMA_URL=http://localhost:11434 # Ollama API URL
-OLLAMA_MODEL=gemma3:4b # Standard-Modell
-OLLAMA_TIMEOUT=120000 # Timeout in ms
+# LLM (via mana-llm service)
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_MODEL=ollama/gemma3:4b # Standard-Modell (provider/model format)
+LLM_TIMEOUT=120000 # Timeout in ms
 ```

 ## Projekt-Struktur
@@ -91,20 +91,20 @@ telegram-ollama-bot:
   environment:
     PORT: 3301
     TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN}
-    OLLAMA_URL: http://host.docker.internal:11434
-    OLLAMA_MODEL: gemma3:4b
+    MANA_LLM_URL: http://mana-llm:3025
+    LLM_MODEL: ollama/gemma3:4b
   ports:
     - "3301:3301"
 ```

-### Option 2: Nativ (empfohlen für beste Ollama-Performance)
+### Option 2: Nativ

 ```bash
 # Auf dem Mac Mini
 cd ~/projects/manacore-monorepo/services/telegram-ollama-bot
 pnpm install
 pnpm build
-TELEGRAM_BOT_TOKEN=xxx OLLAMA_URL=http://localhost:11434 pnpm start:prod
+TELEGRAM_BOT_TOKEN=xxx MANA_LLM_URL=http://localhost:3025 pnpm start:prod
 ```

 ## Neuen Bot erstellen

@@ -5,10 +5,10 @@ export default () => ({
     allowedUsers:
       process.env.TELEGRAM_ALLOWED_USERS?.split(',').map((id) => parseInt(id, 10)) || [],
   },
-  ollama: {
-    url: process.env.OLLAMA_URL || 'http://localhost:11434',
-    model: process.env.OLLAMA_MODEL || 'gemma3:4b',
-    timeout: parseInt(process.env.OLLAMA_TIMEOUT || '120000', 10),
+  llm: {
+    url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+    model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
+    timeout: parseInt(process.env.LLM_TIMEOUT || '120000', 10),
   },
 });

@@ -1,19 +1,23 @@
 import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';

-interface OllamaGenerateResponse {
-  model: string;
-  response: string;
-  done: boolean;
-  total_duration?: number;
-  eval_count?: number;
-  eval_duration?: number;
+interface LlmModel {
+  id: string;
+  owned_by: string;
 }

-interface OllamaModel {
-  name: string;
-  size: number;
-  modified_at: string;
+interface ChatCompletionResponse {
+  id: string;
+  model: string;
+  choices: {
+    message: { role: string; content: string };
+    finish_reason: string;
+  }[];
+  usage: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
 }

 @Injectable()
@@ -24,9 +28,9 @@ export class OllamaService implements OnModuleInit {
   private readonly timeout: number;

   constructor(private configService: ConfigService) {
-    this.baseUrl = this.configService.get<string>('ollama.url') || 'http://localhost:11434';
-    this.defaultModel = this.configService.get<string>('ollama.model') || 'gemma3:4b';
-    this.timeout = this.configService.get<number>('ollama.timeout') || 120000;
+    this.baseUrl = this.configService.get<string>('llm.url') || 'http://localhost:3025';
+    this.defaultModel = this.configService.get<string>('llm.model') || 'ollama/gemma3:4b';
+    this.timeout = this.configService.get<number>('llm.timeout') || 120000;
   }

   async onModuleInit() {
@@ -35,23 +39,31 @@ export class OllamaService implements OnModuleInit {

   async checkConnection(): Promise<boolean> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/version`, {
+      const response = await fetch(`${this.baseUrl}/health`, {
         signal: AbortSignal.timeout(5000),
       });
       const data = await response.json();
-      this.logger.log(`Ollama connected: v${data.version}`);
-      return true;
+      this.logger.log(
+        `mana-llm connected: ${data.status}, providers: ${Object.keys(data.providers || {}).join(', ')}`
+      );
+      return data.status === 'healthy' || data.status === 'degraded';
     } catch (error) {
-      this.logger.error(`Failed to connect to Ollama at ${this.baseUrl}:`, error);
+      this.logger.error(`Failed to connect to mana-llm at ${this.baseUrl}:`, error);
       return false;
     }
   }

-  async listModels(): Promise<OllamaModel[]> {
+  async listModels(): Promise<{ name: string; size: number; modified_at: string }[]> {
     try {
-      const response = await fetch(`${this.baseUrl}/api/tags`);
+      const response = await fetch(`${this.baseUrl}/v1/models`);
       const data = await response.json();
-      return data.models || [];
+
+      // Convert OpenAI format to legacy Ollama format for compatibility
+      return (data.data || []).map((m: LlmModel) => ({
+        name: m.id,
+        size: 0,
+        modified_at: new Date().toISOString(),
+      }));
     } catch (error) {
       this.logger.error('Failed to list models:', error);
       return [];
@@ -59,55 +71,17 @@ export class OllamaService implements OnModuleInit {
   }

   async generate(prompt: string, systemPrompt?: string, model?: string): Promise<string> {
-    const selectedModel = model || this.defaultModel;
-
-    const body: Record<string, unknown> = {
-      model: selectedModel,
-      prompt,
-      stream: false,
-    };
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;
+
+    // Convert generate to chat format
+    const messages: { role: 'user' | 'assistant' | 'system'; content: string }[] = [];
     if (systemPrompt) {
-      body.system = systemPrompt;
+      messages.push({ role: 'system', content: systemPrompt });
     }
+    messages.push({ role: 'user', content: prompt });

     try {
-      const response = await fetch(`${this.baseUrl}/api/generate`, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify(body),
-        signal: AbortSignal.timeout(this.timeout),
-      });
-
-      if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
-      }
-
-      const data: OllamaGenerateResponse = await response.json();
-
-      // Log performance metrics
-      if (data.eval_count && data.eval_duration) {
-        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
-        this.logger.debug(`Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`);
-      }
-
-      return data.response;
-    } catch (error) {
-      if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
-      }
-      throw error;
-    }
-  }
-
-  async chat(
-    messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
-    model?: string
-  ): Promise<string> {
-    const selectedModel = model || this.defaultModel;
-
-    try {
-      const response = await fetch(`${this.baseUrl}/api/chat`, {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify({
@@ -119,14 +93,63 @@ export class OllamaService implements OnModuleInit {
       });

       if (!response.ok) {
-        throw new Error(`Ollama API error: ${response.status}`);
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
       }

-      const data = await response.json();
-      return data.message?.content || '';
+      const data: ChatCompletionResponse = await response.json();
+
+      // Log performance metrics
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
+      }
+
+      return data.choices[0]?.message?.content || '';
     } catch (error) {
       if (error instanceof Error && error.name === 'TimeoutError') {
-        throw new Error('Ollama Timeout - Antwort dauerte zu lange');
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
       }
       throw error;
     }
+  }
+
+  async chat(
+    messages: { role: 'user' | 'assistant' | 'system'; content: string }[],
+    model?: string
+  ): Promise<string> {
+    const selectedModel = model ? this.normalizeModel(model) : this.defaultModel;
+
+    try {
+      const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          model: selectedModel,
+          messages,
+          stream: false,
+        }),
+        signal: AbortSignal.timeout(this.timeout),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
+      }
+
+      const data: ChatCompletionResponse = await response.json();
+
+      if (data.usage) {
+        this.logger.debug(
+          `Generated ${data.usage.completion_tokens} tokens (total: ${data.usage.total_tokens})`
+        );
+      }
+
+      return data.choices[0]?.message?.content || '';
+    } catch (error) {
+      if (error instanceof Error && error.name === 'TimeoutError') {
+        throw new Error('LLM Timeout - Antwort dauerte zu lange');
+      }
+      throw error;
+    }
@@ -135,4 +158,14 @@ export class OllamaService implements OnModuleInit {
   getDefaultModel(): string {
     return this.defaultModel;
   }
+
+  /**
+   * Normalize model name to include provider prefix if missing.
+   */
+  private normalizeModel(model: string): string {
+    if (model.includes('/')) {
+      return model;
+    }
+    return `ollama/${model}`;
+  }
 }

@@ -9,7 +9,7 @@ Telegram Bot zum Sammeln von Projektdokumentation (Fotos, Sprachnotizen, Text) u
 - **Database**: PostgreSQL + Drizzle ORM
 - **Storage**: S3 (MinIO lokal, Hetzner in Produktion)
 - **AI - Transcription**: OpenAI Whisper
-- **AI - Generation**: Ollama (lokal) oder OpenAI GPT
+- **AI - Generation**: mana-llm service oder OpenAI GPT

 ## Commands

@@ -90,9 +90,9 @@ S3_BUCKET=projectdoc-storage
 OPENAI_API_KEY=sk-xxx

 # AI - Generation
-LLM_PROVIDER=ollama # ollama oder openai
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=gemma3:4b
+LLM_PROVIDER=mana-llm # mana-llm oder openai
+MANA_LLM_URL=http://localhost:3025 # mana-llm service URL
+LLM_MODEL=ollama/gemma3:4b # Model with provider prefix
 ```

 ## Projekt-Struktur

@@ -26,10 +26,10 @@ export default () => ({
     model: process.env.STT_MODEL || 'whisper', // 'whisper' or 'voxtral'
   },
   llm: {
-    provider: process.env.LLM_PROVIDER || 'ollama',
-    ollama: {
-      url: process.env.OLLAMA_URL || 'http://localhost:11434',
-      model: process.env.OLLAMA_MODEL || 'gemma3:4b',
+    provider: process.env.LLM_PROVIDER || 'mana-llm',
+    manaLlm: {
+      url: process.env.MANA_LLM_URL || 'http://localhost:3025',
+      model: process.env.LLM_MODEL || 'ollama/gemma3:4b',
     },
   },
 });

@@ -14,8 +14,8 @@ type BlogStyle = keyof typeof BLOG_STYLES;
 export class GenerationService {
   private readonly logger = new Logger(GenerationService.name);
   private readonly llmProvider: string;
-  private readonly ollamaUrl: string;
-  private readonly ollamaModel: string;
+  private readonly manaLlmUrl: string;
+  private readonly manaLlmModel: string;
   private readonly openai: OpenAI | null;

   constructor(
@@ -23,9 +23,10 @@ export class GenerationService {
     private db: PostgresJsDatabase<typeof schema>,
     private configService: ConfigService
   ) {
-    this.llmProvider = this.configService.get<string>('llm.provider') || 'ollama';
-    this.ollamaUrl = this.configService.get<string>('llm.ollama.url') || 'http://localhost:11434';
-    this.ollamaModel = this.configService.get<string>('llm.ollama.model') || 'gemma3:4b';
+    this.llmProvider = this.configService.get<string>('llm.provider') || 'mana-llm';
+    this.manaLlmUrl = this.configService.get<string>('llm.manaLlm.url') || 'http://localhost:3025';
+    this.manaLlmModel =
+      this.configService.get<string>('llm.manaLlm.model') || 'ollama/gemma3:4b';

     const apiKey = this.configService.get<string>('openai.apiKey');
     this.openai = apiKey ? new OpenAI({ apiKey }) : null;
@@ -148,7 +149,7 @@ Beginne direkt mit dem Blogbeitrag (ohne Einleitung wie "Hier ist der Blogbeitra
       return this.callOpenAI(prompt);
     }

-    return this.callOllama(prompt);
+    return this.callManaLlm(prompt);
   }

   private async callOpenAI(prompt: string): Promise<string> {
@@ -166,24 +167,27 @@ Beginne direkt mit dem Blogbeitrag (ohne Einleitung wie "Hier ist der Blogbeitra
     return response.choices[0]?.message?.content || '';
   }

-  private async callOllama(prompt: string): Promise<string> {
-    const response = await fetch(`${this.ollamaUrl}/api/generate`, {
+  private async callManaLlm(prompt: string): Promise<string> {
+    const response = await fetch(`${this.manaLlmUrl}/v1/chat/completions`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({
-        model: this.ollamaModel,
-        prompt,
+        model: this.manaLlmModel,
+        messages: [{ role: 'user', content: prompt }],
+        temperature: 0.7,
+        max_tokens: 4000,
         stream: false,
       }),
       signal: AbortSignal.timeout(180000), // 3 minutes timeout
     });

     if (!response.ok) {
-      throw new Error(`Ollama API error: ${response.status}`);
+      const errorText = await response.text();
+      throw new Error(`mana-llm API error: ${response.status} - ${errorText}`);
     }

     const data = await response.json();
-    return data.response || '';
+    return data.choices?.[0]?.message?.content || '';
   }

   async getLatestGeneration(projectId: string): Promise<Generation | undefined> {
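In the generation service the OpenAI path is untouched; LLM_PROVIDER only decides whether a blog post is produced via callOpenAI or the new callManaLlm, which posts a single user message to /v1/chat/completions with temperature 0.7 and max_tokens 4000. A simplified sketch of that dispatch (standalone illustration; the real methods live on GenerationService):

```typescript
// Simplified provider dispatch, condensed from GenerationService (sketch, not the full implementation).
async function generateText(
  prompt: string,
  provider: string, // from LLM_PROVIDER, default 'mana-llm'
  callOpenAI: (p: string) => Promise<string>,
  callManaLlm: (p: string) => Promise<string>
): Promise<string> {
  if (provider === 'openai') {
    return callOpenAI(prompt); // existing OpenAI GPT path, unchanged by this commit
  }
  return callManaLlm(prompt); // new default: POST ${MANA_LLM_URL}/v1/chat/completions
}
```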