diff --git a/services/matrix-ollama-bot/.dockerignore b/services/matrix-ollama-bot/.dockerignore new file mode 100644 index 000000000..d6a8859ae --- /dev/null +++ b/services/matrix-ollama-bot/.dockerignore @@ -0,0 +1,6 @@ +node_modules +dist +.git +*.log +.env* +data diff --git a/services/matrix-ollama-bot/Dockerfile b/services/matrix-ollama-bot/Dockerfile index bd2c930fa..82cdf37cb 100644 --- a/services/matrix-ollama-bot/Dockerfile +++ b/services/matrix-ollama-bot/Dockerfile @@ -3,37 +3,31 @@ FROM node:20-alpine AS builder WORKDIR /app -# Install pnpm -RUN corepack enable && corepack prepare pnpm@9.15.0 --activate +# Copy package files (exclude pnpm-lock.yaml to use npm) +COPY package.json ./ -# Copy package files -COPY package.json pnpm-lock.yaml* ./ - -# Install dependencies (ignore optional native modules) -RUN pnpm install --frozen-lockfile --ignore-scripts || pnpm install --ignore-scripts +# Install dependencies using npm (more compatible with standard tooling) +RUN npm install # Copy source COPY . . 
-# Build -RUN pnpm build +# Build using TypeScript +RUN rm -rf dist && npx tsc -p tsconfig.build.json # Production stage FROM node:20-alpine AS runner WORKDIR /app -# Install pnpm -RUN corepack enable && corepack prepare pnpm@9.15.0 --activate - # Create data directory for bot storage RUN mkdir -p /app/data # Copy package files -COPY package.json pnpm-lock.yaml* ./ +COPY package.json ./ -# Install production dependencies only (ignore optional native modules) -RUN pnpm install --prod --frozen-lockfile --ignore-scripts || pnpm install --prod --ignore-scripts +# Install production dependencies only +RUN npm install --omit=dev # Copy built files COPY --from=builder /app/dist ./dist diff --git a/services/matrix-ollama-bot/package.json b/services/matrix-ollama-bot/package.json index b0d5f3f7d..1cdd10cd2 100644 --- a/services/matrix-ollama-bot/package.json +++ b/services/matrix-ollama-bot/package.json @@ -12,9 +12,12 @@ "@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0" } }, + "overrides": { + "@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0" + }, "scripts": { - "prebuild": "rimraf dist", - "build": "nest build", + "prebuild": "rm -rf dist || true", + "build": "tsc -p tsconfig.build.json", "format": "prettier --write \"src/**/*.ts\"", "start": "nest start", "start:dev": "nest start --watch", @@ -36,7 +39,6 @@ "@nestjs/cli": "^10.4.9", "@nestjs/schematics": "^10.2.3", "@types/node": "^22.10.5", - "rimraf": "^6.0.1", "typescript": "^5.7.3" } } diff --git a/services/matrix-ollama-bot/src/bot/matrix.service.ts b/services/matrix-ollama-bot/src/bot/matrix.service.ts index 3dd2f9e43..42cf11367 100644 --- a/services/matrix-ollama-bot/src/bot/matrix.service.ts +++ b/services/matrix-ollama-bot/src/bot/matrix.service.ts @@ -15,8 +15,15 @@ interface UserSession { systemPrompt: string; model: string; history: { role: 'user' | 'assistant'; content: string }[]; + pendingImage?: { url: string; mimeType: string }; } +// Models excluded from !all 
comparison (specialized, not for general chat) +const NON_CHAT_MODELS = ['deepseek-r1:1.5b']; + +// Models that support vision/image input +const VISION_MODELS = ['llava', 'llava:7b', 'llava:13b', 'bakllava', 'moondream']; + @Injectable() export class MatrixService implements OnModuleInit, OnModuleDestroy { private readonly logger = new Logger(MatrixService.name); @@ -100,8 +107,29 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy { return; } + const content = event.content as { + msgtype?: string; + body?: string; + url?: string; + info?: { mimetype?: string }; + }; + + // Handle image messages - store for later use with !vision + if (content.msgtype === 'm.image' && content.url) { + const session = this.getSession(event.sender); + session.pendingImage = { + url: content.url, + mimeType: content.info?.mimetype || 'image/png', + }; + this.logger.log(`Image received from ${event.sender}, stored for !vision command`); + await this.sendMessage( + roomId, + `📷 Bild empfangen! 
Nutze jetzt:\n- \`!vision [Frage zum Bild]\` - Bild mit einem Modell analysieren\n- \`!vision:all [Frage]\` - Bild mit allen Vision-Modellen vergleichen` + ); + return; + } + // Only handle text messages - const content = event.content as { msgtype?: string; body?: string }; if (content.msgtype !== 'm.text') return; const body = content.body; @@ -153,6 +181,14 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy { await this.handleAllModels(roomId, sender, argString); break; + case 'vision': + await this.handleVision(roomId, sender, argString); + break; + + case 'vision:all': + await this.handleVisionAll(roomId, sender, argString); + break; + default: await this.sendMessage( roomId, @@ -168,11 +204,17 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy { - \`!help\` - Diese Hilfe anzeigen - \`!models\` - Verfügbare Modelle anzeigen - \`!model [name]\` - Modell wechseln -- \`!all [frage]\` - **Alle Modelle vergleichen** +- \`!all [frage]\` - **Alle Chat-Modelle vergleichen** - \`!mode [modus]\` - System-Prompt ändern - \`!clear\` - Chat-Verlauf löschen - \`!status\` - Ollama Status prüfen +**Bild-Analyse (Vision):** +1. Sende ein Bild in den Chat +2. Nutze dann: + - \`!vision [frage]\` - Bild analysieren + - \`!vision:all [frage]\` - **Alle Vision-Modelle vergleichen** + **Modi:** - \`default\` - Allgemeiner Assistent - \`classify\` - Text-Klassifizierung @@ -183,8 +225,10 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy { **Verwendung:** Schreibe einfach eine Nachricht und ich antworte! -**Beispiel Modellvergleich:** -\`!all Was ist der Sinn des Lebens?\` +**Beispiele:** +- \`!all Was ist der Sinn des Lebens?\` +- [Bild senden] → \`!vision Was siehst du?\` +- [Bild senden] → \`!vision:all Beschreibe das Bild\` **Aktuelles Modell:** \`${this.ollamaService.getDefaultModel()}\``; @@ -303,20 +347,26 @@ Schreibe einfach eine Nachricht und ich antworte! 
if (!message.trim()) { await this.sendMessage( roomId, - `**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Modelle gesendet und du siehst die Antworten zum Vergleich.` + `**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Chat-Modelle gesendet und du siehst die Antworten zum Vergleich.` ); return; } - const models = await this.ollamaService.listModels(); + const allModels = await this.ollamaService.listModels(); + // Filter out non-chat models (OCR, specialized models) + const models = allModels.filter((m) => !NON_CHAT_MODELS.includes(m.name)); + if (models.length === 0) { - await this.sendMessage(roomId, '❌ Keine Modelle gefunden. Ist Ollama gestartet?'); + await this.sendMessage(roomId, '❌ Keine Chat-Modelle gefunden. Ist Ollama gestartet?'); return; } + const skipped = allModels.length - models.length; + const skippedNote = skipped > 0 ? ` (${skipped} spezialisierte Modelle übersprungen)` : ''; + await this.sendMessage( roomId, - `🔄 **Vergleiche ${models.length} Modelle...**\n\nFrage: "${message}"` + `🔄 **Vergleiche ${models.length} Chat-Modelle...**${skippedNote}\n\nFrage: "${message}"` ); // Send typing indicator @@ -405,6 +455,154 @@ Schreibe einfach eine Nachricht und ich antworte! 
} } + private async handleVision(roomId: string, sender: string, prompt: string) { + const session = this.getSession(sender); + + if (!session.pendingImage) { + await this.sendMessage( + roomId, + `❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision [Frage zum Bild]\`` + ); + return; + } + + if (!prompt.trim()) { + await this.sendMessage( + roomId, + `**Verwendung:** \`!vision [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision Was siehst du auf diesem Bild?\`` + ); + return; + } + + // Find available vision models + const allModels = await this.ollamaService.listModels(); + const visionModels = allModels.filter((m) => VISION_MODELS.some((v) => m.name.includes(v))); + + if (visionModels.length === 0) { + await this.sendMessage( + roomId, + `❌ Keine Vision-Modelle gefunden!\n\nInstalliere ein Vision-Modell mit:\n\`ollama pull llava\`` + ); + return; + } + + const model = visionModels[0].name; + await this.sendMessage(roomId, `🔍 Analysiere Bild mit \`${model}\`...`); + await this.client.setTyping(roomId, true, 120000); + + try { + // Download image from Matrix + const imageData = await this.downloadMatrixImage(session.pendingImage.url); + + const response = await this.ollamaService.chatWithImage(prompt, imageData, model); + + await this.client.setTyping(roomId, false); + await this.sendMessage(roomId, `**${model}:**\n\n${response}`); + } catch (error) { + await this.client.setTyping(roomId, false); + const errorMsg = error instanceof Error ? 
error.message : 'Unbekannter Fehler'; + await this.sendMessage(roomId, `❌ Fehler bei der Bildanalyse: ${errorMsg}`); + } + } + + private async handleVisionAll(roomId: string, sender: string, prompt: string) { + const session = this.getSession(sender); + + if (!session.pendingImage) { + await this.sendMessage( + roomId, + `❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision:all [Frage zum Bild]\`` + ); + return; + } + + if (!prompt.trim()) { + await this.sendMessage( + roomId, + `**Verwendung:** \`!vision:all [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision:all Beschreibe was du siehst\`` + ); + return; + } + + // Find available vision models + const allModels = await this.ollamaService.listModels(); + const visionModels = allModels.filter((m) => VISION_MODELS.some((v) => m.name.includes(v))); + + if (visionModels.length === 0) { + await this.sendMessage( + roomId, + `❌ Keine Vision-Modelle gefunden!\n\nInstalliere Vision-Modelle mit:\n\`ollama pull llava\`\n\`ollama pull moondream\`` + ); + return; + } + + await this.sendMessage( + roomId, + `🔄 **Vergleiche ${visionModels.length} Vision-Modelle...**\n\nFrage: "${prompt}"` + ); + await this.client.setTyping(roomId, true, 300000); + + try { + // Download image from Matrix once + const imageData = await this.downloadMatrixImage(session.pendingImage.url); + + const results: { model: string; response: string; duration: number; error?: string }[] = []; + + for (const model of visionModels) { + const startTime = Date.now(); + try { + this.logger.log(`Querying vision model ${model.name}...`); + const response = await this.ollamaService.chatWithImage(prompt, imageData, model.name); + const duration = Date.now() - startTime; + results.push({ model: model.name, response, duration }); + } catch (error) { + const duration = Date.now() - startTime; + const errorMessage = error instanceof Error ? 
error.message : 'Unbekannter Fehler'; + results.push({ model: model.name, response: '', duration, error: errorMessage }); + } + } + + await this.client.setTyping(roomId, false); + + // Format results + let resultText = `**📊 Vision-Modellvergleich**\n\n**Frage:** "${prompt}"\n\n---\n\n`; + + for (const result of results) { + const durationSec = (result.duration / 1000).toFixed(1); + if (result.error) { + resultText += `**${result.model}** ⏱️ ${durationSec}s\n❌ Fehler: ${result.error}\n\n---\n\n`; + } else { + const truncatedResponse = + result.response.length > 500 + ? result.response.substring(0, 500) + '...' + : result.response; + resultText += `**${result.model}** ⏱️ ${durationSec}s\n${truncatedResponse}\n\n---\n\n`; + } + } + + await this.sendMessage(roomId, resultText); + } catch (error) { + await this.client.setTyping(roomId, false); + const errorMsg = error instanceof Error ? error.message : 'Unbekannter Fehler'; + await this.sendMessage(roomId, `❌ Fehler: ${errorMsg}`); + } + } + + private async downloadMatrixImage(mxcUrl: string): Promise<string> { + // Convert mxc:// URL to HTTP URL and download + const httpUrl = this.client.mxcToHttp(mxcUrl); + this.logger.log(`Downloading image from ${httpUrl}`); + + const response = await fetch(httpUrl); + if (!response.ok) { + throw new Error(`Failed to download image: ${response.status}`); + } + + const buffer = await response.arrayBuffer(); + const base64 = Buffer.from(buffer).toString('base64'); + return base64; + } + private async sendMessage(roomId: string, message: string) { // Convert markdown to basic HTML for Matrix const htmlBody = this.markdownToHtml(message); diff --git a/services/matrix-ollama-bot/src/ollama/ollama.service.ts b/services/matrix-ollama-bot/src/ollama/ollama.service.ts index 7d0dbb29c..d43590a2f 100644 --- a/services/matrix-ollama-bot/src/ollama/ollama.service.ts +++ b/services/matrix-ollama-bot/src/ollama/ollama.service.ts @@ -91,4 +91,48 @@ export class OllamaService implements OnModuleInit { 
getDefaultModel(): string { return this.defaultModel; } + + async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> { + const selectedModel = model || this.defaultModel; + + try { + const response = await fetch(`${this.baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: selectedModel, + messages: [ + { + role: 'user', + content: prompt, + images: [imageBase64], + }, + ], + stream: false, + }), + signal: AbortSignal.timeout(this.timeout), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.status}`); + } + + const data = await response.json(); + + // Log performance metrics + if (data.eval_count && data.eval_duration) { + const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9; + this.logger.debug( + `Vision: Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s` + ); + } + + return data.message?.content || ''; + } catch (error) { + if (error instanceof Error && error.name === 'TimeoutError') { + throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange'); + } + throw error; + } + } } diff --git a/services/matrix-ollama-bot/tsconfig.build.json b/services/matrix-ollama-bot/tsconfig.build.json new file mode 100644 index 000000000..f2050bc26 --- /dev/null +++ b/services/matrix-ollama-bot/tsconfig.build.json @@ -0,0 +1,8 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "rootDir": "./src" + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.spec.ts", "**/*.test.ts"] +}