feat(matrix-ollama-bot): add vision commands and filter non-chat models

- Add !vision command to analyze images with vision models
- Add !vision:all command to compare all vision models
- Filter out specialized models (deepseek-r1) from !all comparison
- Add chatWithImage method to OllamaService for vision requests
- Switch Dockerfile from pnpm to npm for better compatibility
- Add .dockerignore and tsconfig.build.json

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Till-JS 2026-01-28 14:23:30 +01:00
parent 437d612e81
commit e4145324b2
6 changed files with 278 additions and 26 deletions

View file

@ -0,0 +1,6 @@
node_modules
dist
.git
*.log
.env*
data

View file

@ -3,37 +3,31 @@ FROM node:20-alpine AS builder
WORKDIR /app
# Install pnpm
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
# Copy package files (exclude pnpm-lock.yaml to use npm)
COPY package.json ./
# Copy package files
COPY package.json pnpm-lock.yaml* ./
# Install dependencies (ignore optional native modules)
RUN pnpm install --frozen-lockfile --ignore-scripts || pnpm install --ignore-scripts
# Install dependencies using npm (more compatible with standard tooling)
RUN npm install
# Copy source
COPY . .
# Build
RUN pnpm build
# Build using TypeScript
RUN rm -rf dist && npx tsc -p tsconfig.build.json
# Production stage
FROM node:20-alpine AS runner
WORKDIR /app
# Install pnpm
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
# Create data directory for bot storage
RUN mkdir -p /app/data
# Copy package files
COPY package.json pnpm-lock.yaml* ./
COPY package.json ./
# Install production dependencies only (ignore optional native modules)
RUN pnpm install --prod --frozen-lockfile --ignore-scripts || pnpm install --prod --ignore-scripts
# Install production dependencies only
RUN npm install --omit=dev
# Copy built files
COPY --from=builder /app/dist ./dist

View file

@ -12,9 +12,12 @@
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
}
},
"overrides": {
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
},
"scripts": {
"prebuild": "rimraf dist",
"build": "nest build",
"prebuild": "rm -rf dist || true",
"build": "tsc -p tsconfig.build.json",
"format": "prettier --write \"src/**/*.ts\"",
"start": "nest start",
"start:dev": "nest start --watch",
@ -36,7 +39,6 @@
"@nestjs/cli": "^10.4.9",
"@nestjs/schematics": "^10.2.3",
"@types/node": "^22.10.5",
"rimraf": "^6.0.1",
"typescript": "^5.7.3"
}
}

View file

@ -15,8 +15,15 @@ interface UserSession {
systemPrompt: string;
model: string;
history: { role: 'user' | 'assistant'; content: string }[];
pendingImage?: { url: string; mimeType: string };
}
// Models excluded from !all comparison (specialized, not for general chat).
// Declared readonly so the exclusion list cannot be mutated at runtime.
const NON_CHAT_MODELS: readonly string[] = ['deepseek-r1:1.5b'];
// Models that support vision/image input. Matching is done by substring
// against installed model names (see handleVision), so 'llava' also covers
// the explicit 'llava:7b' / 'llava:13b' tags listed here.
const VISION_MODELS: readonly string[] = ['llava', 'llava:7b', 'llava:13b', 'bakllava', 'moondream'];
@Injectable()
export class MatrixService implements OnModuleInit, OnModuleDestroy {
private readonly logger = new Logger(MatrixService.name);
@ -100,8 +107,29 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
return;
}
const content = event.content as {
msgtype?: string;
body?: string;
url?: string;
info?: { mimetype?: string };
};
// Handle image messages - store for later use with !vision
if (content.msgtype === 'm.image' && content.url) {
const session = this.getSession(event.sender);
session.pendingImage = {
url: content.url,
mimeType: content.info?.mimetype || 'image/png',
};
this.logger.log(`Image received from ${event.sender}, stored for !vision command`);
await this.sendMessage(
roomId,
`📷 Bild empfangen! Nutze jetzt:\n- \`!vision [Frage zum Bild]\` - Bild mit einem Modell analysieren\n- \`!vision:all [Frage]\` - Bild mit allen Vision-Modellen vergleichen`
);
return;
}
// Only handle text messages
const content = event.content as { msgtype?: string; body?: string };
if (content.msgtype !== 'm.text') return;
const body = content.body;
@ -153,6 +181,14 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
await this.handleAllModels(roomId, sender, argString);
break;
case 'vision':
await this.handleVision(roomId, sender, argString);
break;
case 'vision:all':
await this.handleVisionAll(roomId, sender, argString);
break;
default:
await this.sendMessage(
roomId,
@ -168,11 +204,17 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
- \`!help\` - Diese Hilfe anzeigen
- \`!models\` - Verfügbare Modelle anzeigen
- \`!model [name]\` - Modell wechseln
- \`!all [frage]\` - **Alle Modelle vergleichen**
- \`!all [frage]\` - **Alle Chat-Modelle vergleichen**
- \`!mode [modus]\` - System-Prompt ändern
- \`!clear\` - Chat-Verlauf löschen
- \`!status\` - Ollama Status prüfen
**Bild-Analyse (Vision):**
1. Sende ein Bild in den Chat
2. Nutze dann:
- \`!vision [frage]\` - Bild analysieren
- \`!vision:all [frage]\` - **Alle Vision-Modelle vergleichen**
**Modi:**
- \`default\` - Allgemeiner Assistent
- \`classify\` - Text-Klassifizierung
@ -183,8 +225,10 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
**Verwendung:**
Schreibe einfach eine Nachricht und ich antworte!
**Beispiel Modellvergleich:**
\`!all Was ist der Sinn des Lebens?\`
**Beispiele:**
- \`!all Was ist der Sinn des Lebens?\`
- [Bild senden] \`!vision Was siehst du?\`
- [Bild senden] \`!vision:all Beschreibe das Bild\`
**Aktuelles Modell:** \`${this.ollamaService.getDefaultModel()}\``;
@ -303,20 +347,26 @@ Schreibe einfach eine Nachricht und ich antworte!
if (!message.trim()) {
await this.sendMessage(
roomId,
`**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Modelle gesendet und du siehst die Antworten zum Vergleich.`
`**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Chat-Modelle gesendet und du siehst die Antworten zum Vergleich.`
);
return;
}
const models = await this.ollamaService.listModels();
const allModels = await this.ollamaService.listModels();
// Filter out non-chat models (OCR, specialized models)
const models = allModels.filter((m) => !NON_CHAT_MODELS.includes(m.name));
if (models.length === 0) {
await this.sendMessage(roomId, '❌ Keine Modelle gefunden. Ist Ollama gestartet?');
await this.sendMessage(roomId, '❌ Keine Chat-Modelle gefunden. Ist Ollama gestartet?');
return;
}
const skipped = allModels.length - models.length;
const skippedNote = skipped > 0 ? ` (${skipped} spezialisierte Modelle übersprungen)` : '';
await this.sendMessage(
roomId,
`🔄 **Vergleiche ${models.length} Modelle...**\n\nFrage: "${message}"`
`🔄 **Vergleiche ${models.length} Chat-Modelle...**${skippedNote}\n\nFrage: "${message}"`
);
// Send typing indicator
@ -405,6 +455,154 @@ Schreibe einfach eine Nachricht und ich antworte!
}
}
/**
 * !vision — analyzes the user's previously sent image with the first
 * installed vision-capable model. Replies with a usage hint when no
 * image is pending or no question was given.
 */
private async handleVision(roomId: string, sender: string, prompt: string) {
  const userSession = this.getSession(sender);

  // Guard: an image must have been sent (and stored on the session) first.
  if (!userSession.pendingImage) {
    await this.sendMessage(
      roomId,
      `❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision [Frage zum Bild]\``
    );
    return;
  }

  // Guard: the command requires a non-empty question about the image.
  if (!prompt.trim()) {
    await this.sendMessage(
      roomId,
      `**Verwendung:** \`!vision [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision Was siehst du auf diesem Bild?\``
    );
    return;
  }

  // Select vision-capable models by substring match against installed names.
  const installedModels = await this.ollamaService.listModels();
  const visionCapable = installedModels.filter((installed) =>
    VISION_MODELS.some((tag) => installed.name.includes(tag))
  );
  if (visionCapable.length === 0) {
    await this.sendMessage(
      roomId,
      `❌ Keine Vision-Modelle gefunden!\n\nInstalliere ein Vision-Modell mit:\n\`ollama pull llava\``
    );
    return;
  }

  // Use the first matching model; announce it and show typing (2 min cap).
  const model = visionCapable[0].name;
  await this.sendMessage(roomId, `🔍 Analysiere Bild mit \`${model}\`...`);
  await this.client.setTyping(roomId, true, 120000);
  try {
    // Fetch the stored image from the homeserver, then query the model.
    const imageData = await this.downloadMatrixImage(userSession.pendingImage.url);
    const response = await this.ollamaService.chatWithImage(prompt, imageData, model);
    await this.client.setTyping(roomId, false);
    await this.sendMessage(roomId, `**${model}:**\n\n${response}`);
  } catch (error) {
    await this.client.setTyping(roomId, false);
    const errorMsg = error instanceof Error ? error.message : 'Unbekannter Fehler';
    await this.sendMessage(roomId, `❌ Fehler bei der Bildanalyse: ${errorMsg}`);
  }
}
// !vision:all — runs the user's question against every installed vision model
// and posts a single comparison message (response text truncated per model).
// Models are queried sequentially, one at a time; a failure of one model is
// recorded as an error entry and does not abort the comparison.
private async handleVisionAll(roomId: string, sender: string, prompt: string) {
const session = this.getSession(sender);
// Guard: an image must have been sent (and stored on the session) first.
if (!session.pendingImage) {
await this.sendMessage(
roomId,
`❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision:all [Frage zum Bild]\``
);
return;
}
// Guard: the command requires a non-empty question about the image.
if (!prompt.trim()) {
await this.sendMessage(
roomId,
`**Verwendung:** \`!vision:all [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision:all Beschreibe was du siehst\``
);
return;
}
// Find available vision models (substring match against installed names)
const allModels = await this.ollamaService.listModels();
const visionModels = allModels.filter((m) => VISION_MODELS.some((v) => m.name.includes(v)));
if (visionModels.length === 0) {
await this.sendMessage(
roomId,
`❌ Keine Vision-Modelle gefunden!\n\nInstalliere Vision-Modelle mit:\n\`ollama pull llava\`\n\`ollama pull moondream\``
);
return;
}
// Announce the comparison before starting the (potentially slow) queries.
await this.sendMessage(
roomId,
`🔄 **Vergleiche ${visionModels.length} Vision-Modelle...**\n\nFrage: "${prompt}"`
);
// Typing indicator with a 5-minute cap — the sequential queries can be slow.
await this.client.setTyping(roomId, true, 300000);
try {
// Download image from Matrix once, then reuse it for every model.
const imageData = await this.downloadMatrixImage(session.pendingImage.url);
const results: { model: string; response: string; duration: number; error?: string }[] = [];
// Deliberately sequential: one in-flight request to Ollama at a time.
for (const model of visionModels) {
const startTime = Date.now();
try {
this.logger.log(`Querying vision model ${model.name}...`);
const response = await this.ollamaService.chatWithImage(prompt, imageData, model.name);
const duration = Date.now() - startTime;
results.push({ model: model.name, response, duration });
} catch (error) {
// Record the failure with its timing so it still appears in the summary.
const duration = Date.now() - startTime;
const errorMessage = error instanceof Error ? error.message : 'Unbekannter Fehler';
results.push({ model: model.name, response: '', duration, error: errorMessage });
}
}
await this.client.setTyping(roomId, false);
// Format results into one markdown summary, separated by horizontal rules.
let resultText = `**📊 Vision-Modellvergleich**\n\n**Frage:** "${prompt}"\n\n---\n\n`;
for (const result of results) {
const durationSec = (result.duration / 1000).toFixed(1);
if (result.error) {
resultText += `**${result.model}** ⏱️ ${durationSec}s\n❌ Fehler: ${result.error}\n\n---\n\n`;
} else {
// Cap each model's answer at 500 chars to keep the message readable.
const truncatedResponse =
result.response.length > 500
? result.response.substring(0, 500) + '...'
: result.response;
resultText += `**${result.model}** ⏱️ ${durationSec}s\n${truncatedResponse}\n\n---\n\n`;
}
}
await this.sendMessage(roomId, resultText);
} catch (error) {
// Image download (or another step outside the per-model loop) failed.
await this.client.setTyping(roomId, false);
const errorMsg = error instanceof Error ? error.message : 'Unbekannter Fehler';
await this.sendMessage(roomId, `❌ Fehler: ${errorMsg}`);
}
}
/**
 * Downloads a Matrix media file and returns it base64-encoded — the format
 * Ollama expects in a message's `images` array.
 *
 * @param mxcUrl - A Matrix content URI (`mxc://server/mediaId`).
 * @returns The media content as a base64 string (no data-URI prefix).
 * @throws Error when the URI is not an mxc:// URI or the download fails.
 */
private async downloadMatrixImage(mxcUrl: string): Promise<string> {
  // Fail fast with a clear message instead of producing a bogus HTTP URL.
  if (!mxcUrl.startsWith('mxc://')) {
    throw new Error(`Invalid Matrix content URI: ${mxcUrl}`);
  }
  // Convert mxc:// URL to HTTP URL and download.
  const httpUrl = this.client.mxcToHttp(mxcUrl);
  this.logger.log(`Downloading image from ${httpUrl}`);
  // NOTE(review): this fetches without an access token; confirm the
  // homeserver still serves unauthenticated media downloads (MSC3916
  // deprecates them on newer servers).
  const response = await fetch(httpUrl);
  if (!response.ok) {
    throw new Error(`Failed to download image: ${response.status}`);
  }
  const buffer = await response.arrayBuffer();
  return Buffer.from(buffer).toString('base64');
}
private async sendMessage(roomId: string, message: string) {
// Convert markdown to basic HTML for Matrix
const htmlBody = this.markdownToHtml(message);

View file

@ -91,4 +91,48 @@ export class OllamaService implements OnModuleInit {
getDefaultModel(): string {
return this.defaultModel;
}
/**
 * Sends a single-turn chat request with an attached image to Ollama's
 * /api/chat endpoint. Vision models read base64 images from the message's
 * `images` field.
 *
 * @param prompt - The user's question about the image.
 * @param imageBase64 - Image content, base64-encoded (no data-URI prefix).
 * @param model - Optional model override; falls back to the configured default.
 * @returns The assistant's reply text, or '' when the response has no content.
 * @throws Error on non-2xx responses, or a translated error on timeout.
 */
async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
  const selectedModel = model || this.defaultModel;
  try {
    const response = await fetch(`${this.baseUrl}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: selectedModel,
        messages: [
          {
            role: 'user',
            content: prompt,
            images: [imageBase64],
          },
        ],
        // Request the full answer in one JSON body instead of a stream.
        stream: false,
      }),
      signal: AbortSignal.timeout(this.timeout),
    });
    if (!response.ok) {
      throw new Error(`Ollama API error: ${response.status}`);
    }
    // Narrow the untyped JSON payload to the fields we actually read,
    // instead of passing `any` around.
    const data = (await response.json()) as {
      message?: { content?: string };
      eval_count?: number;
      eval_duration?: number;
    };
    // Log throughput when Ollama reports eval metrics (eval_duration is ns).
    if (data.eval_count && data.eval_duration) {
      const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
      this.logger.debug(
        `Vision: Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`
      );
    }
    return data.message?.content || '';
  } catch (error) {
    // AbortSignal.timeout() rejects with a DOMException named 'TimeoutError'.
    if (error instanceof Error && error.name === 'TimeoutError') {
      throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
    }
    throw error;
  }
}
}

View file

@ -0,0 +1,8 @@
{
  // Build-only config: inherits all compiler options from the base tsconfig
  // but restricts emit to src/ and drops test files from the output.
  // (tsconfig files are JSONC, so comments are permitted here.)
  "extends": "./tsconfig.json",
  "compilerOptions": {
    "rootDir": "./src"
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist", "**/*.spec.ts", "**/*.test.ts"]
}