Mirror of https://github.com/Memo-2023/mana-monorepo.git (synced 2026-05-14 20:01:09 +02:00)
feat(matrix-ollama-bot): add vision commands and filter non-chat models

- Add !vision command to analyze images with vision models
- Add !vision:all command to compare all vision models
- Filter out specialized models (deepseek-r1) from !all comparison
- Add chatWithImage method to OllamaService for vision requests
- Switch Dockerfile from pnpm to npm for better compatibility
- Add .dockerignore and tsconfig.build.json

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Parent: 437d612e81
Commit: e4145324b2
6 changed files with 278 additions and 26 deletions
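The new `chatWithImage` method (see the OllamaService diff below) wraps Ollama's `/api/chat` endpoint and attaches a base64-encoded image to the user message. A minimal standalone sketch of that request is shown here, assuming a local Ollama instance on the default port, a pulled `llava` model, and an illustrative helper name and image path:

```typescript
import { readFile } from 'node:fs/promises';

// Sketch only: base URL, model name, and image path are assumptions for illustration.
async function askAboutImage(prompt: string, imagePath: string): Promise<string> {
  // Ollama expects images as base64 strings attached to the message.
  const imageBase64 = (await readFile(imagePath)).toString('base64');

  const response = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'llava',
      messages: [{ role: 'user', content: prompt, images: [imageBase64] }],
      stream: false,
    }),
  });

  if (!response.ok) throw new Error(`Ollama API error: ${response.status}`);
  const data = await response.json();
  return data.message?.content ?? '';
}
```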
services/matrix-ollama-bot/.dockerignore (new file, 6 lines)

```diff
@@ -0,0 +1,6 @@
+node_modules
+dist
+.git
+*.log
+.env*
+data
```
services/matrix-ollama-bot/Dockerfile

```diff
@@ -3,37 +3,31 @@ FROM node:20-alpine AS builder

 WORKDIR /app

-# Install pnpm
-RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
-
-# Copy package files
-COPY package.json pnpm-lock.yaml* ./
-
-# Install dependencies (ignore optional native modules)
-RUN pnpm install --frozen-lockfile --ignore-scripts || pnpm install --ignore-scripts
+# Copy package files (exclude pnpm-lock.yaml to use npm)
+COPY package.json ./
+
+# Install dependencies using npm (more compatible with standard tooling)
+RUN npm install

 # Copy source
 COPY . .

-# Build
-RUN pnpm build
+# Build using TypeScript
+RUN rm -rf dist && npx tsc -p tsconfig.build.json

 # Production stage
 FROM node:20-alpine AS runner

 WORKDIR /app

-# Install pnpm
-RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
-
 # Create data directory for bot storage
 RUN mkdir -p /app/data

 # Copy package files
-COPY package.json pnpm-lock.yaml* ./
+COPY package.json ./

-# Install production dependencies only (ignore optional native modules)
-RUN pnpm install --prod --frozen-lockfile --ignore-scripts || pnpm install --prod --ignore-scripts
+# Install production dependencies only
+RUN npm install --omit=dev

 # Copy built files
 COPY --from=builder /app/dist ./dist
```
services/matrix-ollama-bot/package.json

```diff
@@ -12,9 +12,12 @@
     "@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
-  }
+  },
+  "overrides": {
+    "@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
+  },
   "scripts": {
-    "prebuild": "rimraf dist",
-    "build": "nest build",
+    "prebuild": "rm -rf dist || true",
+    "build": "tsc -p tsconfig.build.json",
     "format": "prettier --write \"src/**/*.ts\"",
     "start": "nest start",
     "start:dev": "nest start --watch",
@@ -36,7 +39,6 @@
     "@nestjs/cli": "^10.4.9",
     "@nestjs/schematics": "^10.2.3",
     "@types/node": "^22.10.5",
-    "rimraf": "^6.0.1",
     "typescript": "^5.7.3"
   }
 }
```
matrix.service.ts

```diff
@@ -15,8 +15,15 @@ interface UserSession {
   systemPrompt: string;
   model: string;
   history: { role: 'user' | 'assistant'; content: string }[];
+  pendingImage?: { url: string; mimeType: string };
 }

+// Models excluded from !all comparison (specialized, not for general chat)
+const NON_CHAT_MODELS = ['deepseek-r1:1.5b'];
+
+// Models that support vision/image input
+const VISION_MODELS = ['llava', 'llava:7b', 'llava:13b', 'bakllava', 'moondream'];
+
 @Injectable()
 export class MatrixService implements OnModuleInit, OnModuleDestroy {
   private readonly logger = new Logger(MatrixService.name);
```
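The two constants above are applied differently later in this diff: NON_CHAT_MODELS is compared against the exact model name, while VISION_MODELS is matched as a substring, so tagged variants such as `llava:latest` still qualify. A minimal sketch of both checks (the local copies and the model names in `installed` are made up for illustration):

```typescript
// Mirrors the constants defined above; illustrative only.
const nonChatModels = ['deepseek-r1:1.5b'];
const visionModelNames = ['llava', 'llava:7b', 'llava:13b', 'bakllava', 'moondream'];

const installed = [
  { name: 'llama3.2:3b' },      // hypothetical chat model
  { name: 'deepseek-r1:1.5b' }, // excluded from !all
  { name: 'llava:latest' },     // matched by substring for !vision
];

// Exact-name exclusion, as used by !all:
const chatModels = installed.filter((m) => !nonChatModels.includes(m.name));
// -> llama3.2:3b, llava:latest

// Substring match, as used by !vision / !vision:all:
const visionModels = installed.filter((m) =>
  visionModelNames.some((v) => m.name.includes(v)),
);
// -> llava:latest
```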
```diff
@@ -100,8 +107,29 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
       return;
     }

+    const content = event.content as {
+      msgtype?: string;
+      body?: string;
+      url?: string;
+      info?: { mimetype?: string };
+    };
+
+    // Handle image messages - store for later use with !vision
+    if (content.msgtype === 'm.image' && content.url) {
+      const session = this.getSession(event.sender);
+      session.pendingImage = {
+        url: content.url,
+        mimeType: content.info?.mimetype || 'image/png',
+      };
+      this.logger.log(`Image received from ${event.sender}, stored for !vision command`);
+      await this.sendMessage(
+        roomId,
+        `📷 Bild empfangen! Nutze jetzt:\n- \`!vision [Frage zum Bild]\` - Bild mit einem Modell analysieren\n- \`!vision:all [Frage]\` - Bild mit allen Vision-Modellen vergleichen`
+      );
+      return;
+    }
+
     // Only handle text messages
-    const content = event.content as { msgtype?: string; body?: string };
     if (content.msgtype !== 'm.text') return;

     const body = content.body;
```
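The type cast in the hunk above mirrors the fields of a Matrix `m.image` event content that the handler reads. A hedged, illustrative example of such a content object (values are made up; real events may carry additional fields):

```typescript
// Illustrative m.image content as the handler above expects it.
const exampleImageContent = {
  msgtype: 'm.image',
  body: 'photo.png',
  url: 'mxc://example.org/abcDEF123', // MXC content URI, later resolved via mxcToHttp()
  info: {
    mimetype: 'image/png', // falls back to 'image/png' in the handler if missing
    w: 800,
    h: 600,
    size: 123456,
  },
};
```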
```diff
@@ -153,6 +181,14 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
         await this.handleAllModels(roomId, sender, argString);
         break;

+      case 'vision':
+        await this.handleVision(roomId, sender, argString);
+        break;
+
+      case 'vision:all':
+        await this.handleVisionAll(roomId, sender, argString);
+        break;
+
       default:
         await this.sendMessage(
           roomId,
```
```diff
@@ -168,11 +204,17 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
 - \`!help\` - Diese Hilfe anzeigen
 - \`!models\` - Verfügbare Modelle anzeigen
 - \`!model [name]\` - Modell wechseln
-- \`!all [frage]\` - **Alle Modelle vergleichen**
+- \`!all [frage]\` - **Alle Chat-Modelle vergleichen**
 - \`!mode [modus]\` - System-Prompt ändern
 - \`!clear\` - Chat-Verlauf löschen
 - \`!status\` - Ollama Status prüfen

+**Bild-Analyse (Vision):**
+1. Sende ein Bild in den Chat
+2. Nutze dann:
+- \`!vision [frage]\` - Bild analysieren
+- \`!vision:all [frage]\` - **Alle Vision-Modelle vergleichen**
+
 **Modi:**
 - \`default\` - Allgemeiner Assistent
 - \`classify\` - Text-Klassifizierung
```
```diff
@@ -183,8 +225,10 @@ export class MatrixService implements OnModuleInit, OnModuleDestroy {
 **Verwendung:**
 Schreibe einfach eine Nachricht und ich antworte!

-**Beispiel Modellvergleich:**
-\`!all Was ist der Sinn des Lebens?\`
+**Beispiele:**
+- \`!all Was ist der Sinn des Lebens?\`
+- [Bild senden] → \`!vision Was siehst du?\`
+- [Bild senden] → \`!vision:all Beschreibe das Bild\`

 **Aktuelles Modell:** \`${this.ollamaService.getDefaultModel()}\``;
```
```diff
@@ -303,20 +347,26 @@ Schreibe einfach eine Nachricht und ich antworte!
     if (!message.trim()) {
       await this.sendMessage(
         roomId,
-        `**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Modelle gesendet und du siehst die Antworten zum Vergleich.`
+        `**Verwendung:** \`!all [Deine Frage]\`\n\nBeispiel: \`!all Was ist 2+2?\`\n\nDie Frage wird an alle Chat-Modelle gesendet und du siehst die Antworten zum Vergleich.`
       );
       return;
     }

-    const models = await this.ollamaService.listModels();
+    const allModels = await this.ollamaService.listModels();
+    // Filter out non-chat models (OCR, specialized models)
+    const models = allModels.filter((m) => !NON_CHAT_MODELS.includes(m.name));

     if (models.length === 0) {
-      await this.sendMessage(roomId, '❌ Keine Modelle gefunden. Ist Ollama gestartet?');
+      await this.sendMessage(roomId, '❌ Keine Chat-Modelle gefunden. Ist Ollama gestartet?');
       return;
     }

+    const skipped = allModels.length - models.length;
+    const skippedNote = skipped > 0 ? ` (${skipped} spezialisierte Modelle übersprungen)` : '';
+
     await this.sendMessage(
       roomId,
-      `🔄 **Vergleiche ${models.length} Modelle...**\n\nFrage: "${message}"`
+      `🔄 **Vergleiche ${models.length} Chat-Modelle...**${skippedNote}\n\nFrage: "${message}"`
     );

     // Send typing indicator
```
```diff
@@ -405,6 +455,154 @@ Schreibe einfach eine Nachricht und ich antworte!
     }
   }

+  private async handleVision(roomId: string, sender: string, prompt: string) {
+    const session = this.getSession(sender);
+
+    if (!session.pendingImage) {
+      await this.sendMessage(
+        roomId,
+        `❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision [Frage zum Bild]\``
+      );
+      return;
+    }
+
+    if (!prompt.trim()) {
+      await this.sendMessage(
+        roomId,
+        `**Verwendung:** \`!vision [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision Was siehst du auf diesem Bild?\``
+      );
+      return;
+    }
+
+    // Find available vision models
+    const allModels = await this.ollamaService.listModels();
+    const visionModels = allModels.filter((m) => VISION_MODELS.some((v) => m.name.includes(v)));
+
+    if (visionModels.length === 0) {
+      await this.sendMessage(
+        roomId,
+        `❌ Keine Vision-Modelle gefunden!\n\nInstalliere ein Vision-Modell mit:\n\`ollama pull llava\``
+      );
+      return;
+    }
+
+    const model = visionModels[0].name;
+    await this.sendMessage(roomId, `🔍 Analysiere Bild mit \`${model}\`...`);
+    await this.client.setTyping(roomId, true, 120000);
+
+    try {
+      // Download image from Matrix
+      const imageData = await this.downloadMatrixImage(session.pendingImage.url);
+
+      const response = await this.ollamaService.chatWithImage(prompt, imageData, model);
+
+      await this.client.setTyping(roomId, false);
+      await this.sendMessage(roomId, `**${model}:**\n\n${response}`);
+    } catch (error) {
+      await this.client.setTyping(roomId, false);
+      const errorMsg = error instanceof Error ? error.message : 'Unbekannter Fehler';
+      await this.sendMessage(roomId, `❌ Fehler bei der Bildanalyse: ${errorMsg}`);
+    }
+  }
+
+  private async handleVisionAll(roomId: string, sender: string, prompt: string) {
+    const session = this.getSession(sender);
+
+    if (!session.pendingImage) {
+      await this.sendMessage(
+        roomId,
+        `❌ Kein Bild vorhanden!\n\nSende zuerst ein Bild, dann nutze \`!vision:all [Frage zum Bild]\``
+      );
+      return;
+    }
+
+    if (!prompt.trim()) {
+      await this.sendMessage(
+        roomId,
+        `**Verwendung:** \`!vision:all [Deine Frage zum Bild]\`\n\nBeispiel: \`!vision:all Beschreibe was du siehst\``
+      );
+      return;
+    }
+
+    // Find available vision models
+    const allModels = await this.ollamaService.listModels();
+    const visionModels = allModels.filter((m) => VISION_MODELS.some((v) => m.name.includes(v)));
+
+    if (visionModels.length === 0) {
+      await this.sendMessage(
+        roomId,
+        `❌ Keine Vision-Modelle gefunden!\n\nInstalliere Vision-Modelle mit:\n\`ollama pull llava\`\n\`ollama pull moondream\``
+      );
+      return;
+    }
+
+    await this.sendMessage(
+      roomId,
+      `🔄 **Vergleiche ${visionModels.length} Vision-Modelle...**\n\nFrage: "${prompt}"`
+    );
+    await this.client.setTyping(roomId, true, 300000);
+
+    try {
+      // Download image from Matrix once
+      const imageData = await this.downloadMatrixImage(session.pendingImage.url);
+
+      const results: { model: string; response: string; duration: number; error?: string }[] = [];
+
+      for (const model of visionModels) {
+        const startTime = Date.now();
+        try {
+          this.logger.log(`Querying vision model ${model.name}...`);
+          const response = await this.ollamaService.chatWithImage(prompt, imageData, model.name);
+          const duration = Date.now() - startTime;
+          results.push({ model: model.name, response, duration });
+        } catch (error) {
+          const duration = Date.now() - startTime;
+          const errorMessage = error instanceof Error ? error.message : 'Unbekannter Fehler';
+          results.push({ model: model.name, response: '', duration, error: errorMessage });
+        }
+      }
+
+      await this.client.setTyping(roomId, false);
+
+      // Format results
+      let resultText = `**📊 Vision-Modellvergleich**\n\n**Frage:** "${prompt}"\n\n---\n\n`;
+
+      for (const result of results) {
+        const durationSec = (result.duration / 1000).toFixed(1);
+        if (result.error) {
+          resultText += `**${result.model}** ⏱️ ${durationSec}s\n❌ Fehler: ${result.error}\n\n---\n\n`;
+        } else {
+          const truncatedResponse =
+            result.response.length > 500
+              ? result.response.substring(0, 500) + '...'
+              : result.response;
+          resultText += `**${result.model}** ⏱️ ${durationSec}s\n${truncatedResponse}\n\n---\n\n`;
+        }
+      }
+
+      await this.sendMessage(roomId, resultText);
+    } catch (error) {
+      await this.client.setTyping(roomId, false);
+      const errorMsg = error instanceof Error ? error.message : 'Unbekannter Fehler';
+      await this.sendMessage(roomId, `❌ Fehler: ${errorMsg}`);
+    }
+  }
+
+  private async downloadMatrixImage(mxcUrl: string): Promise<string> {
+    // Convert mxc:// URL to HTTP URL and download
+    const httpUrl = this.client.mxcToHttp(mxcUrl);
+    this.logger.log(`Downloading image from ${httpUrl}`);
+
+    const response = await fetch(httpUrl);
+    if (!response.ok) {
+      throw new Error(`Failed to download image: ${response.status}`);
+    }
+
+    const buffer = await response.arrayBuffer();
+    const base64 = Buffer.from(buffer).toString('base64');
+    return base64;
+  }
+
   private async sendMessage(roomId: string, message: string) {
     // Convert markdown to basic HTML for Matrix
     const htmlBody = this.markdownToHtml(message);
```
ollama.service.ts

```diff
@@ -91,4 +91,48 @@ export class OllamaService implements OnModuleInit {
   getDefaultModel(): string {
     return this.defaultModel;
   }
+
+  async chatWithImage(prompt: string, imageBase64: string, model?: string): Promise<string> {
+    const selectedModel = model || this.defaultModel;
+
+    try {
+      const response = await fetch(`${this.baseUrl}/api/chat`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          model: selectedModel,
+          messages: [
+            {
+              role: 'user',
+              content: prompt,
+              images: [imageBase64],
+            },
+          ],
+          stream: false,
+        }),
+        signal: AbortSignal.timeout(this.timeout),
+      });
+
+      if (!response.ok) {
+        throw new Error(`Ollama API error: ${response.status}`);
+      }
+
+      const data = await response.json();
+
+      // Log performance metrics
+      if (data.eval_count && data.eval_duration) {
+        const tokensPerSec = (data.eval_count / data.eval_duration) * 1e9;
+        this.logger.debug(
+          `Vision: Generated ${data.eval_count} tokens at ${tokensPerSec.toFixed(1)} t/s`
+        );
+      }
+
+      return data.message?.content || '';
+    } catch (error) {
+      if (error instanceof Error && error.name === 'TimeoutError') {
+        throw new Error('Ollama Timeout - Bildanalyse dauerte zu lange');
+      }
+      throw error;
+    }
+  }
 }
```
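The performance log in `chatWithImage` multiplies by 1e9 because Ollama reports `eval_duration` in nanoseconds. A quick worked example with made-up numbers:

```typescript
// Hypothetical response metrics; eval_duration is in nanoseconds.
const evalCount = 96;               // tokens generated
const evalDuration = 3_200_000_000; // 3.2 seconds in ns

const tokensPerSec = (evalCount / evalDuration) * 1e9;
console.log(tokensPerSec.toFixed(1)); // "30.0" tokens per second
```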
services/matrix-ollama-bot/tsconfig.build.json (new file, 8 lines)

```diff
@@ -0,0 +1,8 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "rootDir": "./src"
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "**/*.spec.ts", "**/*.test.ts"]
+}
```