mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 19:01:08 +02:00
feat(matrix-stt-bot): add speech-to-text Matrix bot
- New bot that transcribes voice messages to text - Uses mana-stt service (Whisper/Voxtral) for transcription - Supports German and English with auto-detection - Commands: !language, !model, !status, !help - Runs on port 3024 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
d7236d61fa
commit
e357f9f292
16 changed files with 1451 additions and 584 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -123,3 +123,4 @@ pip-delete-this-directory.txt
|
|||
|
||||
# ML Models (large files, downloaded on demand)
|
||||
mlx_models/
|
||||
services/matrix-stt-bot/data/
|
||||
|
|
|
|||
1128
pnpm-lock.yaml
generated
1128
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
15
services/matrix-stt-bot/.env.example
Normal file
15
services/matrix-stt-bot/.env.example
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Server
|
||||
PORT=3024
|
||||
|
||||
# Matrix Configuration
|
||||
MATRIX_HOMESERVER_URL=http://localhost:8008
|
||||
MATRIX_ACCESS_TOKEN=syt_xxx
|
||||
MATRIX_ALLOWED_ROOMS=
|
||||
MATRIX_STORAGE_PATH=./data/bot-storage.json
|
||||
|
||||
# STT Service (mana-stt)
|
||||
STT_URL=http://localhost:3020
# Optional: sent to mana-stt as the X-API-Key header when set
# STT_API_KEY=
|
||||
|
||||
# STT Defaults
|
||||
DEFAULT_LANGUAGE=de
|
||||
DEFAULT_MODEL=whisper
|
||||
189
services/matrix-stt-bot/CLAUDE.md
Normal file
189
services/matrix-stt-bot/CLAUDE.md
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
# Matrix STT Bot - Claude Code Guidelines
|
||||
|
||||
## Overview
|
||||
|
||||
Matrix STT Bot transcribes audio/voice messages and replies in the same room with the transcription as a text message. It uses the mana-stt service (port 3020) for transcription.
|
||||
|
||||
## Tech Stack
|
||||
|
||||
- **Framework**: NestJS 10
|
||||
- **Matrix**: matrix-bot-sdk
|
||||
- **STT Backend**: mana-stt service (Whisper, Voxtral)
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Development
|
||||
pnpm install
|
||||
pnpm start:dev # Start with hot reload
|
||||
|
||||
# Build
|
||||
pnpm build # Production build
|
||||
|
||||
# Type check
|
||||
pnpm type-check # Check TypeScript types
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
services/matrix-stt-bot/
|
||||
├── src/
|
||||
│ ├── main.ts # Application entry point (port 3024)
|
||||
│ ├── app.module.ts # Root module
|
||||
│ ├── config/
|
||||
│ │ └── configuration.ts # Configuration & help text
|
||||
│ ├── bot/
|
||||
│ │ ├── bot.module.ts
|
||||
│ │ └── matrix.service.ts # Matrix client & message handler
|
||||
│ └── stt/
|
||||
│ ├── stt.module.ts
|
||||
│ └── stt.service.ts # mana-stt API client
|
||||
├── Dockerfile
|
||||
└── package.json
|
||||
```
|
||||
|
||||
## Bot Commands
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `!help` / `!hilfe` | Show help text |
|
||||
| `!language [de\|en\|auto]` | Change transcription language |
|
||||
| `!model [whisper\|voxtral\|auto]` | Change STT model |
|
||||
| `!status` | Show current settings |
|
||||
| (voice message) | Transcribe to text |
|
||||
|
||||
## Message Flow
|
||||
|
||||
1. User sends voice/audio message
|
||||
2. Bot receives via matrix-bot-sdk
|
||||
3. Audio downloaded from Matrix
|
||||
4. STT service transcribes audio
|
||||
5. Text message sent back to room
|
||||
|
||||
## Environment Variables
|
||||
|
||||
```env
|
||||
# Server
|
||||
PORT=3024
|
||||
|
||||
# Matrix
|
||||
MATRIX_HOMESERVER_URL=http://localhost:8008
|
||||
MATRIX_ACCESS_TOKEN=syt_xxx
|
||||
MATRIX_ALLOWED_ROOMS=!roomid:matrix.mana.how
|
||||
MATRIX_STORAGE_PATH=./data/bot-storage.json
|
||||
|
||||
# STT Service
|
||||
STT_URL=http://localhost:3020
# Optional: sent to mana-stt as the X-API-Key header when set
STT_API_KEY=
|
||||
|
||||
# Defaults
|
||||
DEFAULT_LANGUAGE=de
|
||||
DEFAULT_MODEL=whisper
|
||||
```
|
||||
|
||||
## STT API Integration
|
||||
|
||||
The bot sends audio to mana-stt for transcription:
|
||||
|
||||
```typescript
|
||||
// Default Whisper endpoint
|
||||
POST /transcribe
|
||||
FormData: file=audio.ogg, language=de
|
||||
|
||||
// Voxtral endpoint (with speaker diarization)
|
||||
POST /transcribe/voxtral
|
||||
FormData: file=audio.ogg, language=de
|
||||
|
||||
// Auto-select endpoint
|
||||
POST /transcribe/auto
|
||||
FormData: file=audio.ogg, prefer=whisper
|
||||
|
||||
// Response
|
||||
{
|
||||
"text": "Das ist der transkribierte Text...",
|
||||
"language": "de",
|
||||
"model": "whisper-large-v3-turbo",
|
||||
"duration": 3.5
|
||||
}
|
||||
```
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Description |
|
||||
|-------|-------------|
|
||||
| `whisper` | Whisper Large V3 (local, fast, 99+ languages) |
|
||||
| `voxtral` | Voxtral Mini (cloud, speaker diarization) |
|
||||
| `auto` | Automatic model selection |
|
||||
|
||||
## Supported Languages
|
||||
|
||||
| Code | Language |
|
||||
|------|----------|
|
||||
| `de` | German (default) |
|
||||
| `en` | English |
|
||||
| `auto` | Automatic detection |
|
||||
|
||||
## Supported Audio Formats
|
||||
|
||||
- OGG, MP3, WAV, M4A, FLAC, WebM, Opus
|
||||
- Matrix voice messages (typically OGG/Opus)
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
# Build
|
||||
docker build -f services/matrix-stt-bot/Dockerfile -t matrix-stt-bot .
|
||||
|
||||
# Run
|
||||
docker run -p 3024:3024 \
|
||||
-e MATRIX_HOMESERVER_URL=http://synapse:8008 \
|
||||
-e MATRIX_ACCESS_TOKEN=syt_xxx \
|
||||
-e STT_URL=http://mana-stt:3020 \
|
||||
-v matrix-stt-bot-data:/app/data \
|
||||
matrix-stt-bot
|
||||
```
|
||||
|
||||
## Health Check
|
||||
|
||||
```bash
|
||||
curl http://localhost:3024/health
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **mana-stt**: Must be running on port 3020 (or configured via `STT_URL`)
|
||||
- **Matrix homeserver**: Synapse or compatible homeserver
|
||||
|
||||
## User Settings
|
||||
|
||||
Settings are stored in-memory per Matrix user ID:
|
||||
- Language selection persists during bot runtime
|
||||
- Model selection persists during bot runtime
|
||||
- Settings reset when bot restarts
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# 1. Ensure mana-stt is running
|
||||
curl http://localhost:3020/health
|
||||
|
||||
# 2. Start the bot
|
||||
cd services/matrix-stt-bot
|
||||
pnpm start:dev
|
||||
|
||||
# 3. Check bot health
|
||||
curl http://localhost:3024/health
|
||||
|
||||
# 4. In Matrix:
|
||||
# - Invite bot to a room
|
||||
# - Send a voice message
|
||||
# - Receive text transcription
|
||||
```
|
||||
|
||||
## Related Services
|
||||
|
||||
| Service | Port | Description |
|
||||
|---------|------|-------------|
|
||||
| mana-stt | 3020 | STT backend service |
|
||||
| matrix-tts-bot | 3023 | Text-to-speech bot (reverse of this) |
|
||||
| mana-tts | 3022 | TTS backend service |
|
||||
60
services/matrix-stt-bot/Dockerfile
Normal file
60
services/matrix-stt-bot/Dockerfile
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# Build stage
|
||||
FROM node:20-alpine AS builder
|
||||
|
||||
# Install pnpm
|
||||
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy workspace files
|
||||
COPY pnpm-workspace.yaml package.json pnpm-lock.yaml ./
|
||||
COPY packages/matrix-bot-common/package.json packages/matrix-bot-common/
|
||||
COPY packages/bot-services/package.json packages/bot-services/
|
||||
COPY services/matrix-stt-bot/package.json services/matrix-stt-bot/
|
||||
|
||||
# Install dependencies
|
||||
RUN pnpm install --frozen-lockfile
|
||||
|
||||
# Copy source files
|
||||
COPY packages/matrix-bot-common packages/matrix-bot-common
|
||||
COPY packages/bot-services packages/bot-services
|
||||
COPY services/matrix-stt-bot services/matrix-stt-bot
|
||||
|
||||
# Build shared packages
|
||||
RUN pnpm --filter @manacore/matrix-bot-common build
|
||||
RUN pnpm --filter @manacore/bot-services build
|
||||
|
||||
# Build the bot
|
||||
RUN pnpm --filter @manacore/matrix-stt-bot build
|
||||
|
||||
# Production stage
|
||||
FROM node:20-alpine
|
||||
|
||||
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy built files
|
||||
COPY --from=builder /app/node_modules ./node_modules
|
||||
COPY --from=builder /app/packages/matrix-bot-common/dist ./packages/matrix-bot-common/dist
|
||||
COPY --from=builder /app/packages/matrix-bot-common/package.json ./packages/matrix-bot-common/
|
||||
COPY --from=builder /app/packages/bot-services/dist ./packages/bot-services/dist
|
||||
COPY --from=builder /app/packages/bot-services/package.json ./packages/bot-services/
|
||||
COPY --from=builder /app/services/matrix-stt-bot/dist ./dist
|
||||
COPY --from=builder /app/services/matrix-stt-bot/package.json ./
|
||||
COPY --from=builder /app/services/matrix-stt-bot/node_modules ./node_modules
|
||||
|
||||
# Create data directory
|
||||
RUN mkdir -p /app/data
|
||||
|
||||
# Set environment
|
||||
ENV NODE_ENV=production
|
||||
ENV PORT=3024
|
||||
|
||||
EXPOSE 3024
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD wget --no-verbose --tries=1 --spider http://localhost:3024/health || exit 1
|
||||
|
||||
CMD ["node", "dist/main.js"]
|
||||
8
services/matrix-stt-bot/nest-cli.json
Normal file
8
services/matrix-stt-bot/nest-cli.json
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"$schema": "https://json.schemastore.org/nest-cli",
|
||||
"collection": "@nestjs/schematics",
|
||||
"sourceRoot": "src",
|
||||
"compilerOptions": {
|
||||
"deleteOutDir": true
|
||||
}
|
||||
}
|
||||
42
services/matrix-stt-bot/package.json
Normal file
42
services/matrix-stt-bot/package.json
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"name": "@manacore/matrix-stt-bot",
|
||||
"version": "1.0.0",
|
||||
"description": "Matrix bot for speech-to-text transcription",
|
||||
"private": true,
|
||||
"pnpm": {
|
||||
"neverBuiltDependencies": [
|
||||
"@matrix-org/matrix-sdk-crypto-nodejs"
|
||||
],
|
||||
"overrides": {
|
||||
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
|
||||
}
|
||||
},
|
||||
"overrides": {
|
||||
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
|
||||
},
|
||||
"scripts": {
|
||||
"prebuild": "rm -rf dist || true",
|
||||
"build": "tsc -p tsconfig.build.json",
|
||||
"start": "nest start",
|
||||
"start:dev": "nest start --watch",
|
||||
"start:debug": "nest start --debug --watch",
|
||||
"start:prod": "node dist/main",
|
||||
"type-check": "tsc --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@manacore/bot-services": "workspace:*",
|
||||
"@manacore/matrix-bot-common": "workspace:*",
|
||||
"@nestjs/common": "^10.4.17",
|
||||
"@nestjs/config": "^3.3.0",
|
||||
"@nestjs/core": "^10.4.17",
|
||||
"@nestjs/platform-express": "^10.4.17",
|
||||
"matrix-bot-sdk": "^0.7.1",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"rxjs": "^7.8.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@nestjs/cli": "^10.4.9",
|
||||
"@types/node": "^22.10.7",
|
||||
"typescript": "^5.7.3"
|
||||
}
|
||||
}
|
||||
20
services/matrix-stt-bot/src/app.module.ts
Normal file
20
services/matrix-stt-bot/src/app.module.ts
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { ConfigModule } from '@nestjs/config';
|
||||
import { HealthController, createHealthProvider } from '@manacore/matrix-bot-common';
|
||||
import { BotModule } from './bot/bot.module';
|
||||
import { SttModule } from './stt/stt.module';
|
||||
import configuration from './config/configuration';
|
||||
|
||||
/**
 * Root module of the Matrix STT bot.
 *
 * Registers the global configuration (loaded through the `configuration`
 * factory), the STT and bot feature modules, and the shared health endpoint
 * identified as `matrix-stt-bot`.
 */
@Module({
	imports: [
		ConfigModule.forRoot({ load: [configuration], isGlobal: true }),
		SttModule,
		BotModule,
	],
	controllers: [HealthController],
	providers: [createHealthProvider('matrix-stt-bot')],
})
export class AppModule {}
|
||||
9
services/matrix-stt-bot/src/bot/bot.module.ts
Normal file
9
services/matrix-stt-bot/src/bot/bot.module.ts
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { MatrixService } from './matrix.service';
|
||||
import { SttModule } from '../stt/stt.module';
|
||||
|
||||
/**
 * Feature module for the Matrix side of the bot.
 *
 * Imports `SttModule` so that `MatrixService` can have `SttService` injected.
 */
@Module({ imports: [SttModule], providers: [MatrixService] })
export class BotModule {}
|
||||
338
services/matrix-stt-bot/src/bot/matrix.service.ts
Normal file
338
services/matrix-stt-bot/src/bot/matrix.service.ts
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
import { Injectable } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import {
|
||||
BaseMatrixService,
|
||||
MatrixBotConfig,
|
||||
MatrixRoomEvent,
|
||||
KeywordCommandDetector,
|
||||
COMMON_KEYWORDS,
|
||||
} from '@manacore/matrix-bot-common';
|
||||
import { SttService, SttLanguage, SttModel } from '../stt/stt.service';
|
||||
import { HELP_TEXT, WELCOME_TEXT } from '../config/configuration';
|
||||
|
||||
/** Transcription preferences kept for a single Matrix user. */
interface UserSettings {
	/** Language passed to the STT service for this user's audio. */
	language: SttLanguage;
	/** STT model used for this user's audio. */
	model: SttModel;
}
|
||||
|
||||
/**
 * Matrix bot service that transcribes audio messages and answers a small set
 * of text commands (`!help`, `!language`, `!model`, `!status`).
 *
 * Per-user language/model preferences are held in memory only, so they are
 * lost when the process restarts.
 */
@Injectable()
export class MatrixService extends BaseMatrixService {
	/** Languages accepted by the `!language` command. */
	private static readonly SUPPORTED_LANGUAGES: readonly SttLanguage[] = ['de', 'en', 'auto'];

	/** Models accepted by the `!model` command. */
	private static readonly SUPPORTED_MODELS: readonly SttModel[] = ['whisper', 'voxtral', 'auto'];

	/** File extensions treated as audio when no `audio/*` mimetype is present. */
	private static readonly AUDIO_EXTENSIONS: readonly string[] = [
		'.ogg',
		'.mp3',
		'.wav',
		'.m4a',
		'.flac',
		'.webm',
		'.opus',
	];

	/** Upper bound on remembered event IDs used for duplicate suppression. */
	private static readonly MAX_PROCESSED_EVENTS = 1000;

	private readonly defaultLanguage: SttLanguage;
	private readonly defaultModel: SttModel;

	// User settings storage (in-memory)
	private userSettings: Map<string, UserSettings> = new Map();

	// Track processed events to prevent duplicates
	private processedEvents: Set<string> = new Set();

	private readonly keywordDetector = new KeywordCommandDetector([
		...COMMON_KEYWORDS,
		{ keywords: ['language', 'sprache', 'sprache aendern'], command: 'language' },
		{ keywords: ['model', 'modell'], command: 'model' },
	]);

	constructor(
		configService: ConfigService,
		private sttService: SttService
	) {
		super(configService);
		this.defaultLanguage =
			(this.configService.get<string>('stt.defaultLanguage') as SttLanguage) || 'de';
		this.defaultModel =
			(this.configService.get<string>('stt.defaultModel') as SttModel) || 'whisper';
	}

	/** Assembles the Matrix client configuration from the `matrix.*` config keys. */
	protected getConfig(): MatrixBotConfig {
		return {
			homeserverUrl:
				this.configService.get<string>('matrix.homeserverUrl') || 'http://localhost:8008',
			accessToken: this.configService.get<string>('matrix.accessToken') || '',
			storagePath:
				this.configService.get<string>('matrix.storagePath') || './data/bot-storage.json',
			allowedRooms: this.configService.get<string[]>('matrix.allowedRooms') || [],
		};
	}

	/** Returns the welcome text used as the bot's introduction message. */
	protected getIntroductionMessage(): string {
		return WELCOME_TEXT;
	}

	/**
	 * Entry point for room messages: drops own and duplicate events, enforces the
	 * room allowlist, then routes audio to transcription and text to commands.
	 */
	protected async onRoomMessage(roomId: string, event: MatrixRoomEvent): Promise<void> {
		// Ignore own messages
		if (event.sender === this.botUserId) return;

		// Prevent duplicate processing
		const eventId = event.event_id;
		if (eventId) {
			if (this.processedEvents.has(eventId)) return;
			this.processedEvents.add(eventId);

			// Sets iterate in insertion order, so the first value is the oldest ID.
			if (this.processedEvents.size > MatrixService.MAX_PROCESSED_EVENTS) {
				const oldest = this.processedEvents.values().next().value;
				if (oldest) {
					this.processedEvents.delete(oldest);
				}
			}
		}

		// Check room allowlist
		if (!this.isRoomAllowed(roomId)) return;

		const msgtype = event.content?.msgtype;
		const userId = event.sender;

		// Handle audio messages (main functionality)
		if (msgtype === 'm.audio' || msgtype === 'm.file') {
			const mimetype = String(event.content?.info?.mimetype || '');
			if (mimetype.startsWith('audio/') || this.isAudioFile(event.content?.body)) {
				await this.handleAudioMessage(roomId, event, userId);
				return;
			}
		}

		// Handle text commands
		if (msgtype === 'm.text') {
			const body = event.content?.body?.trim();
			if (body) {
				await this.handleTextMessage(roomId, event, body);
			}
		}
	}

	/** Returns true when the file name ends with a known audio extension (case-insensitive). */
	private isAudioFile(filename?: string): boolean {
		if (!filename) return false;
		const lowered = filename.toLowerCase();
		return MatrixService.AUDIO_EXTENSIONS.some((ext) => lowered.endsWith(ext));
	}

	/**
	 * Updates the typing indicator without letting a failure escape.
	 *
	 * The indicator is purely cosmetic; a failing update must not abort the
	 * transcription or prevent the error reply from being sent.
	 */
	private async setTypingBestEffort(roomId: string, typing: boolean): Promise<void> {
		try {
			if (typing) {
				await this.client.setTyping(roomId, true, 30000);
			} else {
				await this.client.setTyping(roomId, false);
			}
		} catch (error) {
			this.logger.warn(`Failed to update typing indicator: ${error}`);
		}
	}

	/**
	 * Downloads the audio referenced by the event, transcribes it with the
	 * sender's settings and replies with the text plus optional metadata.
	 * On failure the user receives a generic error reply.
	 */
	protected override async handleAudioMessage(
		roomId: string,
		event: MatrixRoomEvent,
		userId: string
	): Promise<void> {
		try {
			const mxcUrl = event.content.url;
			if (!mxcUrl) {
				this.logger.warn('Audio message without URL');
				return;
			}

			// Show typing indicator
			await this.setTypingBestEffort(roomId, true);

			// Download audio
			const audioBuffer = await this.downloadMedia(mxcUrl);

			// Get user settings
			const settings = this.getUserSettings(userId);

			// Transcribe
			const result = await this.sttService.transcribe(
				audioBuffer,
				settings.language,
				settings.model
			);

			// Stop typing indicator
			await this.setTypingBestEffort(roomId, false);

			if (!result.text || result.text.trim() === '') {
				await this.sendReply(roomId, event, 'Keine Sprache erkannt.');
				return;
			}

			// Format response
			let response = `**Transkription:**\n\n${result.text}`;

			// Add metadata if available
			const metadata: string[] = [];
			if (result.language) {
				metadata.push(`Sprache: ${result.language}`);
			}
			if (result.model) {
				metadata.push(`Modell: ${result.model}`);
			}
			if (result.duration) {
				metadata.push(`Dauer: ${result.duration.toFixed(1)}s`);
			}
			if (metadata.length > 0) {
				response += `\n\n*${metadata.join(' | ')}*`;
			}

			await this.sendReply(roomId, event, response);

			this.logger.debug(`Transcribed audio for ${userId}: "${result.text.substring(0, 50)}..."`);
		} catch (error) {
			// Best-effort: a failing typing update must not swallow the error reply below.
			await this.setTypingBestEffort(roomId, false);
			this.logger.error(`Audio transcription error: ${error}`);
			await this.sendReply(
				roomId,
				event,
				'Fehler bei der Transkription. Ist der STT-Service erreichbar?'
			);
		}
	}

	/**
	 * Interprets a text message as a command.
	 *
	 * Explicit `!` commands are parsed first so their arguments are preserved;
	 * keyword detection is only consulted for messages without the `!` prefix.
	 * NOTE(review): the detector's matching rules are not visible here — this
	 * ordering guarantees a keyword rewrite can never discard arguments such as
	 * the `de` in `!language de`.
	 */
	protected async handleTextMessage(
		roomId: string,
		event: MatrixRoomEvent,
		body: string
	): Promise<void> {
		const userId = event.sender;

		try {
			let commandLine = body;

			if (!commandLine.startsWith('!')) {
				const keywordCommand = this.keywordDetector.detect(commandLine);
				if (keywordCommand) {
					commandLine = `!${keywordCommand}`;
				}
			}

			// Handle ! commands
			if (commandLine.startsWith('!')) {
				const [command, ...args] = commandLine.slice(1).split(' ');
				await this.executeCommand(roomId, event, userId, command.toLowerCase(), args.join(' '));
			}

			// Regular text messages are ignored: this bot is primarily for audio transcription.
		} catch (error) {
			this.logger.error(`Error handling message: ${error}`);
			await this.sendReply(roomId, event, 'Ein Fehler ist aufgetreten.');
		}
	}

	/** Dispatches a parsed command name (lower-case, without `!`) to its handler. */
	private async executeCommand(
		roomId: string,
		event: MatrixRoomEvent,
		userId: string,
		command: string,
		args: string
	) {
		switch (command) {
			case 'help':
			case 'hilfe':
				await this.sendReply(roomId, event, HELP_TEXT);
				break;

			case 'language':
			case 'sprache':
				await this.handleLanguageCommand(roomId, event, userId, args);
				break;

			case 'model':
			case 'modell':
				await this.handleModelCommand(roomId, event, userId, args);
				break;

			case 'status':
				await this.handleStatusCommand(roomId, event, userId);
				break;

			default:
				// Silently ignore unknown commands
				break;
		}
	}

	/** Type guard: is `value` one of the supported language codes? */
	private static isSupportedLanguage(value: string): value is SttLanguage {
		return (MatrixService.SUPPORTED_LANGUAGES as readonly string[]).includes(value);
	}

	/** Type guard: is `value` one of the supported model names? */
	private static isSupportedModel(value: string): value is SttModel {
		return (MatrixService.SUPPORTED_MODELS as readonly string[]).includes(value);
	}

	/** Handles `!language <code>`: validates the code and stores it for the user. */
	private async handleLanguageCommand(
		roomId: string,
		event: MatrixRoomEvent,
		userId: string,
		args: string
	) {
		const lang = args.trim().toLowerCase();

		if (!lang) {
			await this.sendReply(
				roomId,
				event,
				'**Verwendung:** `!language [de|en|auto]`\n\nBeispiel: `!language de`'
			);
			return;
		}

		if (!MatrixService.isSupportedLanguage(lang)) {
			await this.sendReply(
				roomId,
				event,
				`Ungueltige Sprache "${lang}".\n\nVerfuegbar: de, en, auto`
			);
			return;
		}

		// getUserSettings returns the stored object, so mutating it updates the map entry.
		this.getUserSettings(userId).language = lang;

		await this.sendReply(roomId, event, `Sprache geaendert zu: **${lang}**`);
	}

	/** Handles `!model <name>`: validates the model and stores it for the user. */
	private async handleModelCommand(
		roomId: string,
		event: MatrixRoomEvent,
		userId: string,
		args: string
	) {
		const model = args.trim().toLowerCase();

		if (!model) {
			await this.sendReply(
				roomId,
				event,
				'**Verwendung:** `!model [whisper|voxtral|auto]`\n\nBeispiel: `!model whisper`\n\n' +
					'**Modelle:**\n' +
					'- `whisper` - Whisper Large V3 (lokal, schnell)\n' +
					'- `voxtral` - Voxtral Mini (Cloud, Speaker Diarization)\n' +
					'- `auto` - Automatische Auswahl'
			);
			return;
		}

		if (!MatrixService.isSupportedModel(model)) {
			await this.sendReply(
				roomId,
				event,
				`Ungueltiges Modell "${model}".\n\nVerfuegbar: whisper, voxtral, auto`
			);
			return;
		}

		// getUserSettings returns the stored object, so mutating it updates the map entry.
		this.getUserSettings(userId).model = model;

		await this.sendReply(roomId, event, `Modell geaendert zu: **${model}**`);
	}

	/** Handles `!status`: reports the user's settings and whether the STT service responds. */
	private async handleStatusCommand(roomId: string, event: MatrixRoomEvent, userId: string) {
		const settings = this.getUserSettings(userId);
		const sttHealthy = await this.sttService.isHealthy();

		let response = '**Aktuelle Einstellungen:**\n\n';
		response += `Sprache: \`${settings.language}\`\n`;
		response += `Modell: \`${settings.model}\`\n\n`;
		response += `STT-Service: ${sttHealthy ? 'Online' : 'Offline'}`;

		await this.sendReply(roomId, event, response);
	}

	/**
	 * Returns the stored settings for a user, creating an entry with the
	 * configured defaults on first access. The returned object is the live map
	 * entry, not a copy.
	 */
	private getUserSettings(userId: string): UserSettings {
		let settings = this.userSettings.get(userId);
		if (!settings) {
			settings = {
				language: this.defaultLanguage,
				model: this.defaultModel,
			};
			this.userSettings.set(userId, settings);
		}
		return settings;
	}
}
|
||||
43
services/matrix-stt-bot/src/config/configuration.ts
Normal file
43
services/matrix-stt-bot/src/config/configuration.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/**
 * Builds the application configuration from environment variables.
 *
 * - `PORT` falls back to 3024 when unset, empty or not a number.
 * - `MATRIX_ALLOWED_ROOMS` is a comma-separated list; entries are trimmed and
 *   empty entries dropped, so `"a, b"` yields `['a', 'b']`.
 * - All other values fall back to the local-development defaults shown below.
 */
const configuration = () => {
	const parsedPort = Number.parseInt(process.env.PORT || '3024', 10);

	return {
		// A non-numeric PORT would otherwise surface as NaN.
		port: Number.isNaN(parsedPort) ? 3024 : parsedPort,
		matrix: {
			homeserverUrl: process.env.MATRIX_HOMESERVER_URL || 'http://localhost:8008',
			accessToken: process.env.MATRIX_ACCESS_TOKEN || '',
			// Trim each entry: a room ID with surrounding whitespace can never equal a real room ID.
			allowedRooms: (process.env.MATRIX_ALLOWED_ROOMS || '')
				.split(',')
				.map((roomId) => roomId.trim())
				.filter(Boolean),
			storagePath: process.env.MATRIX_STORAGE_PATH || './data/bot-storage.json',
		},
		stt: {
			url: process.env.STT_URL || 'http://localhost:3020',
			apiKey: process.env.STT_API_KEY || '',
			defaultLanguage: process.env.DEFAULT_LANGUAGE || 'de',
			defaultModel: process.env.DEFAULT_MODEL || 'whisper',
		},
	};
};

export default configuration;
|
||||
|
||||
/**
 * Help text sent in reply to `!help` / `!hilfe`.
 *
 * Lists every value the command handlers accept, including the `auto` model.
 */
export const HELP_TEXT = `**STT Bot - Hilfe**

Ich wandle deine Sprachnachrichten in Text um!

**Befehle:**
- \`!language [de|en|auto]\` - Sprache aendern
- \`!model [whisper|voxtral|auto]\` - Modell waehlen
- \`!status\` - Aktuelle Einstellungen
- \`!help\` - Diese Hilfe

**Verwendung:**
Sende einfach eine Sprachnachricht und ich schreibe dir den Text zurueck.

**Modelle:**
- \`whisper\` - Whisper Large V3 (lokal, schnell, Standard)
- \`voxtral\` - Voxtral Mini (Cloud, Speaker Diarization)
- \`auto\` - Automatische Auswahl

**Sprachen:**
- \`de\` - Deutsch (Standard)
- \`en\` - English
- \`auto\` - Automatische Erkennung`;
|
||||
|
||||
/** Short introduction text pointing users at voice messages and `!help`. */
export const WELCOME_TEXT = [
	'**STT Bot**',
	'',
	'Ich wandle Sprachnachrichten in Text um!',
	'',
	'Sende einfach eine Sprachnachricht oder `!help` fuer Hilfe.',
].join('\n');
|
||||
16
services/matrix-stt-bot/src/main.ts
Normal file
16
services/matrix-stt-bot/src/main.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import { NestFactory } from '@nestjs/core';
|
||||
import { AppModule } from './app.module';
|
||||
import { Logger } from '@nestjs/common';
|
||||
|
||||
/**
 * Creates the Nest application from `AppModule` and starts the HTTP listener.
 *
 * The port is taken from the `PORT` environment variable, defaulting to 3024.
 */
async function bootstrap() {
	const port = process.env.PORT || 3024;
	const app = await NestFactory.create(AppModule);

	await app.listen(port);

	// Log only after the listener is up so the messages reflect a running server.
	const logger = new Logger('Bootstrap');
	const startupMessages = [
		`Matrix STT Bot running on port ${port}`,
		`Health check: http://localhost:${port}/health`,
	];
	for (const message of startupMessages) {
		logger.log(message);
	}
}
|
||||
|
||||
bootstrap();
|
||||
8
services/matrix-stt-bot/src/stt/stt.module.ts
Normal file
8
services/matrix-stt-bot/src/stt/stt.module.ts
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { SttService } from './stt.service';
|
||||
|
||||
/** Provides `SttService` and exports it so other modules can inject it. */
@Module({ providers: [SttService], exports: [SttService] })
export class SttModule {}
|
||||
130
services/matrix-stt-bot/src/stt/stt.service.ts
Normal file
130
services/matrix-stt-bot/src/stt/stt.service.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
/** Result of a transcription request as returned by `SttService.transcribe`. */
export interface TranscriptionResult {
	/** Transcribed text; an empty string when the service returned none. */
	text: string;
	/** Language reported by the STT service, if any. */
	language?: string;
	/** Model reported by the STT service, or the requested model when none was reported. */
	model?: string;
	/** Audio duration reported by the STT service (presumably seconds — confirm against mana-stt). */
	duration?: number;
}
|
||||
|
||||
/** STT model selector; each value is mapped to a mana-stt endpoint by `SttService`. */
export type SttModel = 'whisper' | 'voxtral' | 'auto';

/** Transcription language; `auto` means no `language` field is sent to the STT service. */
export type SttLanguage = 'de' | 'en' | 'auto';
|
||||
|
||||
/**
 * HTTP client for the mana-stt speech-to-text service.
 *
 * Reads `stt.url` and `stt.apiKey` from configuration. When an API key is
 * configured it is sent as the `X-API-Key` header on every request.
 */
@Injectable()
export class SttService {
	/** Time limit for the lightweight `/models` probes so a stalled host cannot block callers. */
	private static readonly PROBE_TIMEOUT_MS = 5000;

	/** Model names reported when the STT service cannot be queried. */
	private static readonly FALLBACK_MODELS: readonly string[] = ['whisper', 'voxtral'];

	private readonly logger = new Logger(SttService.name);
	private readonly sttUrl: string;
	private readonly apiKey: string;

	constructor(private configService: ConfigService) {
		// Strip trailing slashes so `${sttUrl}${endpoint}` never contains `//`.
		this.sttUrl = this.configService
			.get<string>('stt.url', 'http://localhost:3020')
			.replace(/\/+$/, '');
		this.apiKey = this.configService.get<string>('stt.apiKey', '');
	}

	/**
	 * Transcribe audio to text.
	 *
	 * @param audioBuffer Raw audio bytes; uploaded as `audio.ogg` with type `audio/ogg`.
	 * @param language Language hint; omitted from the request when `auto`.
	 * @param model Model selector that determines the endpoint.
	 * @returns The transcription with any metadata the service provided. Fields
	 *          of unexpected type in the response are dropped rather than passed on.
	 * @throws Error when the request fails, the service answers with a non-2xx
	 *         status (`STT service error: <status>`), or the body is not JSON.
	 */
	async transcribe(
		audioBuffer: Buffer,
		language: SttLanguage = 'de',
		model: SttModel = 'whisper'
	): Promise<TranscriptionResult> {
		const endpoint = this.getEndpoint(model);

		this.logger.debug(
			`Transcribing ${audioBuffer.length} bytes with ${model}, language=${language}`
		);

		const formData = new FormData();
		const blob = new Blob([new Uint8Array(audioBuffer)], { type: 'audio/ogg' });
		formData.append('file', blob, 'audio.ogg');

		if (language !== 'auto') {
			formData.append('language', language);
		}

		let response: Response;
		try {
			response = await fetch(`${this.sttUrl}${endpoint}`, {
				method: 'POST',
				headers: this.buildHeaders(),
				body: formData,
			});
		} catch (error) {
			this.logFailure('Transcription failed', error);
			throw error;
		}

		if (!response.ok) {
			// Logged once here; the body is best-effort diagnostic detail only.
			const errorText = await response.text().catch(() => '');
			this.logger.error(`STT failed: ${response.status} - ${errorText}`);
			throw new Error(`STT service error: ${response.status}`);
		}

		let payload: unknown;
		try {
			payload = await response.json();
		} catch (error) {
			this.logFailure('Transcription failed', error);
			throw error;
		}

		const result = SttService.asRecord(payload);
		const text = typeof result.text === 'string' ? result.text : '';
		this.logger.debug(`Transcription completed: "${text.substring(0, 50)}..."`);

		return {
			text,
			language: typeof result.language === 'string' ? result.language : undefined,
			model: typeof result.model === 'string' && result.model ? result.model : model,
			// Only pass on real numbers: callers format this value with number methods.
			duration:
				typeof result.duration === 'number' && Number.isFinite(result.duration)
					? result.duration
					: undefined,
		};
	}

	/**
	 * Get the appropriate endpoint for the model
	 */
	private getEndpoint(model: SttModel): string {
		switch (model) {
			case 'voxtral':
				return '/transcribe/voxtral';
			case 'auto':
				return '/transcribe/auto';
			case 'whisper':
			default:
				return '/transcribe';
		}
	}

	/** Builds the request headers, adding `X-API-Key` when an API key is configured. */
	private buildHeaders(): Record<string, string> {
		return this.apiKey ? { 'X-API-Key': this.apiKey } : {};
	}

	/** Logs a failure once, passing the stack trace to the logger when one is available. */
	private logFailure(message: string, error: unknown): void {
		if (error instanceof Error) {
			this.logger.error(`${message}: ${error.message}`, error.stack);
		} else {
			this.logger.error(`${message}: ${String(error)}`);
		}
	}

	/** Returns `value` as a string-keyed record, or an empty record when it is not an object. */
	private static asRecord(value: unknown): Record<string, unknown> {
		return typeof value === 'object' && value !== null ? (value as Record<string, unknown>) : {};
	}

	/**
	 * Check if STT service is healthy.
	 *
	 * Probes `GET /models`; any error, timeout or non-2xx status counts as unhealthy.
	 */
	async isHealthy(): Promise<boolean> {
		try {
			const response = await fetch(`${this.sttUrl}/models`, {
				headers: this.buildHeaders(),
				signal: AbortSignal.timeout(SttService.PROBE_TIMEOUT_MS),
			});
			return response.ok;
		} catch {
			return false;
		}
	}

	/**
	 * Get available models.
	 *
	 * Returns the `models` array from `GET /models` when it is an array of
	 * strings; otherwise (error, timeout, non-2xx, unexpected shape) returns
	 * the built-in fallback list.
	 */
	async getModels(): Promise<string[]> {
		try {
			const response = await fetch(`${this.sttUrl}/models`, {
				headers: this.buildHeaders(),
				signal: AbortSignal.timeout(SttService.PROBE_TIMEOUT_MS),
			});
			if (response.ok) {
				const payload: unknown = await response.json();
				const models = SttService.asRecord(payload).models;
				if (Array.isArray(models) && models.every((entry) => typeof entry === 'string')) {
					return models;
				}
			}
		} catch {
			// Best-effort lookup: fall through to the fallback list.
		}
		return [...SttService.FALLBACK_MODELS];
	}
}
|
||||
4
services/matrix-stt-bot/tsconfig.build.json
Normal file
4
services/matrix-stt-bot/tsconfig.build.json
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"exclude": ["node_modules", "test", "dist", "**/*spec.ts"]
|
||||
}
|
||||
24
services/matrix-stt-bot/tsconfig.json
Normal file
24
services/matrix-stt-bot/tsconfig.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"declaration": true,
|
||||
"removeComments": true,
|
||||
"emitDecoratorMetadata": true,
|
||||
"experimentalDecorators": true,
|
||||
"allowSyntheticDefaultImports": true,
|
||||
"target": "ES2021",
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"baseUrl": "./",
|
||||
"incremental": true,
|
||||
"skipLibCheck": true,
|
||||
"strictNullChecks": true,
|
||||
"noImplicitAny": true,
|
||||
"strictBindCallApply": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"esModuleInterop": true,
|
||||
"moduleResolution": "node",
|
||||
"resolveJsonModule": true
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue