feat(matrix): add TTS bot for text-to-speech conversion

- NestJS bot that converts text messages to speech via mana-tts
- Commands: !voice, !voices, !speed, !status, !help
- User settings stored in-memory (voice, speed per user)
- Docker config for Mac Mini deployment
- Setup script for bot registration

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Till-JS 2026-01-29 16:03:26 +01:00
parent 7442b09471
commit 58a051645b
20 changed files with 1564 additions and 239 deletions

View file

@ -1245,6 +1245,39 @@ services:
retries: 3
start_period: 40s
# ============================================
# Matrix TTS Bot (Text-to-Speech)
# ============================================
matrix-tts-bot:
image: matrix-tts-bot:latest
container_name: manacore-matrix-tts-bot
restart: always
depends_on:
synapse:
condition: service_healthy
environment:
NODE_ENV: production
PORT: 3023
TZ: Europe/Berlin
MATRIX_HOMESERVER_URL: http://synapse:8008
MATRIX_ACCESS_TOKEN: ${MATRIX_TTS_BOT_TOKEN}
MATRIX_ALLOWED_ROOMS: ${MATRIX_TTS_BOT_ROOMS:-}
TTS_URL: http://host.docker.internal:3022
DEFAULT_VOICE: af_heart
DEFAULT_SPEED: 1.0
MAX_TEXT_LENGTH: 500
volumes:
- matrix_tts_bot_data:/app/data
ports:
- "3023:3023"
healthcheck:
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:3023/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
# ============================================
# Auto-Update (Watchtower)
# ============================================
@ -1305,3 +1338,5 @@ volumes:
name: manacore-matrix-zitare-bot
matrix_clock_bot_data:
name: manacore-matrix-clock-bot
matrix_tts_bot_data:
name: manacore-matrix-tts-bot

736
pnpm-lock.yaml generated

File diff suppressed because it is too large Load diff

160
scripts/mac-mini/setup-tts-bot.sh Executable file
View file

@ -0,0 +1,160 @@
#!/bin/bash
# Register and set up the Matrix TTS bot.
# Run this after Matrix Synapse is running.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

echo "============================================"
echo " Matrix TTS Bot Setup"
echo "============================================"
echo ""

# The script shells out to these tools; fail early with a clear message
# instead of a cryptic "command not found" halfway through registration.
for tool in curl jq openssl; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo -e "${RED}Error: required tool '${tool}' is not installed${NC}"
    exit 1
  fi
done

# Default values
HOMESERVER_URL="${MATRIX_HOMESERVER_URL:-http://localhost:8008}"
BOT_USERNAME="tts"
BOT_DISPLAY_NAME="TTS"

# Check if Synapse is running
# -f makes curl fail on HTTP error statuses, so an error page served by a
# proxy is not mistaken for a healthy homeserver.
echo "Checking Synapse..."
if ! curl -sf "${HOMESERVER_URL}/health" > /dev/null 2>&1; then
  echo -e "${RED}Error: Synapse is not reachable at ${HOMESERVER_URL}${NC}"
  echo "Start it with: docker compose -f docker-compose.macmini.yml up -d synapse"
  exit 1
fi
echo -e "${GREEN}Synapse is running${NC}"
echo ""

# Check if registration secret is available
if [ -z "$SYNAPSE_REGISTRATION_SECRET" ]; then
  echo -e "${YELLOW}SYNAPSE_REGISTRATION_SECRET not set.${NC}"
  echo "Please provide the registration secret from your .env file:"
  # -r keeps backslashes in the secret intact
  read -rsp "Registration Secret: " SYNAPSE_REGISTRATION_SECRET
  echo ""
fi
if [ -z "$SYNAPSE_REGISTRATION_SECRET" ]; then
  echo -e "${RED}Error: no registration secret provided${NC}"
  exit 1
fi

# Generate bot password
BOT_PASSWORD=$(openssl rand -base64 24)

echo "Registering bot user @${BOT_USERNAME}..."
# Generate the HMAC-SHA1 for Synapse's shared-secret registration API.
#
# Arguments (order kept for existing callers):
#   $1 nonce      - nonce obtained from GET /_synapse/admin/v1/register
#   $2 user       - localpart of the user to register
#   $3 password   - password for the new user
#   $4 user_type  - optional user type (e.g. "bot"); may be empty
#   $5 admin      - "true" to register an admin, anything else for a normal user
#
# Reads SYNAPSE_REGISTRATION_SECRET as the HMAC key and prints the hex digest.
#
# Synapse hashes: nonce NUL user NUL password NUL ("admin"|"notadmin")
# and, only when a user type is given, NUL user_type.
generate_mac() {
  local nonce=$1
  local user=$2
  local password=$3
  local user_type=$4
  local admin=$5

  local admin_flag="notadmin"
  if [ "$admin" = "true" ]; then
    admin_flag="admin"
  fi

  # Stream the fields through printf: bash variables cannot hold NUL bytes and
  # plain `echo -n` would hash the four literal characters "\x00" instead.
  {
    printf '%s\0%s\0%s\0%s' "$nonce" "$user" "$password" "$admin_flag"
    if [ -n "$user_type" ]; then
      printf '\0%s' "$user_type"
    fi
  } | openssl dgst -sha1 -hmac "$SYNAPSE_REGISTRATION_SECRET" | awk '{print $NF}'
}
# Get nonce
# `|| true` keeps `set -e` from aborting silently on a non-JSON response so the
# explicit check below can print a helpful error instead.
NONCE=$(curl -s "${HOMESERVER_URL}/_synapse/admin/v1/register" | jq -r '.nonce // empty' 2> /dev/null || true)
if [ -z "$NONCE" ] || [ "$NONCE" = "null" ]; then
  echo -e "${RED}Failed to get registration nonce. Is admin registration enabled?${NC}"
  exit 1
fi

# Calculate MAC
MAC=$(generate_mac "$NONCE" "$BOT_USERNAME" "$BOT_PASSWORD" "bot" "false")

# Register user
# Build the JSON with jq so every value is escaped correctly.
REGISTER_PAYLOAD=$(jq -n \
  --arg nonce "$NONCE" \
  --arg username "$BOT_USERNAME" \
  --arg password "$BOT_PASSWORD" \
  --arg displayname "$BOT_DISPLAY_NAME" \
  --arg mac "$MAC" \
  '{nonce: $nonce, username: $username, password: $password, displayname: $displayname, user_type: "bot", admin: false, mac: $mac}')
REGISTER_RESPONSE=$(curl -s -X POST "${HOMESERVER_URL}/_synapse/admin/v1/register" \
  -H "Content-Type: application/json" \
  -d "$REGISTER_PAYLOAD")

# Check if registration was successful
if printf '%s' "$REGISTER_RESPONSE" | jq -e '.access_token' > /dev/null 2>&1; then
  ACCESS_TOKEN=$(printf '%s' "$REGISTER_RESPONSE" | jq -r '.access_token')
  USER_ID=$(printf '%s' "$REGISTER_RESPONSE" | jq -r '.user_id')
  echo -e "${GREEN}Bot registered successfully!${NC}"
  echo ""
  echo -e "${CYAN}User ID:${NC} ${USER_ID}"
  # The password is generated by this script; without showing it once, the
  # "user already exists" login path below can never be completed on a re-run.
  echo -e "${CYAN}Password:${NC} ${BOT_PASSWORD}"
  echo "(store this password safely; it is needed to log in again)"
  echo ""
else
  ERROR=$(printf '%s' "$REGISTER_RESPONSE" | jq -r '.error // .errcode // "Unknown error"' 2> /dev/null || echo "Unknown error")
  ERRCODE=$(printf '%s' "$REGISTER_RESPONSE" | jq -r '.errcode // empty' 2> /dev/null || true)

  # Check if user already exists.
  # The errcode is checked on its own because the human-readable error text
  # does not necessarily contain any of the phrases matched below.
  if [ "$ERRCODE" = "M_USER_IN_USE" ] || printf '%s' "$ERROR" | grep -qi "user.*exists\|already.*registered\|M_USER_IN_USE"; then
    echo -e "${YELLOW}User @${BOT_USERNAME} already exists. Getting access token via login...${NC}"
    echo "Please enter the existing bot password:"
    read -rsp "Password: " EXISTING_PASSWORD
    echo ""

    # jq escapes quotes/backslashes a user-entered password may contain
    LOGIN_PAYLOAD=$(jq -n \
      --arg user "$BOT_USERNAME" \
      --arg password "$EXISTING_PASSWORD" \
      '{type: "m.login.password", user: $user, password: $password}')
    LOGIN_RESPONSE=$(curl -s -X POST "${HOMESERVER_URL}/_matrix/client/r0/login" \
      -H "Content-Type: application/json" \
      -d "$LOGIN_PAYLOAD")

    if printf '%s' "$LOGIN_RESPONSE" | jq -e '.access_token' > /dev/null 2>&1; then
      ACCESS_TOKEN=$(printf '%s' "$LOGIN_RESPONSE" | jq -r '.access_token')
      USER_ID=$(printf '%s' "$LOGIN_RESPONSE" | jq -r '.user_id')
      echo -e "${GREEN}Login successful!${NC}"
    else
      echo -e "${RED}Login failed. Please check the password.${NC}"
      exit 1
    fi
  else
    echo -e "${RED}Registration failed: ${ERROR}${NC}"
    exit 1
  fi
fi

echo ""
echo "============================================"
echo " Add to .env file"
echo "============================================"
echo ""
echo -e "${CYAN}# Matrix TTS Bot${NC}"
echo "MATRIX_TTS_BOT_TOKEN=${ACCESS_TOKEN}"
echo ""

# Optional: Set display name
# Best-effort: the token has already been printed, so a failure here only
# warns instead of aborting the script.
echo "Setting display name..."
USER_ID_ENCODED=$(jq -rn --arg id "$USER_ID" '$id | @uri')
DISPLAYNAME_PAYLOAD=$(jq -n --arg displayname "🔊 ${BOT_DISPLAY_NAME}" '{displayname: $displayname}')
if ! curl -sf -X PUT "${HOMESERVER_URL}/_matrix/client/r0/profile/${USER_ID_ENCODED}/displayname" \
  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
  -H "Content-Type: application/json" \
  -d "$DISPLAYNAME_PAYLOAD" > /dev/null; then
  echo -e "${YELLOW}Warning: could not set display name${NC}"
fi

echo ""
echo "============================================"
echo " Next Steps"
echo "============================================"
echo ""
echo "1. Add the MATRIX_TTS_BOT_TOKEN to your .env file"
echo ""
echo "2. Build the bot image:"
echo " docker build -t matrix-tts-bot ./services/matrix-tts-bot"
echo ""
echo "3. Start the bot:"
echo " docker compose -f docker-compose.macmini.yml up -d matrix-tts-bot"
echo ""
echo "4. Invite the bot to a room in Element:"
# Use the ID the homeserver actually returned instead of a hard-coded domain
echo " /invite ${USER_ID}"
echo ""
echo -e "${GREEN}Setup complete!${NC}"

View file

@ -0,0 +1,16 @@
# Server
PORT=3023
# Matrix Configuration
MATRIX_HOMESERVER_URL=http://localhost:8008
MATRIX_ACCESS_TOKEN=syt_xxx
MATRIX_ALLOWED_ROOMS=
MATRIX_STORAGE_PATH=./data/bot-storage.json
# TTS Service (mana-tts)
TTS_URL=http://localhost:3022
# TTS Defaults
DEFAULT_VOICE=af_heart
DEFAULT_SPEED=1.0
MAX_TEXT_LENGTH=500

View file

@ -0,0 +1,165 @@
# Matrix TTS Bot - Claude Code Guidelines
## Overview
Matrix TTS Bot converts text messages to speech and sends them back as audio messages. Uses the mana-tts service (port 3022) for synthesis.
## Tech Stack
- **Framework**: NestJS 10
- **Matrix**: matrix-bot-sdk
- **TTS Backend**: mana-tts service (Kokoro/F5-TTS)
## Commands
```bash
# Development
pnpm install
pnpm start:dev # Start with hot reload
# Build
pnpm build # Production build
# Type check
pnpm type-check # Check TypeScript types
```
## Project Structure
```
services/matrix-tts-bot/
├── src/
│ ├── main.ts # Application entry point (port 3023)
│ ├── app.module.ts # Root module
│ ├── health.controller.ts # Health check endpoint
│ ├── config/
│ │ └── configuration.ts # Configuration & help text
│ ├── bot/
│ │ ├── bot.module.ts
│ │ └── matrix.service.ts # Matrix client & message handler
│ └── tts/
│ ├── tts.module.ts
│ └── tts.service.ts # mana-tts API client
├── Dockerfile
└── package.json
```
## Bot Commands
| Command | Description |
|---------|-------------|
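| `!help` / `!hilfe` | Show help text |
| `!voice [name]` / `!stimme [name]` | Change voice (e.g., `!voice bm_daniel`) |
| `!voices` / `!stimmen` | List available voices |
| `!speed [0.5-2.0]` / `!geschwindigkeit [0.5-2.0]` | Change speech speed |
| `!status` | Show current settings and TTS service availability |
| (any text) | Convert to speech (up to `MAX_TEXT_LENGTH` characters) |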
## Message Flow
1. User sends text message
2. Bot receives via matrix-bot-sdk
3. TTS service synthesizes audio
4. Audio uploaded to Matrix
5. Audio message sent back to room
## Environment Variables
```env
# Server
PORT=3023
# Matrix
MATRIX_HOMESERVER_URL=http://localhost:8008
MATRIX_ACCESS_TOKEN=syt_xxx
MATRIX_ALLOWED_ROOMS=!roomid:matrix.mana.how
MATRIX_STORAGE_PATH=./data/bot-storage.json
# TTS Service
TTS_URL=http://localhost:3022
# Defaults
DEFAULT_VOICE=af_heart
DEFAULT_SPEED=1.0
MAX_TEXT_LENGTH=500
```
## TTS API Integration
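The bot synthesizes speech via the mana-tts `/synthesize/kokoro` endpoint (it also calls `/voices` to list and validate voices, and `/health` for `!status`):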
```typescript
// Request
POST /synthesize/kokoro
{
"text": "Hello world",
"voice": "af_heart",
"speed": 1.0,
"output_format": "wav"
}
// Response: audio/wav binary
```
## Example Voices
| Voice ID | Description |
|----------|-------------|
| `af_heart` | American female (warm) |
| `af_bella` | American female (expressive) |
| `af_sarah` | American female (neutral) |
| `am_michael` | American male (trustworthy) |
| `bm_daniel` | British male (classic) |
| `bf_emma` | British female (professional) |
## Docker
```bash
# Build
docker build -f services/matrix-tts-bot/Dockerfile -t matrix-tts-bot services/matrix-tts-bot
# Run
docker run -p 3023:3023 \
-e MATRIX_HOMESERVER_URL=http://synapse:8008 \
-e MATRIX_ACCESS_TOKEN=syt_xxx \
-e TTS_URL=http://mana-tts:3022 \
-v matrix-tts-bot-data:/app/data \
matrix-tts-bot
```
## Health Check
```bash
curl http://localhost:3023/health
```
## Dependencies
- **mana-tts**: Must be running on port 3022 (or configured via `TTS_URL`)
- **Matrix homeserver**: Synapse or compatible homeserver
## User Settings
Settings are stored in-memory per Matrix user ID:
- Voice selection persists during bot runtime
- Speed setting persists during bot runtime
- Settings reset when bot restarts
## Testing
```bash
# 1. Ensure mana-tts is running
curl http://localhost:3022/health
# 2. Start the bot
cd services/matrix-tts-bot
pnpm start:dev
# 3. Check bot health
curl http://localhost:3023/health
# 4. In Matrix:
# - Invite bot to a room
# - Send a text message
# - Receive audio response
```

View file

@ -0,0 +1,44 @@
# ---- Build stage: install all dependencies and compile TypeScript ----
FROM node:20-alpine AS builder
WORKDIR /app

# Install pnpm
RUN corepack enable && corepack prepare pnpm@9.15.0 --activate

# Copy package files first so the dependency layer is cached across source changes
COPY package.json pnpm-lock.yaml* ./

# Install dependencies
RUN pnpm install --frozen-lockfile

# Copy source
COPY . .

# Build
RUN pnpm build

# Drop devDependencies so only runtime packages reach the final image
RUN pnpm prune --prod

# ---- Production image ----
FROM node:20-alpine
WORKDIR /app

# Create the non-root user up front so files can be copied with the right
# owner directly; a later recursive chown would duplicate node_modules into
# an extra image layer.
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nestjs -u 1001 -G nodejs

# Copy package files and built code (pnpm is not needed at runtime: the
# container starts node directly)
COPY --from=builder --chown=nestjs:nodejs /app/package.json ./
COPY --from=builder --chown=nestjs:nodejs /app/node_modules ./node_modules
COPY --from=builder --chown=nestjs:nodejs /app/dist ./dist

# Create data directory writable by the runtime user
RUN mkdir -p /app/data && chown nestjs:nodejs /app /app/data

# Run as non-root user
USER nestjs

EXPOSE 3023
CMD ["node", "dist/main.js"]

View file

View file

@ -0,0 +1,7 @@
{
"syncToken": "s418_17406_0_34_85_1_3_40_0_1_2",
"filter": null,
"appserviceUsers": {},
"appserviceTransactions": {},
"kvStore": {}
}

View file

@ -0,0 +1,8 @@
{
"$schema": "https://json.schemastore.org/nest-cli",
"collection": "@nestjs/schematics",
"sourceRoot": "src",
"compilerOptions": {
"deleteOutDir": true
}
}

View file

@ -0,0 +1,40 @@
{
"name": "@manacore/matrix-tts-bot",
"version": "1.0.0",
"description": "Matrix bot for text-to-speech conversion",
"private": true,
"pnpm": {
"neverBuiltDependencies": [
"@matrix-org/matrix-sdk-crypto-nodejs"
],
"overrides": {
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
}
},
"overrides": {
"@matrix-org/matrix-sdk-crypto-nodejs": "npm:empty-npm-package@1.0.0"
},
"scripts": {
"prebuild": "rm -rf dist || true",
"build": "tsc -p tsconfig.build.json",
"start": "nest start",
"start:dev": "nest start --watch",
"start:debug": "nest start --debug --watch",
"start:prod": "node dist/main",
"type-check": "tsc --noEmit"
},
"dependencies": {
"@nestjs/common": "^10.4.17",
"@nestjs/config": "^3.3.0",
"@nestjs/core": "^10.4.17",
"@nestjs/platform-express": "^10.4.17",
"matrix-bot-sdk": "^0.7.1",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1"
},
"devDependencies": {
"@nestjs/cli": "^10.4.9",
"@types/node": "^22.10.7",
"typescript": "^5.7.3"
}
}

View file

@ -0,0 +1,19 @@
import { Module } from '@nestjs/common';
import { ConfigModule } from '@nestjs/config';
import { HealthController } from './health.controller';
import { BotModule } from './bot/bot.module';
import { TtsModule } from './tts/tts.module';
import configuration from './config/configuration';
/**
 * Root module of the Matrix TTS bot.
 *
 * Loads the configuration factory globally, wires the TTS and bot modules,
 * and registers the health controller.
 */
@Module({
  controllers: [HealthController],
  imports: [
    ConfigModule.forRoot({
      load: [configuration],
      isGlobal: true,
    }),
    TtsModule,
    BotModule,
  ],
})
export class AppModule {}

View file

@ -0,0 +1,9 @@
import { Module } from '@nestjs/common';
import { MatrixService } from './matrix.service';
import { TtsModule } from '../tts/tts.module';
/**
 * Module hosting the Matrix client service; imports the TTS module so that
 * MatrixService can inject TtsService.
 */
@Module({
  providers: [MatrixService],
  imports: [TtsModule],
})
export class BotModule {}

View file

@ -0,0 +1,363 @@
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import {
MatrixClient,
SimpleFsStorageProvider,
AutojoinRoomsMixin,
RichReply,
} from 'matrix-bot-sdk';
import * as path from 'path';
import * as fs from 'fs';
import { TtsService } from '../tts/tts.service';
import { HELP_TEXT, WELCOME_TEXT } from '../config/configuration';
/** Per-user TTS preferences held by the bot. */
interface UserSettings {
  /** Voice ID handed to the TTS service when synthesizing. */
  voice: string;
  /** Speech speed multiplier handed to the TTS service. */
  speed: number;
}
/**
 * Matrix-facing side of the TTS bot.
 *
 * Connects to the homeserver with the configured access token, joins rooms it
 * is invited to, handles `!` commands, and answers every other text message
 * with a synthesized audio message. Per-user voice/speed settings live in
 * memory only and are lost when the process restarts.
 */
@Injectable()
export class MatrixService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(MatrixService.name);
  private client!: MatrixClient;
  private readonly homeserverUrl: string;
  private readonly accessToken: string;
  private readonly allowedRooms: string[];
  private readonly storagePath: string;
  private readonly defaultVoice: string;
  private readonly defaultSpeed: number;
  private readonly maxTextLength: number;
  private botUserId: string = '';

  // User settings storage (in-memory), keyed by Matrix user ID
  private userSettings: Map<string, UserSettings> = new Map();

  constructor(
    private configService: ConfigService,
    private ttsService: TtsService
  ) {
    this.homeserverUrl = this.configService.get<string>(
      'matrix.homeserverUrl',
      'http://localhost:8008'
    );
    this.accessToken = this.configService.get<string>('matrix.accessToken', '');
    this.allowedRooms = this.configService.get<string[]>('matrix.allowedRooms', []);
    this.storagePath = this.configService.get<string>(
      'matrix.storagePath',
      './data/bot-storage.json'
    );
    this.defaultVoice = this.configService.get<string>('tts.defaultVoice', 'af_heart');
    this.defaultSpeed = this.configService.get<number>('tts.defaultSpeed', 1.0);
    this.maxTextLength = this.configService.get<number>('tts.maxTextLength', 500);
  }

  /** Starts the Matrix client unless no access token is configured. */
  async onModuleInit() {
    if (!this.accessToken) {
      this.logger.warn('No Matrix access token configured. Bot will not start.');
      return;
    }
    await this.initializeClient();
  }

  /** Stops the Matrix client if it was created. */
  async onModuleDestroy() {
    if (this.client) {
      await this.client.stop();
    }
  }

  /**
   * Creates the Matrix client, registers the event handlers and starts it.
   * Failures are logged rather than rethrown.
   */
  private async initializeClient() {
    try {
      // `recursive: true` makes this a no-op when the directory already exists
      fs.mkdirSync(path.dirname(this.storagePath), { recursive: true });

      const storage = new SimpleFsStorageProvider(this.storagePath);
      this.client = new MatrixClient(this.homeserverUrl, this.accessToken, storage);

      // Resolve our own user ID BEFORE syncing starts. Otherwise events
      // delivered during the first sync are compared against an empty ID and
      // the bot could react to its own messages.
      this.botUserId = await this.client.getUserId();

      AutojoinRoomsMixin.setupOnClient(this.client);

      this.client.on('room.invite', async (roomId: string) => {
        // Event-emitter callbacks are not awaited by the emitter, so a
        // rejection here would surface as an unhandled promise rejection.
        try {
          this.logger.log(`Invited to room ${roomId}, joining...`);
          await this.client.joinRoom(roomId);
        } catch (error) {
          this.logger.error(`Failed to join room ${roomId}: ${error}`);
          return;
        }
        // sendWelcome handles its own errors
        setTimeout(() => {
          void this.sendWelcome(roomId);
        }, 2000);
      });

      this.client.on('room.message', async (roomId: string, event: any) => {
        await this.handleMessage(roomId, event);
      });

      await this.client.start();
      this.logger.log(`Matrix TTS Bot connected as ${this.botUserId}`);
    } catch (error) {
      this.logger.error('Failed to initialize Matrix client:', error);
    }
  }

  /**
   * Entry point for every room message: filters out own messages, rooms
   * outside the allowlist and non-text events, then dispatches to a command
   * handler or to text-to-speech.
   */
  private async handleMessage(roomId: string, event: any) {
    // Ignore own messages
    if (event.sender === this.botUserId) return;

    // Check room allowlist (an empty list allows every room)
    if (this.allowedRooms.length > 0 && !this.allowedRooms.includes(roomId)) {
      return;
    }

    const userId = event.sender;

    // Only handle text messages
    if (event.content?.msgtype !== 'm.text') return;

    // The body comes from other users; make sure it really is a string
    // before calling string methods on it.
    const rawBody: unknown = event.content.body;
    if (typeof rawBody !== 'string') return;
    const body = rawBody.trim();
    if (!body) return;

    try {
      // Handle ! commands
      if (body.startsWith('!')) {
        const [command, ...args] = body.slice(1).split(' ');
        await this.executeCommand(roomId, event, userId, command.toLowerCase(), args.join(' '));
        return;
      }

      // Convert text to speech
      await this.handleTextToSpeech(roomId, event, userId, body);
    } catch (error) {
      this.logger.error(`Error handling message: ${error}`);
      // Sending the error notice can fail too; never let that escape the handler
      try {
        await this.sendReply(roomId, event, 'Ein Fehler ist aufgetreten.');
      } catch (replyError) {
        this.logger.error(`Failed to send error reply: ${replyError}`);
      }
    }
  }

  /** Dispatches a parsed `!` command (English and German aliases). */
  private async executeCommand(
    roomId: string,
    event: any,
    userId: string,
    command: string,
    args: string
  ) {
    switch (command) {
      case 'help':
      case 'hilfe':
        await this.sendReply(roomId, event, HELP_TEXT);
        break;
      case 'voice':
      case 'stimme':
        await this.handleVoiceCommand(roomId, event, userId, args);
        break;
      case 'voices':
      case 'stimmen':
        await this.handleVoicesCommand(roomId, event);
        break;
      case 'speed':
      case 'geschwindigkeit':
        await this.handleSpeedCommand(roomId, event, userId, args);
        break;
      case 'status':
        await this.handleStatusCommand(roomId, event, userId);
        break;
      default:
        // Silently ignore unknown commands
        break;
    }
  }

  /** `!voice [name]`: validates the voice against the TTS service and stores it. */
  private async handleVoiceCommand(roomId: string, event: any, userId: string, args: string) {
    if (!args.trim()) {
      await this.sendReply(
        roomId,
        event,
        '**Verwendung:** `!voice [name]`\n\nBeispiel: `!voice bm_daniel`\n\nZeige alle Stimmen mit `!voices`'
      );
      return;
    }

    const voiceName = args.trim().toLowerCase();

    // Check if voice exists
    const exists = await this.ttsService.voiceExists(voiceName);
    if (!exists) {
      await this.sendReply(
        roomId,
        event,
        `Stimme "${voiceName}" nicht gefunden.\n\nZeige alle Stimmen mit \`!voices\``
      );
      return;
    }

    // Update user settings
    const settings = this.getUserSettings(userId);
    settings.voice = voiceName;
    this.userSettings.set(userId, settings);

    await this.sendReply(roomId, event, `Stimme geändert zu: **${voiceName}**`);
  }

  /** `!voices`: lists the voices reported by the TTS service. */
  private async handleVoicesCommand(roomId: string, event: any) {
    try {
      const voices = await this.ttsService.getVoices();
      let response = '**Verfügbare Stimmen:**\n\n';

      if (voices.kokoro_voices.length > 0) {
        response += '**Kokoro (schnell):**\n';
        const voiceList = voices.kokoro_voices
          .slice(0, 15) // Limit to first 15 to avoid message being too long
          .map((v) => `- \`${v.id}\``)
          .join('\n');
        response += voiceList;
        if (voices.kokoro_voices.length > 15) {
          response += `\n... und ${voices.kokoro_voices.length - 15} weitere`;
        }
      }

      if (voices.custom_voices.length > 0) {
        response += '\n\n**Eigene Stimmen:**\n';
        response += voices.custom_voices.map((v) => `- \`${v.id}\` - ${v.name}`).join('\n');
      }

      response += '\n\nWechseln mit: `!voice [name]`';
      await this.sendReply(roomId, event, response);
    } catch (error) {
      this.logger.error('Failed to get voices:', error);
      await this.sendReply(roomId, event, 'Fehler beim Abrufen der Stimmen.');
    }
  }

  /** `!speed [0.5-2.0]`: validates and stores the speech speed. */
  private async handleSpeedCommand(roomId: string, event: any, userId: string, args: string) {
    if (!args.trim()) {
      await this.sendReply(
        roomId,
        event,
        '**Verwendung:** `!speed [0.5-2.0]`\n\nBeispiel: `!speed 1.2` (20% schneller)'
      );
      return;
    }

    // Number() rejects trailing garbage that parseFloat would silently accept
    // ("1.5abc"); a decimal comma is accepted because the bot speaks German
    // ("1,5" used to be read as 1).
    const speed = Number(args.trim().replace(',', '.'));
    if (!Number.isFinite(speed) || speed < 0.5 || speed > 2.0) {
      await this.sendReply(roomId, event, 'Geschwindigkeit muss zwischen 0.5 und 2.0 liegen.');
      return;
    }

    const settings = this.getUserSettings(userId);
    settings.speed = speed;
    this.userSettings.set(userId, settings);

    await this.sendReply(roomId, event, `Geschwindigkeit geändert zu: **${speed}x**`);
  }

  /** `!status`: reports the user's settings and whether the TTS service responds. */
  private async handleStatusCommand(roomId: string, event: any, userId: string) {
    const settings = this.getUserSettings(userId);
    const ttsHealthy = await this.ttsService.isHealthy();

    let response = '**Aktuelle Einstellungen:**\n\n';
    response += `Stimme: \`${settings.voice}\`\n`;
    response += `Geschwindigkeit: ${settings.speed}x\n`;
    response += `Max. Textlänge: ${this.maxTextLength} Zeichen\n\n`;
    response += `TTS-Service: ${ttsHealthy ? 'Online' : 'Offline'}`;

    await this.sendReply(roomId, event, response);
  }

  /**
   * Synthesizes `text` with the user's settings, uploads the audio and posts
   * it to the room as an `m.audio` message.
   */
  private async handleTextToSpeech(roomId: string, event: any, userId: string, text: string) {
    // Check text length
    if (text.length > this.maxTextLength) {
      await this.sendReply(
        roomId,
        event,
        `Text zu lang (${text.length}/${this.maxTextLength} Zeichen). Bitte kurze Nachricht senden.`
      );
      return;
    }

    const settings = this.getUserSettings(userId);

    // Set typing indicator (best-effort: it must never block the synthesis)
    await this.setTypingQuietly(roomId, true, 30000);

    try {
      // Synthesize speech
      const audioBuffer = await this.ttsService.synthesize(text, settings.voice, settings.speed);

      // Stop typing indicator
      await this.setTypingQuietly(roomId, false);

      // Upload audio to Matrix
      const mxcUrl = await this.client.uploadContent(audioBuffer, 'audio/wav', 'speech.wav');

      // Calculate approximate duration (rough estimate based on text length and speed)
      const estimatedDuration = Math.round(((text.length / 15) * 1000) / settings.speed);

      // Send audio message
      await this.client.sendMessage(roomId, {
        msgtype: 'm.audio',
        body: 'speech.wav',
        url: mxcUrl,
        info: {
          mimetype: 'audio/wav',
          size: audioBuffer.length,
          duration: estimatedDuration,
        },
      });

      this.logger.debug(`Sent audio message for text: "${text.substring(0, 30)}..."`);
    } catch (error) {
      await this.setTypingQuietly(roomId, false);
      this.logger.error('TTS processing failed:', error);
      await this.sendReply(
        roomId,
        event,
        'Fehler bei der Sprachsynthese. Ist der TTS-Service erreichbar?'
      );
    }
  }

  /**
   * Updates the typing indicator without ever throwing; a failed typing
   * notification must not abort or mask the actual work.
   */
  private async setTypingQuietly(roomId: string, typing: boolean, timeoutMs?: number) {
    try {
      await this.client.setTyping(roomId, typing, timeoutMs);
    } catch (error) {
      this.logger.warn(`Failed to update typing indicator: ${error}`);
    }
  }

  /** Returns the stored settings for `userId`, creating defaults on first use. */
  private getUserSettings(userId: string): UserSettings {
    let settings = this.userSettings.get(userId);
    if (!settings) {
      settings = { voice: this.defaultVoice, speed: this.defaultSpeed };
      this.userSettings.set(userId, settings);
    }
    return settings;
  }

  /** Posts the welcome text to a room; failures are logged only. */
  private async sendWelcome(roomId: string) {
    try {
      await this.client.sendMessage(roomId, {
        msgtype: 'm.text',
        body: WELCOME_TEXT,
        format: 'org.matrix.custom.html',
        formatted_body: this.markdownToHtml(WELCOME_TEXT),
      });
    } catch (error) {
      this.logger.error('Failed to send welcome:', error);
    }
  }

  /** Sends `message` as a rich reply to `event`. */
  private async sendReply(roomId: string, event: any, message: string) {
    const reply = RichReply.createFor(roomId, event, message, this.markdownToHtml(message));
    reply.msgtype = 'm.text';
    await this.client.sendMessage(roomId, reply);
  }

  /**
   * Converts the small Markdown subset used by the bot's messages (bold,
   * italic, inline code, line breaks) to HTML.
   *
   * HTML metacharacters are escaped first because some messages embed
   * user-supplied text (e.g. an unknown voice name), which must not end up
   * as markup in `formatted_body`.
   */
  private markdownToHtml(text: string): string {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
      .replace(/\*(.+?)\*/g, '<em>$1</em>')
      .replace(/`(.+?)`/g, '<code>$1</code>')
      .replace(/\n/g, '<br>');
  }
}

View file

@ -0,0 +1,40 @@
/** Parses an integer env value; falls back when it is unset or not a number. */
const envInt = (raw: string | undefined, fallback: number): number => {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) ? parsed : fallback;
};

/** Parses a float env value; falls back when it is unset or not a number. */
const envFloat = (raw: string | undefined, fallback: number): number => {
  const parsed = Number.parseFloat(raw ?? '');
  return Number.isFinite(parsed) ? parsed : fallback;
};

/**
 * Configuration factory loaded by the config module.
 *
 * Reads all settings from environment variables and applies the defaults
 * shown below. Invalid numeric values fall back to the default instead of
 * propagating `NaN` into the bot.
 */
const configuration = () => ({
  port: envInt(process.env.PORT, 3023),
  matrix: {
    homeserverUrl: process.env.MATRIX_HOMESERVER_URL || 'http://localhost:8008',
    accessToken: process.env.MATRIX_ACCESS_TOKEN || '',
    // Comma-separated room IDs; entries are trimmed so "a, b" matches room "b"
    allowedRooms: (process.env.MATRIX_ALLOWED_ROOMS || '')
      .split(',')
      .map((room) => room.trim())
      .filter(Boolean),
    storagePath: process.env.MATRIX_STORAGE_PATH || './data/bot-storage.json',
  },
  tts: {
    url: process.env.TTS_URL || 'http://localhost:3022',
    defaultVoice: process.env.DEFAULT_VOICE || 'af_heart',
    defaultSpeed: envFloat(process.env.DEFAULT_SPEED, 1.0),
    maxTextLength: envInt(process.env.MAX_TEXT_LENGTH, 500),
  },
});

export default configuration;
/**
 * Help message (German, lightweight Markdown) sent in response to the help
 * command. Umlauts are written out properly; the message is user-facing.
 */
export const HELP_TEXT = `**TTS Bot - Hilfe**
Ich wandle deine Textnachrichten in Sprache um!
**Befehle:**
- \`!voice [name]\` - Stimme wechseln (z.B. \`!voice bm_daniel\`)
- \`!voices\` - Alle verfügbaren Stimmen anzeigen
- \`!speed [0.5-2.0]\` - Geschwindigkeit ändern
- \`!status\` - Aktuelle Einstellungen
- \`!help\` - Diese Hilfe
**Verwendung:**
Schreibe einfach eine Nachricht und ich sende dir die Sprachausgabe zurück.
**Beispiel-Stimmen:**
- \`af_heart\` - Amerikanisch weiblich (warm)
- \`bm_daniel\` - Britisch männlich (klassisch)
- \`am_michael\` - Amerikanisch männlich`;
/** Greeting (German, lightweight Markdown) posted after the bot joins a room. */
export const WELCOME_TEXT = `**TTS Bot**
Ich wandle Textnachrichten in Sprache um!
Schreibe einfach eine Nachricht oder \`!help\` für Hilfe.`;

View file

@ -0,0 +1,13 @@
import { Controller, Get } from '@nestjs/common';
/** HTTP controller exposing the service's health endpoint. */
@Controller()
export class HealthController {
  /**
   * GET /health
   *
   * @returns a static "ok" status, the service name and the current time as
   *          an ISO-8601 string.
   */
  @Get('health')
  health() {
    const timestamp = new Date().toISOString();
    return { status: 'ok', service: 'matrix-tts-bot', timestamp };
  }
}

View file

@ -0,0 +1,16 @@
import { NestFactory } from '@nestjs/core';
import { AppModule } from './app.module';
import { Logger } from '@nestjs/common';
/**
 * Creates the Nest application and starts the HTTP server on `PORT`
 * (default 3023).
 */
async function bootstrap(): Promise<void> {
  const app = await NestFactory.create(AppModule);

  // Without shutdown hooks Nest does not react to SIGTERM/SIGINT, so
  // `onModuleDestroy` handlers would never run when the container is stopped.
  app.enableShutdownHooks();

  const port = process.env.PORT || 3023;
  await app.listen(port);

  const logger = new Logger('Bootstrap');
  logger.log(`Matrix TTS Bot running on port ${port}`);
  logger.log(`Health check: http://localhost:${port}/health`);
}

// Handle startup failures explicitly instead of leaving a floating promise.
bootstrap().catch((error: unknown) => {
  new Logger('Bootstrap').error(
    'Failed to start Matrix TTS Bot',
    error instanceof Error ? error.stack : String(error)
  );
  process.exit(1);
});

View file

@ -0,0 +1,8 @@
import { Module } from '@nestjs/common';
import { TtsService } from './tts.service';
/** Module that provides TtsService and exports it to importing modules. */
@Module({
  exports: [TtsService],
  providers: [TtsService],
})
export class TtsModule {}

View file

@ -0,0 +1,97 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
/** One voice entry as returned by the TTS service's voice listing. */
export interface VoiceInfo {
  /** Voice identifier; this is the value users select and that is sent for synthesis. */
  id: string;
  /** Display name of the voice. */
  name: string;
  /** Free-text description (presumably human-readable; not used by the bot itself). */
  description: string;
  /** Voice category as reported by the service (exact values not visible here). */
  type: string;
}
/** Shape of the TTS service's voice listing response. */
export interface VoicesResponse {
  /** Built-in Kokoro voices. */
  kokoro_voices: VoiceInfo[];
  /** Custom voices known to the service. */
  custom_voices: VoiceInfo[];
}
/**
 * HTTP client for the mana-tts service.
 *
 * All requests carry a timeout so that an unresponsive TTS backend cannot
 * block a message handler indefinitely.
 */
@Injectable()
export class TtsService {
  /** Upper bound for a synthesis request. */
  private static readonly SYNTHESIS_TIMEOUT_MS = 120_000;
  /** Upper bound for the lightweight `/voices` and `/health` requests. */
  private static readonly METADATA_TIMEOUT_MS = 10_000;

  private readonly logger = new Logger(TtsService.name);
  private readonly ttsUrl: string;

  constructor(private configService: ConfigService) {
    // Strip trailing slashes so a configured "http://host:3022/" does not
    // produce "//synthesize/kokoro" style paths.
    this.ttsUrl = this.configService
      .get<string>('tts.url', 'http://localhost:3022')
      .replace(/\/+$/, '');
  }

  /**
   * Synthesize text to speech using Kokoro model
   *
   * @param text  text to speak
   * @param voice voice ID (default `af_heart`)
   * @param speed speed multiplier (default 1.0)
   * @returns the WAV audio returned by the service
   * @throws Error when the service answers with a non-2xx status, and
   *         whatever `fetch` throws on network failure or timeout
   */
  async synthesize(text: string, voice: string = 'af_heart', speed: number = 1.0): Promise<Buffer> {
    const url = `${this.ttsUrl}/synthesize/kokoro`;
    this.logger.debug(
      `Synthesizing: "${text.substring(0, 50)}..." with voice=${voice}, speed=${speed}`
    );

    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text,
        voice,
        speed,
        output_format: 'wav',
      }),
      signal: AbortSignal.timeout(TtsService.SYNTHESIS_TIMEOUT_MS),
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.logger.error(`TTS failed: ${response.status} - ${errorText}`);
      throw new Error(`TTS synthesis failed: ${response.status}`);
    }

    const arrayBuffer = await response.arrayBuffer();
    this.logger.debug(`Received audio: ${arrayBuffer.byteLength} bytes`);
    return Buffer.from(arrayBuffer);
  }

  /**
   * Get list of available voices
   *
   * Missing or malformed voice lists in the response are normalized to empty
   * arrays so callers can rely on both properties being arrays.
   *
   * @throws Error when the service answers with a non-2xx status
   */
  async getVoices(): Promise<VoicesResponse> {
    const url = `${this.ttsUrl}/voices`;
    const response = await fetch(url, {
      signal: AbortSignal.timeout(TtsService.METADATA_TIMEOUT_MS),
    });

    if (!response.ok) {
      throw new Error(`Failed to get voices: ${response.status}`);
    }

    const payload = (await response.json()) as Partial<VoicesResponse> | null | undefined;
    const kokoro = payload?.kokoro_voices;
    const custom = payload?.custom_voices;
    return {
      kokoro_voices: Array.isArray(kokoro) ? kokoro : [],
      custom_voices: Array.isArray(custom) ? custom : [],
    };
  }

  /**
   * Check if TTS service is healthy
   *
   * @returns `true` when `/health` answers with a 2xx status; `false` on any
   *          other status, network error or timeout
   */
  async isHealthy(): Promise<boolean> {
    try {
      const response = await fetch(`${this.ttsUrl}/health`, {
        signal: AbortSignal.timeout(TtsService.METADATA_TIMEOUT_MS),
      });
      return response.ok;
    } catch {
      return false;
    }
  }

  /**
   * Check if a voice exists
   *
   * @returns `true` when `voiceId` is among the Kokoro or custom voices;
   *          `false` otherwise, including when the voice list cannot be fetched
   */
  async voiceExists(voiceId: string): Promise<boolean> {
    try {
      const voices = await this.getVoices();
      const allVoices = [...voices.kokoro_voices, ...voices.custom_voices];
      return allVoices.some((v) => v.id === voiceId);
    } catch {
      return false;
    }
  }
}

View file

@ -0,0 +1,4 @@
{
"extends": "./tsconfig.json",
"exclude": ["node_modules", "dist", "test", "**/*spec.ts"]
}

View file

@ -0,0 +1,23 @@
{
"compilerOptions": {
"module": "commonjs",
"declaration": true,
"removeComments": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"allowSyntheticDefaultImports": true,
"target": "ES2021",
"sourceMap": true,
"outDir": "./dist",
"baseUrl": "./",
"incremental": true,
"skipLibCheck": true,
"strictNullChecks": true,
"noImplicitAny": true,
"strictBindCallApply": true,
"forceConsistentCasingInFileNames": true,
"noFallthroughCasesInSwitch": true,
"esModuleInterop": true,
"resolveJsonModule": true
}
}