From 48dfcd180fa7eee7feaf9d5e583e0c8371aeb1d4 Mon Sep 17 00:00:00 2001 From: Till-JS <101404291+Till-JS@users.noreply.github.com> Date: Sun, 1 Feb 2026 03:02:37 +0100 Subject: [PATCH] feat(matrix-mana-bot): add voice output/TTS support (Phase 2) - Add VoiceHandler for voice commands (!voice, !stimme, !stimmen) - Extend MatrixService with TTS response generation - Add prepareTextForSpeech() for German natural speech formatting - Send audio responses non-blocking after text response - Register voice commands in command router Co-Authored-By: Claude Opus 4.5 --- .../src/bot/command-router.service.ts | 20 +++ .../matrix-mana-bot/src/bot/matrix.service.ts | 115 +++++++++++++++++- .../src/handlers/handlers.module.ts | 8 +- .../src/handlers/voice.handler.ts | 108 ++++++++++++++++ 4 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 services/matrix-mana-bot/src/handlers/voice.handler.ts diff --git a/services/matrix-mana-bot/src/bot/command-router.service.ts b/services/matrix-mana-bot/src/bot/command-router.service.ts index 1c38275a0..7baef3342 100644 --- a/services/matrix-mana-bot/src/bot/command-router.service.ts +++ b/services/matrix-mana-bot/src/bot/command-router.service.ts @@ -4,6 +4,7 @@ import { TodoHandler } from '../handlers/todo.handler'; import { CalendarHandler } from '../handlers/calendar.handler'; import { ClockHandler } from '../handlers/clock.handler'; import { HelpHandler } from '../handlers/help.handler'; +import { VoiceHandler } from '../handlers/voice.handler'; import { OrchestrationService } from '../orchestration/orchestration.service'; export interface CommandContext { @@ -50,6 +51,8 @@ export class CommandRouterService { private clockHandler: ClockHandler, @Inject(forwardRef(() => HelpHandler)) private helpHandler: HelpHandler, + @Inject(forwardRef(() => VoiceHandler)) + private voiceHandler: VoiceHandler, @Inject(forwardRef(() => OrchestrationService)) private orchestration: OrchestrationService ) { @@ -196,6 +199,23 @@ export 
// MatrixService members added to services/matrix-mana-bot/src/bot/matrix.service.ts by this patch.

  /**
   * Generate TTS audio for a bot reply and post it to the room as an `m.audio` message.
   *
   * Audio is an optional extra on top of the text reply: every failure is logged
   * and swallowed here, so this method never rejects.
   *
   * @param roomId - Matrix room the audio message is sent to.
   * @param text - The text reply (may contain markdown/emojis); it is cleaned with
   *   `prepareTextForSpeech` before synthesis.
   * @param userId - Passed to the voice service together with the text to synthesize.
   */
  private async generateAndSendAudioResponse(
    roomId: string,
    text: string,
    userId: string
  ): Promise<void> {
    // Anything shorter is not worth speaking; anything longer would be better summarized.
    const minSpeechLength = 5;
    const maxSpeechLength = 800;

    try {
      const speechText = this.prepareTextForSpeech(text);

      if (speechText.length < minSpeechLength) {
        return;
      }

      if (speechText.length > maxSpeechLength) {
        this.logger.debug(`Text too long for audio (${speechText.length} chars), skipping`);
        return;
      }

      const audioBuffer = await this.voiceService.synthesize(speechText, userId);

      // Never upload an empty file; clients would render a broken audio message.
      if (!audioBuffer || audioBuffer.length === 0) {
        this.logger.debug('TTS returned no audio, skipping audio response');
        return;
      }

      const mxcUrl = await this.uploadMedia(audioBuffer, 'audio/mpeg', 'response.mp3');

      await this.client.sendMessage(roomId, {
        msgtype: 'm.audio',
        body: 'Sprachantwort',
        url: mxcUrl,
        info: {
          mimetype: 'audio/mpeg',
          size: audioBuffer.length,
        },
      });

      this.logger.debug(`Sent audio response (${audioBuffer.length} bytes)`);
    } catch (error: unknown) {
      // Don't rethrow - the text reply has already been delivered.
      const reason = error instanceof Error ? error.message : String(error);
      this.logger.error(`Failed to generate audio response: ${reason}`);
    }
  }

  /**
   * Prepare a chat reply for text-to-speech.
   *
   * Strips code, links/URLs, markdown markers, a fixed set of decorative emojis and
   * list markers, rewrites `HH:MM` clock times into spoken German, and flattens
   * the text into a single line with tidy punctuation.
   *
   * @param text - Raw reply text.
   * @returns The cleaned, trimmed text; may be an empty string.
   */
  private prepareTextForSpeech(text: string): string {
    let result = text;

    // Code is not speakable: drop fenced blocks first, then inline code.
    result = result.replace(/```[\s\S]*?```/g, '');
    result = result.replace(/`[^`]+`/g, '');

    // Markdown links keep their label; bare URLs are dropped. This must happen
    // before the time rewrite, otherwise "host:8080" is turned into words with
    // spaces and the URL can no longer be removed as one token.
    result = result.replace(/\[([^\]]+)\]\([^)\s]*\)/g, '$1');
    result = result.replace(/https?:\/\/[^\s]+/g, '');

    // Remove markdown formatting
    result = result.replace(/\*\*(.+?)\*\*/g, '$1'); // Bold
    result = result.replace(/\*(.+?)\*/g, '$1'); // Italic
    result = result.replace(/~~(.+?)~~/g, '$1'); // Strikethrough
    result = result.replace(/^#+\s*/gm, ''); // Headers

    // Remove common decorative emojis: 📋 📅 ⏱ 🔮 💡 ❌ ✅ 🎤 🔊 ☀ 💪 🔔 (+ variation selector).
    // The `u` flag is required: without it the class matches single UTF-16
    // surrogate halves and corrupts every other emoji sharing one of them.
    result = result.replace(
      /[\u{1F4CB}\u{1F4C5}\u{23F1}\u{1F52E}\u{1F4A1}\u{274C}\u{2705}\u{1F3A4}\u{1F50A}\u{2600}\u{1F4AA}\u{1F514}\u{FE0F}]/gu,
      ''
    );

    // Drop bullet markers
    result = result.replace(/^[•\-]\s*/gm, '');

    // Drop numbered-list markers. Whitespace after the dot is required so that
    // German dates at the start of a line ("15.03. Zahnarzt") stay intact.
    result = result.replace(/^\d+\.\s+/gm, '');

    // Rewrite clock times for German speech. The lookarounds skip longer digit
    // runs and H:MM:SS values; out-of-range numbers are left untouched.
    result = result.replace(
      /(?<!\d|\d:)(\d{1,2}):(\d{2})(?!\d|:\d)/g,
      (match: string, h: string, m: string): string => {
        const hour = Number.parseInt(h, 10);
        const min = Number.parseInt(m, 10);

        if (hour > 23 || min > 59) {
          return match;
        }
        if (min === 0) {
          return `${hour} Uhr`;
        }
        if (min === 15) {
          return `viertel nach ${hour}`;
        }

        // "halb"/"viertel vor" refer to the next hour; there is no hour 24, so
        // 23:30 and 23:45 fall through to the plain form below.
        const nextHour = hour + 1;
        if (nextHour < 24) {
          if (min === 30) {
            return `halb ${nextHour}`;
          }
          if (min === 45) {
            return `viertel vor ${nextHour}`;
          }
        }

        return `${hour} Uhr ${min}`;
      }
    );

    // Paragraph breaks become sentence breaks, remaining newlines become spaces.
    result = result.replace(/(?:\r?\n){2,}/g, '. ');
    result = result.replace(/\r?\n/g, ' ');
    result = result.replace(/\s{2,}/g, ' ');

    // Clean up punctuation: no space before a mark, runs collapse to the first
    // mark, and nothing dangling in front of the first word.
    result = result.replace(/\s+([.,!?])/g, '$1');
    result = result.replace(/([.,!?])(?:\s*[.,!?])+/g, '$1');
    result = result.replace(/^[\s.,!?]+/, '');

    return result.trim();
  }
// services/matrix-mana-bot/src/handlers/voice.handler.ts (new file introduced by this patch)
import { Injectable } from '@nestjs/common';
import { VoiceService } from '../voice/voice.service';
import { CommandContext } from '../bot/command-router.service';

/**
 * Chat command handlers for voice output: show/toggle the per-user voice
 * setting, list the available TTS voices and select one.
 *
 * Every handler returns the (German, markdown) reply text for the user.
 */
@Injectable()
export class VoiceHandler {
  constructor(private readonly voiceService: VoiceService) {}

  /**
   * Show voice settings or toggle voice on/off.
   *
   * @param ctx - Command context; `ctx.userId` selects whose preferences are used.
   * @param args - `an`/`on`/`ein` enables, `aus`/`off` disables (case-insensitive);
   *   anything else shows the current settings and the command overview.
   * @returns The reply text.
   */
  async voiceSettings(ctx: CommandContext, args: string): Promise<string> {
    const arg = args.trim().toLowerCase();
    const prefs = this.voiceService.getUserPreferences(ctx.userId);

    // Toggle voice on/off
    if (arg === 'an' || arg === 'on' || arg === 'ein') {
      this.voiceService.setVoiceEnabled(ctx.userId, true);
      return '🔊 Sprachantworten aktiviert.';
    }

    if (arg === 'aus' || arg === 'off') {
      this.voiceService.setVoiceEnabled(ctx.userId, false);
      return '🔇 Sprachantworten deaktiviert.';
    }

    // Show current settings
    const status = prefs.voiceEnabled ? '✅ Aktiviert' : '❌ Deaktiviert';

    return `**🎤 Voice-Einstellungen**

**Status:** ${status}
**Stimme:** ${prefs.voice}
**Geschwindigkeit:** ${prefs.speed}x

**Befehle:**
• \`!voice an\` / \`!voice aus\` - Aktivieren/Deaktivieren
• \`!stimme [name]\` - Stimme wählen
• \`!stimmen\` - Verfügbare Stimmen anzeigen`;
  }

  /**
   * List available TTS voices, marking the user's current one with a check mark.
   *
   * @param ctx - Command context; `ctx.userId` selects whose current voice is marked.
   * @returns The formatted list, or an error text when the voice service returns no voices.
   */
  async listVoices(ctx: CommandContext): Promise<string> {
    const voices = await this.voiceService.getVoices();
    const prefs = this.voiceService.getUserPreferences(ctx.userId);

    if (Object.keys(voices).length === 0) {
      return '❌ Keine Stimmen verfügbar. Voice Service nicht erreichbar.';
    }

    const voiceList = Object.entries(voices)
      .map(([id, desc]) => {
        const current = id === prefs.voice ? ' ✓' : '';
        return `• **${id}**${current}\n ${desc}`;
      })
      .join('\n');

    return `**🗣️ Verfügbare Stimmen**

${voiceList}

*Wähle mit \`!stimme [name]\`*`;
  }

  /**
   * Set the user's TTS voice.
   *
   * Resolution order: exact id, then case-insensitive exact id, then a unique
   * case-insensitive substring match. Only ids actually returned by the voice
   * service are considered, so inherited object keys (e.g. `constructor`) can
   * never be selected and a voice with an empty description is still found.
   *
   * @param ctx - Command context; `ctx.userId` selects whose preference is changed.
   * @param args - The requested voice name (surrounding whitespace is ignored).
   * @returns A confirmation, or an error text when the name is missing, unknown or ambiguous.
   */
  async setVoice(ctx: CommandContext, args: string): Promise<string> {
    const voiceName = args.trim();

    if (!voiceName) {
      return '❌ Bitte gib einen Stimmnamen an. Siehe `!stimmen` für verfügbare Stimmen.';
    }

    const voices = await this.voiceService.getVoices();
    const voiceIds = Object.keys(voices);
    const needle = voiceName.toLowerCase();

    // An exact hit always wins, even when it is also a substring of other ids.
    const exact =
      voiceIds.find((id) => id === voiceName) ??
      voiceIds.find((id) => id.toLowerCase() === needle);

    if (exact !== undefined) {
      this.voiceService.setVoice(ctx.userId, exact);
      return `✅ Stimme geändert zu **${exact}**`;
    }

    // Try partial match
    const matches = voiceIds.filter((id) => id.toLowerCase().includes(needle));

    if (matches.length === 1) {
      this.voiceService.setVoice(ctx.userId, matches[0]);
      return `✅ Stimme geändert zu **${matches[0]}**`;
    }

    if (matches.length > 1) {
      return `❌ Mehrere Treffer: ${matches.join(', ')}\nBitte genauer angeben.`;
    }

    return `❌ Stimme "${voiceName}" nicht gefunden. Siehe \`!stimmen\` für verfügbare Stimmen.`;
  }

  /**
   * Check voice service health.
   *
   * @returns The voice service's own health result (`stt` / `tts` flags), passed through unchanged.
   */
  async checkHealth(): Promise<{ stt: boolean; tts: boolean }> {
    return this.voiceService.checkHealth();
  }
}