feat(matrix-mana-bot): add voice output/TTS support (Phase 2)

- Add VoiceHandler for voice commands (!voice/!sprache, !stimmen/!voices, !stimme)
- Extend MatrixService with TTS response generation
- Add prepareTextForSpeech() for German natural speech formatting
- Send audio responses asynchronously (non-blocking) after the text response
- Register voice commands in command router

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Till-JS 2026-02-01 03:02:37 +01:00
parent db07b5613d
commit 48dfcd180f
4 changed files with 247 additions and 4 deletions

View file

@ -4,6 +4,7 @@ import { TodoHandler } from '../handlers/todo.handler';
import { CalendarHandler } from '../handlers/calendar.handler';
import { ClockHandler } from '../handlers/clock.handler';
import { HelpHandler } from '../handlers/help.handler';
import { VoiceHandler } from '../handlers/voice.handler';
import { OrchestrationService } from '../orchestration/orchestration.service';
export interface CommandContext {
@ -50,6 +51,8 @@ export class CommandRouterService {
private clockHandler: ClockHandler,
@Inject(forwardRef(() => HelpHandler))
private helpHandler: HelpHandler,
@Inject(forwardRef(() => VoiceHandler))
private voiceHandler: VoiceHandler,
@Inject(forwardRef(() => OrchestrationService))
private orchestration: OrchestrationService
) {
@ -196,6 +199,23 @@ export class CommandRouterService {
handler: (ctx) => this.helpHandler.showStatus(ctx),
description: 'Show status',
},
// Voice Commands
{
patterns: ['!voice', '!sprache'],
handler: (ctx, args) => this.voiceHandler.voiceSettings(ctx, args),
description: 'Voice settings',
},
{
patterns: ['!stimmen', '!voices'],
handler: (ctx) => this.voiceHandler.listVoices(ctx),
description: 'List voices',
},
{
patterns: ['!stimme'],
handler: (ctx, args) => this.voiceHandler.setVoice(ctx, args),
description: 'Set voice',
},
];
}

View file

@ -104,7 +104,7 @@ export class MatrixService extends BaseMatrixService {
}
/**
* Handle voice note messages - transcribe and process as text
* Handle voice note messages - transcribe, process, and respond with audio
*/
protected async handleAudioMessage(
roomId: string,
@ -165,7 +165,16 @@ export class MatrixService extends BaseMatrixService {
await this.client.setTyping(roomId, false);
if (response) {
// Send text response first
await this.sendReply(roomId, event, response);
// Then generate and send audio response (non-blocking)
const prefs = this.voiceService.getUserPreferences(sender);
if (prefs.voiceEnabled) {
this.generateAndSendAudioResponse(roomId, response, sender).catch((err) =>
this.logger.error(`Failed to send audio response: ${err}`)
);
}
}
} catch (error) {
await this.client.setTyping(roomId, false);
@ -178,6 +187,110 @@ export class MatrixService extends BaseMatrixService {
}
}
/**
 * Turn a text reply into speech and post it to the room as an `m.audio` event.
 *
 * The text is first cleaned with prepareTextForSpeech(). Nothing is sent when
 * the cleaned text is shorter than 5 or longer than 800 characters. Failures
 * are caught and logged instead of being rethrown, because the audio reply is
 * an optional extra on top of the text reply.
 *
 * @param roomId - Room the audio message is sent to.
 * @param text - Response text to be spoken.
 * @param userId - Forwarded to the voice service when synthesizing.
 */
private async generateAndSendAudioResponse(
  roomId: string,
  text: string,
  userId: string
): Promise<void> {
  try {
    const spoken = this.prepareTextForSpeech(text);
    const spokenLength = spoken ? spoken.length : 0;

    // Nothing worth reading aloud
    if (spokenLength < 5) {
      return;
    }

    // Long replies are skipped rather than read out in full
    if (spokenLength > 800) {
      this.logger.debug(`Text too long for audio (${spokenLength} chars), skipping`);
      return;
    }

    // Synthesize first, then upload the result to obtain a content URI
    const audio = await this.voiceService.synthesize(spoken, userId);
    const contentUri = await this.uploadMedia(audio, 'audio/mpeg', 'response.mp3');

    const audioEvent = {
      msgtype: 'm.audio',
      body: 'Sprachantwort',
      url: contentUri,
      info: {
        mimetype: 'audio/mpeg',
        size: audio.length,
      },
    };
    await this.client.sendMessage(roomId, audioEvent);

    this.logger.debug(`Sent audio response (${audio.length} bytes)`);
  } catch (error) {
    // Deliberately swallowed: the text response has already been delivered
    this.logger.error(`Failed to generate audio response: ${error}`);
  }
}
/**
 * Convert a markdown-formatted chat response into plain text for text-to-speech.
 *
 * Processing order matters and is:
 *  1. drop code blocks / inline code, reduce markdown links to their label and
 *     remove bare URLs (before anything else can rewrite parts of a URL),
 *  2. strip list markers, headers and inline emphasis,
 *  3. remove a fixed set of decorative emojis,
 *  4. rewrite valid `H:MM` / `HH:MM` clock times into spoken German,
 *  5. normalise whitespace and punctuation.
 *
 * @param text - Response text as sent to the chat.
 * @returns The cleaned text; may be empty when nothing speakable remains.
 */
private prepareTextForSpeech(text: string): string {
  // Normalise line endings so the line-based patterns below only see "\n"
  let result = text.replace(/\r\n?/g, '\n');

  // Remove code blocks and inline code
  result = result.replace(/```[\s\S]*?```/g, '');
  result = result.replace(/`[^`]+`/g, '');

  // Keep only the label of markdown links/images, then drop remaining URLs.
  // This runs early so that the time rewrite below cannot split a URL
  // (e.g. "host:12:30") into fragments that survive URL removal.
  result = result.replace(/!?\[([^\]]+)\]\([^)\s]+\)/g, '$1');
  result = result.replace(/https?:\/\/\S+/g, '');

  // Strip list markers before emphasis so a "* " bullet is never mistaken for
  // the start of an italic span. "-" and "*" need trailing whitespace so that
  // "-5 Grad" and "*wichtig*" are left alone.
  result = result.replace(/^[ \t]*(?:•[ \t]*|[-*][ \t]+)/gm, '');
  // Numbered lists ("1. "); the required whitespace keeps "1.5 Liter" intact
  result = result.replace(/^[ \t]*\d+\.[ \t]+/gm, '');
  // Headers
  result = result.replace(/^#+\s*/gm, '');

  // Remove inline markdown formatting
  result = result.replace(/\*\*(.+?)\*\*/g, '$1'); // Bold
  result = result.replace(/\*(.+?)\*/g, '$1'); // Italic
  result = result.replace(/~~(.+?)~~/g, '$1'); // Strikethrough

  // Remove common decorative emojis (others are kept for context).
  // The `u` flag is required: without it the class matches individual UTF-16
  // surrogates and would tear apart every other emoji that shares a surrogate.
  result = result.replace(/[📋📅⏱🔮💡❌✅🎤🔊☀💪🔔\uFE0F]/gu, '');

  // Rewrite clock times for German speech. The lookarounds skip values that
  // are part of a longer number or an H:MM:SS sequence; out-of-range values
  // (e.g. "25:99") are left untouched.
  result = result.replace(
    /(?<!\d:?)(\d{1,2}):(\d{2})(?!:?\d)/g,
    (match: string, h: string, m: string) => {
      const hour = Number(h);
      const min = Number(m);
      if (hour > 23 || min > 59) {
        return match;
      }

      if (min === 0) {
        return `${hour} Uhr`;
      }
      if (min === 15) {
        return `viertel nach ${hour}`;
      }
      // "halb"/"viertel vor" refer to the NEXT hour; at 23:xx that would be
      // "24", so fall through to the plain form instead.
      const nextHour = hour + 1;
      if (nextHour < 24) {
        if (min === 30) {
          return `halb ${nextHour}`;
        }
        if (min === 45) {
          return `viertel vor ${nextHour}`;
        }
      }
      return `${hour} Uhr ${min}`;
    }
  );

  // Trim first so leading/trailing blank lines do not turn into stray ". "
  result = result.trim();

  // Paragraph breaks become sentence breaks, single newlines become spaces
  result = result.replace(/\n{2,}/g, '. ');
  result = result.replace(/\n/g, ' ');
  result = result.replace(/\s{2,}/g, ' ');

  // Clean up punctuation: no space before a mark, and collapse any run of
  // marks (including "...") to the first one
  result = result.replace(/\s+([.,!?])/g, '$1');
  result = result.replace(/([.,!?])(?:\s*[.,!?])+/g, '$1');

  return result.trim();
}
private async sendWelcomeMessage(roomId: string, userId: string) {
try {
await this.sendMessage(roomId, WELCOME_TEXT);

View file

@ -4,11 +4,13 @@ import { TodoHandler } from './todo.handler';
import { CalendarHandler } from './calendar.handler';
import { ClockHandler } from './clock.handler';
import { HelpHandler } from './help.handler';
import { VoiceHandler } from './voice.handler';
import { BotModule } from '../bot/bot.module';
import { VoiceModule } from '../voice/voice.module';
/**
 * Module that registers the bot's command handlers as providers and exports
 * them so other modules can inject them.
 *
 * BotModule is imported through forwardRef(). VoiceModule is imported
 * alongside it, presumably so that VoiceHandler can inject VoiceService —
 * TODO confirm that VoiceModule exports it.
 *
 * NOTE(review): this view appears to show both the pre-change and post-change
 * `imports` / `providers` / `exports` lines of the diff — confirm only the
 * second set is present in the actual file.
 */
@Module({
imports: [forwardRef(() => BotModule)],
providers: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler],
exports: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler],
imports: [forwardRef(() => BotModule), VoiceModule],
providers: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler, VoiceHandler],
exports: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler, VoiceHandler],
})
export class HandlersModule {}

View file

@ -0,0 +1,108 @@
import { Injectable } from '@nestjs/common';
import { VoiceService } from '../voice/voice.service';
import { CommandContext } from '../bot/command-router.service';
/**
 * Command handler for the per-user voice (text-to-speech) settings.
 *
 * Every command method returns the markdown reply text; reading and storing
 * preferences is delegated to VoiceService.
 */
@Injectable()
export class VoiceHandler {
  constructor(private readonly voiceService: VoiceService) {}

  /**
   * Show the user's voice settings, or switch voice replies on/off.
   *
   * @param ctx - Command context; only `userId` is read.
   * @param args - `an` / `on` / `ein` enables, `aus` / `off` disables; any
   *   other value (including empty) shows the current settings.
   * @returns The reply text.
   */
  async voiceSettings(ctx: CommandContext, args: string): Promise<string> {
    const arg = args.trim().toLowerCase();
    const prefs = this.voiceService.getUserPreferences(ctx.userId);

    // Toggle voice on/off
    if (arg === 'an' || arg === 'on' || arg === 'ein') {
      this.voiceService.setVoiceEnabled(ctx.userId, true);
      return '🔊 Sprachantworten aktiviert.';
    }
    if (arg === 'aus' || arg === 'off') {
      this.voiceService.setVoiceEnabled(ctx.userId, false);
      return '🔇 Sprachantworten deaktiviert.';
    }

    // Show current settings
    const status = prefs.voiceEnabled ? '✅ Aktiviert' : '❌ Deaktiviert';
    return [
      '**🎤 Voice-Einstellungen**',
      `**Status:** ${status}`,
      `**Stimme:** ${prefs.voice}`,
      `**Geschwindigkeit:** ${prefs.speed}x`,
      '**Befehle:**',
      '`!voice an` / `!voice aus` - Aktivieren/Deaktivieren',
      '`!stimme [name]` - Stimme wählen',
      '`!stimmen` - Verfügbare Stimmen anzeigen',
    ].join('\n');
  }

  /**
   * List the available TTS voices, marking the user's current one.
   *
   * @param ctx - Command context; only `userId` is read.
   * @returns The reply text, or an error text when no voices are returned.
   */
  async listVoices(ctx: CommandContext): Promise<string> {
    const voices = await this.voiceService.getVoices();
    const prefs = this.voiceService.getUserPreferences(ctx.userId);

    if (Object.keys(voices).length === 0) {
      return '❌ Keine Stimmen verfügbar. Voice Service nicht erreichbar.';
    }

    const voiceList = Object.entries(voices)
      .map(([id, desc]) => {
        const current = id === prefs.voice ? ' ✓' : '';
        return `• **${id}**${current}\n ${desc}`;
      })
      .join('\n');

    return ['**🗣️ Verfügbare Stimmen**', voiceList, '*Wähle mit `!stimme [name]`*'].join('\n');
  }

  /**
   * Select the TTS voice for the user.
   *
   * The requested name is resolved against the ids returned by the voice
   * service: exact match first, then case-insensitive exact match, then
   * case-insensitive substring match. The voice is only changed when this
   * yields exactly one id.
   *
   * @param ctx - Command context; only `userId` is read.
   * @param args - Requested voice name (surrounding whitespace is ignored).
   * @returns The reply text describing the outcome.
   */
  async setVoice(ctx: CommandContext, args: string): Promise<string> {
    const voiceName = args.trim();
    if (!voiceName) {
      return '❌ Bitte gib einen Stimmnamen an. Siehe `!stimmen` für verfügbare Stimmen.';
    }

    const voices = await this.voiceService.getVoices();
    // Own keys only: a plain property lookup would also "find" inherited names
    // such as "constructor" and would miss voices whose description is empty.
    const voiceIds = Object.keys(voices);

    // Same situation and same message as in listVoices()
    if (voiceIds.length === 0) {
      return '❌ Keine Stimmen verfügbar. Voice Service nicht erreichbar.';
    }

    const matches = this.findVoiceMatches(voiceIds, voiceName);
    const [onlyMatch] = matches;

    if (matches.length === 1 && onlyMatch !== undefined) {
      this.voiceService.setVoice(ctx.userId, onlyMatch);
      return `✅ Stimme geändert zu **${onlyMatch}**`;
    }
    if (matches.length > 1) {
      return `❌ Mehrere Treffer: ${matches.join(', ')}\nBitte genauer angeben.`;
    }
    return `❌ Stimme "${voiceName}" nicht gefunden. Siehe \`!stimmen\` für verfügbare Stimmen.`;
  }

  /**
   * Check voice service health.
   *
   * @returns The health object reported by the voice service.
   */
  async checkHealth(): Promise<{ stt: boolean; tts: boolean }> {
    return this.voiceService.checkHealth();
  }

  /** Resolve a user-typed name to candidate voice ids, most specific rule first. */
  private findVoiceMatches(voiceIds: readonly string[], query: string): string[] {
    if (voiceIds.includes(query)) {
      return [query];
    }

    const needle = query.toLowerCase();
    const sameName = voiceIds.filter((id) => id.toLowerCase() === needle);
    if (sameName.length > 0) {
      return sameName;
    }

    return voiceIds.filter((id) => id.toLowerCase().includes(needle));
  }
}