mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 21:41:09 +02:00
feat(matrix-mana-bot): add voice output/TTS support (Phase 2)
- Add VoiceHandler for voice commands (!voice, !stimme, !stimmen)
- Extend MatrixService with TTS response generation
- Add prepareTextForSpeech() for German natural speech formatting
- Send audio responses non-blocking after text response
- Register voice commands in command router

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
db07b5613d
commit
48dfcd180f
4 changed files with 247 additions and 4 deletions
|
|
@ -4,6 +4,7 @@ import { TodoHandler } from '../handlers/todo.handler';
|
|||
import { CalendarHandler } from '../handlers/calendar.handler';
|
||||
import { ClockHandler } from '../handlers/clock.handler';
|
||||
import { HelpHandler } from '../handlers/help.handler';
|
||||
import { VoiceHandler } from '../handlers/voice.handler';
|
||||
import { OrchestrationService } from '../orchestration/orchestration.service';
|
||||
|
||||
export interface CommandContext {
|
||||
|
|
@ -50,6 +51,8 @@ export class CommandRouterService {
|
|||
private clockHandler: ClockHandler,
|
||||
@Inject(forwardRef(() => HelpHandler))
|
||||
private helpHandler: HelpHandler,
|
||||
@Inject(forwardRef(() => VoiceHandler))
|
||||
private voiceHandler: VoiceHandler,
|
||||
@Inject(forwardRef(() => OrchestrationService))
|
||||
private orchestration: OrchestrationService
|
||||
) {
|
||||
|
|
@ -196,6 +199,23 @@ export class CommandRouterService {
|
|||
handler: (ctx) => this.helpHandler.showStatus(ctx),
|
||||
description: 'Show status',
|
||||
},
|
||||
|
||||
// Voice Commands
|
||||
{
|
||||
patterns: ['!voice', '!sprache'],
|
||||
handler: (ctx, args) => this.voiceHandler.voiceSettings(ctx, args),
|
||||
description: 'Voice settings',
|
||||
},
|
||||
{
|
||||
patterns: ['!stimmen', '!voices'],
|
||||
handler: (ctx) => this.voiceHandler.listVoices(ctx),
|
||||
description: 'List voices',
|
||||
},
|
||||
{
|
||||
patterns: ['!stimme'],
|
||||
handler: (ctx, args) => this.voiceHandler.setVoice(ctx, args),
|
||||
description: 'Set voice',
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ export class MatrixService extends BaseMatrixService {
|
|||
}
|
||||
|
||||
/**
|
||||
* Handle voice note messages - transcribe and process as text
|
||||
* Handle voice note messages - transcribe, process, and respond with audio
|
||||
*/
|
||||
protected async handleAudioMessage(
|
||||
roomId: string,
|
||||
|
|
@ -165,7 +165,16 @@ export class MatrixService extends BaseMatrixService {
|
|||
await this.client.setTyping(roomId, false);
|
||||
|
||||
if (response) {
|
||||
// Send text response first
|
||||
await this.sendReply(roomId, event, response);
|
||||
|
||||
// Then generate and send audio response (non-blocking)
|
||||
const prefs = this.voiceService.getUserPreferences(sender);
|
||||
if (prefs.voiceEnabled) {
|
||||
this.generateAndSendAudioResponse(roomId, response, sender).catch((err) =>
|
||||
this.logger.error(`Failed to send audio response: ${err}`)
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
await this.client.setTyping(roomId, false);
|
||||
|
|
@ -178,6 +187,110 @@ export class MatrixService extends BaseMatrixService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Generate a spoken version of a text reply and post it to the room as an
 * `m.audio` message.
 *
 * The text is first cleaned with `prepareTextForSpeech`. Nothing is sent when
 * the cleaned text is shorter than 5 characters, longer than 800 characters,
 * or when synthesis yields no audio data.
 *
 * Best-effort: every failure is logged and swallowed, so the returned promise
 * never rejects because of a synthesis, upload or send error.
 *
 * @param roomId - Room the audio message is sent to.
 * @param text - Reply text as it was sent to the user (may contain markdown).
 * @param userId - Forwarded to `voiceService.synthesize` together with the text.
 */
private async generateAndSendAudioResponse(
	roomId: string,
	text: string,
	userId: string
): Promise<void> {
	try {
		// Prepare text for speech (remove markdown, emojis, etc.)
		const speechText = this.prepareTextForSpeech(text);

		// Skip if text is too short or empty
		if (!speechText || speechText.length < 5) {
			return;
		}

		// Skip if text is very long (summarize would be better)
		if (speechText.length > 800) {
			this.logger.debug(`Text too long for audio (${speechText.length} chars), skipping`);
			return;
		}

		// Generate audio
		const audioBuffer = await this.voiceService.synthesize(speechText, userId);

		// An empty result would otherwise be uploaded and posted as a
		// zero-byte, unplayable audio message.
		if (!audioBuffer || audioBuffer.length === 0) {
			this.logger.debug('TTS returned no audio data, skipping audio response');
			return;
		}

		// Upload to Matrix
		const mxcUrl = await this.uploadMedia(audioBuffer, 'audio/mpeg', 'response.mp3');

		// Send audio message
		await this.client.sendMessage(roomId, {
			msgtype: 'm.audio',
			body: 'Sprachantwort',
			url: mxcUrl,
			info: {
				mimetype: 'audio/mpeg',
				size: audioBuffer.length,
			},
		});

		this.logger.debug(`Sent audio response (${audioBuffer.length} bytes)`);
	} catch (error) {
		this.logger.error(`Failed to generate audio response: ${error}`);
		// Don't throw - audio is optional
	}
}
|
||||
|
||||
/**
 * Convert a chat reply into plain text suitable for text-to-speech.
 *
 * Processing order matters:
 *  1. code blocks and inline code are dropped,
 *  2. markdown links keep only their label and bare URLs are removed
 *     (before time conversion, so `host:8080` is never read as a time),
 *  3. bold / italic / strikethrough / header markers are stripped,
 *  4. a fixed set of decorative emoji is removed (all other emoji are kept),
 *  5. bullet and numbered-list prefixes are removed,
 *  6. `H:MM` clock times are rewritten as German spoken phrases,
 *  7. newlines and repeated whitespace are collapsed, punctuation is tidied.
 *
 * @param text - Reply text; may contain markdown, emoji and URLs.
 * @returns The cleaned, trimmed single-line text (possibly empty).
 */
private prepareTextForSpeech(text: string): string {
	let result = text;

	// Remove code blocks
	result = result.replace(/```[\s\S]*?```/g, '');
	result = result.replace(/`[^`]+`/g, '');

	// Markdown links: speak the label, drop the target
	result = result.replace(/\[([^\]]+)\]\([^)]*\)/g, '$1');

	// Remove URLs. Must happen before the time conversion below (ports such
	// as ":8080" would otherwise be turned into "... Uhr ...") and before the
	// whitespace collapse (so the gap a URL leaves behind is cleaned up).
	result = result.replace(/https?:\/\/[^\s]+/g, '');

	// Remove markdown formatting
	result = result.replace(/\*\*(.+?)\*\*/g, '$1'); // Bold
	result = result.replace(/\*(.+?)\*/g, '$1'); // Italic
	result = result.replace(/~~(.+?)~~/g, '$1'); // Strikethrough
	result = result.replace(/^#+\s*/gm, ''); // Headers

	// Remove common emojis (keep some for context):
	// 📋 📅 ⏱ 🔮 💡 ❌ ✅ 🎤 🔊 ☀ 💪 🔔 plus the variation selector U+FE0F.
	// The `u` flag is required: without it the class matches single UTF-16
	// code units and tears the high surrogate out of every other emoji.
	result = result.replace(
		/[\u{1F4CB}\u{1F4C5}\u{23F1}\u{1F52E}\u{1F4A1}\u{274C}\u{2705}\u{1F3A4}\u{1F50A}\u{2600}\u{1F4AA}\u{1F514}\u{FE0F}]/gu,
		''
	);

	// Convert bullet points to natural speech
	result = result.replace(/^[•\-]\s*/gm, '');

	// Convert numbered lists
	result = result.replace(/^\d+\.\s*/gm, '');

	// Clean up time formats for German speech. Only valid clock times
	// (0-23 h, 00-59 min) are matched, and the lookarounds skip fragments of
	// longer digit groups such as "10:30:45".
	result = result.replace(
		/(?<!\d)(?<!\d:)([01]?\d|2[0-3]):([0-5]\d)(?!\d)(?!:\d)/g,
		(_match: string, h: string, m: string) => {
			const hour = Number(h);
			const min = Number(m);

			if (min === 0) {
				return `${hour} Uhr`;
			}

			// "halb" / "viertel vor" refer to the NEXT hour; for 23:xx that
			// would be "24", so fall back to the exact form instead.
			const nextHour = hour + 1;
			if (nextHour <= 23) {
				if (min === 30) {
					return `halb ${nextHour}`;
				}
				if (min === 45) {
					return `viertel vor ${nextHour}`;
				}
			}

			if (min === 15) {
				return `viertel nach ${hour}`;
			}

			return `${hour} Uhr ${min}`;
		}
	);

	// Clean up multiple newlines and spaces
	result = result.replace(/\n{2,}/g, '. ');
	result = result.replace(/\n/g, ' ');
	result = result.replace(/\s{2,}/g, ' ');

	// Clean up punctuation
	result = result.replace(/\s+([.,!?])/g, '$1');
	result = result.replace(/([.,!?])\s*([.,!?])/g, '$1');

	return result.trim();
}
|
||||
|
||||
private async sendWelcomeMessage(roomId: string, userId: string) {
|
||||
try {
|
||||
await this.sendMessage(roomId, WELCOME_TEXT);
|
||||
|
|
|
|||
|
|
@ -4,11 +4,13 @@ import { TodoHandler } from './todo.handler';
|
|||
import { CalendarHandler } from './calendar.handler';
|
||||
import { ClockHandler } from './clock.handler';
|
||||
import { HelpHandler } from './help.handler';
|
||||
import { VoiceHandler } from './voice.handler';
|
||||
import { BotModule } from '../bot/bot.module';
|
||||
import { VoiceModule } from '../voice/voice.module';
|
||||
|
||||
@Module({
|
||||
imports: [forwardRef(() => BotModule)],
|
||||
providers: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler],
|
||||
exports: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler],
|
||||
imports: [forwardRef(() => BotModule), VoiceModule],
|
||||
providers: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler, VoiceHandler],
|
||||
exports: [AiHandler, TodoHandler, CalendarHandler, ClockHandler, HelpHandler, VoiceHandler],
|
||||
})
|
||||
export class HandlersModule {}
|
||||
|
|
|
|||
108
services/matrix-mana-bot/src/handlers/voice.handler.ts
Normal file
108
services/matrix-mana-bot/src/handlers/voice.handler.ts
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import { Injectable } from '@nestjs/common';
|
||||
import { VoiceService } from '../voice/voice.service';
|
||||
import { CommandContext } from '../bot/command-router.service';
|
||||
|
||||
@Injectable()
|
||||
/**
 * Chat-command handler for voice/TTS settings.
 *
 * Every method returns the markdown text of the reply; all state is read from
 * and written to the injected `VoiceService`.
 */
export class VoiceHandler {
	constructor(private readonly voiceService: VoiceService) {}

	/**
	 * Show voice settings or toggle voice on/off.
	 *
	 * @param ctx - Command context; only `ctx.userId` is used.
	 * @param args - Command argument. `an` / `on` / `ein` enables spoken
	 *   replies, `aus` / `off` disables them (case-insensitive); anything else
	 *   shows the current settings.
	 * @returns Confirmation text or the settings overview.
	 */
	async voiceSettings(ctx: CommandContext, args: string): Promise<string> {
		const arg = args.trim().toLowerCase();
		const prefs = this.voiceService.getUserPreferences(ctx.userId);

		// Toggle voice on/off
		if (arg === 'an' || arg === 'on' || arg === 'ein') {
			this.voiceService.setVoiceEnabled(ctx.userId, true);
			return '🔊 Sprachantworten aktiviert.';
		}

		if (arg === 'aus' || arg === 'off') {
			this.voiceService.setVoiceEnabled(ctx.userId, false);
			return '🔇 Sprachantworten deaktiviert.';
		}

		// Show current settings
		const status = prefs.voiceEnabled ? '✅ Aktiviert' : '❌ Deaktiviert';

		return [
			'**🎤 Voice-Einstellungen**',
			'',
			`**Status:** ${status}`,
			`**Stimme:** ${prefs.voice}`,
			`**Geschwindigkeit:** ${prefs.speed}x`,
			'',
			'**Befehle:**',
			'• `!voice an` / `!voice aus` - Aktivieren/Deaktivieren',
			'• `!stimme [name]` - Stimme wählen',
			'• `!stimmen` - Verfügbare Stimmen anzeigen',
		].join('\n');
	}

	/**
	 * List available TTS voices, marking the user's current voice with ✓.
	 *
	 * @param ctx - Command context; only `ctx.userId` is used.
	 * @returns The voice list, or an error text when no voices are returned.
	 */
	async listVoices(ctx: CommandContext): Promise<string> {
		const voices = await this.voiceService.getVoices();
		const prefs = this.voiceService.getUserPreferences(ctx.userId);

		if (Object.keys(voices).length === 0) {
			return '❌ Keine Stimmen verfügbar. Voice Service nicht erreichbar.';
		}

		const voiceList = Object.entries(voices)
			.map(([id, desc]) => {
				const current = id === prefs.voice ? ' ✓' : '';
				return `• **${id}**${current}\n ${desc}`;
			})
			.join('\n');

		return ['**🗣️ Verfügbare Stimmen**', '', voiceList, '', '*Wähle mit `!stimme [name]`*'].join(
			'\n'
		);
	}

	/**
	 * Set the user's TTS voice.
	 *
	 * Resolution order: exact id, then a unique case-insensitive id, then a
	 * unique case-insensitive substring match.
	 *
	 * @param ctx - Command context; only `ctx.userId` is used.
	 * @param args - Requested voice name (surrounding whitespace is ignored).
	 * @returns Confirmation text, or an error text when the name is missing,
	 *   ambiguous or unknown.
	 */
	async setVoice(ctx: CommandContext, args: string): Promise<string> {
		const voiceName = args.trim();

		if (!voiceName) {
			return '❌ Bitte gib einen Stimmnamen an. Siehe `!stimmen` für verfügbare Stimmen.';
		}

		const voices = await this.voiceService.getVoices();

		// Work on the own keys only: a truthiness check on `voices[name]` would
		// accept inherited properties such as "constructor" or "toString".
		const voiceIds = Object.keys(voices);

		let resolved: string | undefined = voiceIds.includes(voiceName) ? voiceName : undefined;

		if (resolved === undefined) {
			const needle = voiceName.toLowerCase();

			// A case-insensitive exact hit wins over substring matches, so
			// "Anna" selects "anna" even when "anna-hd" also exists.
			const exactIgnoringCase = voiceIds.filter((id) => id.toLowerCase() === needle);

			if (exactIgnoringCase.length === 1) {
				[resolved] = exactIgnoringCase;
			} else {
				// Try partial match
				const matches = voiceIds.filter((id) => id.toLowerCase().includes(needle));

				if (matches.length > 1) {
					return `❌ Mehrere Treffer: ${matches.join(', ')}\nBitte genauer angeben.`;
				}

				[resolved] = matches;
			}
		}

		if (resolved === undefined) {
			return `❌ Stimme "${voiceName}" nicht gefunden. Siehe \`!stimmen\` für verfügbare Stimmen.`;
		}

		this.voiceService.setVoice(ctx.userId, resolved);
		return `✅ Stimme geändert zu **${resolved}**`;
	}

	/**
	 * Check voice service health.
	 *
	 * @returns The result of `voiceService.checkHealth()`, unchanged.
	 */
	async checkHealth(): Promise<{ stt: boolean; tts: boolean }> {
		return this.voiceService.checkHealth();
	}
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue