mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-23 07:26:43 +02:00
feat(matrix-mana-bot): add voice output/TTS support (Phase 2)
- Add VoiceHandler for voice commands (!voice, !stimme, !stimmen)
- Extend MatrixService with TTS response generation
- Add prepareTextForSpeech() for German natural speech formatting
- Send audio responses non-blocking after text response
- Register voice commands in command router

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
db07b5613d
commit
48dfcd180f
4 changed files with 247 additions and 4 deletions
|
|
@ -4,6 +4,7 @@ import { TodoHandler } from '../handlers/todo.handler';
|
|||
import { CalendarHandler } from '../handlers/calendar.handler';
|
||||
import { ClockHandler } from '../handlers/clock.handler';
|
||||
import { HelpHandler } from '../handlers/help.handler';
|
||||
import { VoiceHandler } from '../handlers/voice.handler';
|
||||
import { OrchestrationService } from '../orchestration/orchestration.service';
|
||||
|
||||
export interface CommandContext {
|
||||
|
|
@ -50,6 +51,8 @@ export class CommandRouterService {
|
|||
private clockHandler: ClockHandler,
|
||||
@Inject(forwardRef(() => HelpHandler))
|
||||
private helpHandler: HelpHandler,
|
||||
@Inject(forwardRef(() => VoiceHandler))
|
||||
private voiceHandler: VoiceHandler,
|
||||
@Inject(forwardRef(() => OrchestrationService))
|
||||
private orchestration: OrchestrationService
|
||||
) {
|
||||
|
|
@ -196,6 +199,23 @@ export class CommandRouterService {
|
|||
handler: (ctx) => this.helpHandler.showStatus(ctx),
|
||||
description: 'Show status',
|
||||
},
|
||||
|
||||
// Voice Commands
|
||||
{
|
||||
patterns: ['!voice', '!sprache'],
|
||||
handler: (ctx, args) => this.voiceHandler.voiceSettings(ctx, args),
|
||||
description: 'Voice settings',
|
||||
},
|
||||
{
|
||||
patterns: ['!stimmen', '!voices'],
|
||||
handler: (ctx) => this.voiceHandler.listVoices(ctx),
|
||||
description: 'List voices',
|
||||
},
|
||||
{
|
||||
patterns: ['!stimme'],
|
||||
handler: (ctx, args) => this.voiceHandler.setVoice(ctx, args),
|
||||
description: 'Set voice',
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ export class MatrixService extends BaseMatrixService {
|
|||
}
|
||||
|
||||
/**
|
||||
* Handle voice note messages - transcribe and process as text
|
||||
* Handle voice note messages - transcribe, process, and respond with audio
|
||||
*/
|
||||
protected async handleAudioMessage(
|
||||
roomId: string,
|
||||
|
|
@ -165,7 +165,16 @@ export class MatrixService extends BaseMatrixService {
|
|||
await this.client.setTyping(roomId, false);
|
||||
|
||||
if (response) {
|
||||
// Send text response first
|
||||
await this.sendReply(roomId, event, response);
|
||||
|
||||
// Then generate and send audio response (non-blocking)
|
||||
const prefs = this.voiceService.getUserPreferences(sender);
|
||||
if (prefs.voiceEnabled) {
|
||||
this.generateAndSendAudioResponse(roomId, response, sender).catch((err) =>
|
||||
this.logger.error(`Failed to send audio response: ${err}`)
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
await this.client.setTyping(roomId, false);
|
||||
|
|
@ -178,6 +187,110 @@ export class MatrixService extends BaseMatrixService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Turn a text reply into speech and post it to the room as an `m.audio` message.
 *
 * The text is first cleaned with `prepareTextForSpeech`. Nothing is sent when the
 * cleaned text is empty, shorter than 5 characters, or longer than 800 characters.
 * Any failure along the way is logged and swallowed, so this method never rejects.
 *
 * @param roomId - Room that receives the audio message
 * @param text - Reply text to be spoken
 * @param userId - Forwarded to the voice service's `synthesize` call
 */
private async generateAndSendAudioResponse(
  roomId: string,
  text: string,
  userId: string
): Promise<void> {
  try {
    // Strip markdown, emojis, etc. so only speakable text remains
    const spoken = this.prepareTextForSpeech(text);

    // Empty or near-empty text is not worth an audio message
    const nothingToSay = !spoken || spoken.length < 5;
    if (nothingToSay) {
      return;
    }

    // Very long replies are skipped (a summary would be the better fit)
    const spokenLength = spoken.length;
    if (spokenLength > 800) {
      this.logger.debug(`Text too long for audio (${spokenLength} chars), skipping`);
      return;
    }

    // Synthesize, then upload the result to obtain a content URI
    const audio = await this.voiceService.synthesize(spoken, userId);
    const contentUri = await this.uploadMedia(audio, 'audio/mpeg', 'response.mp3');

    // Post the uploaded audio to the room
    await this.client.sendMessage(roomId, {
      msgtype: 'm.audio',
      body: 'Sprachantwort',
      url: contentUri,
      info: {
        mimetype: 'audio/mpeg',
        size: audio.length,
      },
    });

    this.logger.debug(`Sent audio response (${audio.length} bytes)`);
  } catch (error) {
    // Audio is an optional extra: log and carry on instead of propagating
    this.logger.error(`Failed to generate audio response: ${error}`);
  }
}
|
||||
|
||||
/**
 * Prepare chat-formatted text for text-to-speech.
 *
 * Processing order (the order matters, see the inline comments):
 *  1. drop fenced and inline code
 *  2. reduce markdown links to their label, then drop bare URLs
 *  3. drop list markers, then emphasis markers and header hashes
 *  4. drop a fixed set of decorative emojis (all other emojis are kept intact)
 *  5. rewrite clock times such as `14:00` into German spoken form
 *  6. collapse newlines/whitespace and tidy punctuation
 *
 * @param text - Raw reply text, possibly containing markdown, emojis and URLs
 * @returns Plain text suitable for speech synthesis; may be an empty string
 */
private prepareTextForSpeech(text: string): string {
  let result = text;

  // Remove code blocks (fenced first, then inline)
  result = result.replace(/```[\s\S]*?```/g, '');
  result = result.replace(/`[^`]+`/g, '');

  // Markdown links: keep the label, drop the target. Must run before the bare-URL
  // strip, which would otherwise swallow the closing parenthesis and leave "[label](".
  result = result.replace(/\[([^\]]+)\]\([^)\s]*\)/g, '$1');

  // Remove bare URLs. Done before the time conversion and whitespace cleanup so that
  // time-like fragments inside a URL are not rewritten (which would split the URL and
  // leave parts behind) and so the gap left by the URL is collapsed afterwards.
  // The lookahead keeps sentence punctuation that directly follows the URL.
  result = result.replace(/https?:\/\/\S+?(?=[.,!?;:)]*(?:\s|$))/g, '');

  // List markers. Handled before emphasis so a "* item" bullet is not mistaken for an
  // italic opener. "-", "*" and "1." need trailing whitespace so that "-5 Grad",
  // "3.5 Grad" or "23.05.2026" at the start of a line stay untouched.
  result = result.replace(/^[ \t]*(?:•[ \t]*|[-*][ \t]+)/gm, '');
  result = result.replace(/^[ \t]*\d+\.[ \t]+/gm, '');

  // Remove markdown formatting
  result = result.replace(/\*\*(.+?)\*\*/g, '$1'); // Bold
  result = result.replace(/\*(.+?)\*/g, '$1'); // Italic
  result = result.replace(/~~(.+?)~~/g, '$1'); // Strikethrough
  result = result.replace(/^#+\s*/gm, ''); // Headers

  // Remove common decorative emojis (clipboard, calendar, stopwatch, crystal ball,
  // light bulb, cross mark, check mark, microphone, speaker, sun, biceps, bell),
  // each with an optional variation selector. The `u` flag is required: without it
  // the class matches single surrogate halves and corrupts every other emoji.
  result = result.replace(
    /[\u{1F4CB}\u{1F4C5}\u23F1\u{1F52E}\u{1F4A1}\u274C\u2705\u{1F3A4}\u{1F50A}\u2600\u{1F4AA}\u{1F514}]\uFE0F?/gu,
    ''
  );

  // Clean up time formats for German speech. The lookarounds keep the pattern from
  // matching inside longer digit/colon runs such as "12:30:45" or "123:45"; a trailing
  // "Uhr" is consumed so that "14:00 Uhr" does not become "14 Uhr Uhr".
  result = result.replace(
    /(?<!\d)(?<!\d:)(\d{1,2}):(\d{2})(?!\d)(?!:\d)(?:\s*Uhr\b)?/g,
    (match: string, h: string, m: string) => {
      const hour = Number.parseInt(h, 10);
      const min = Number.parseInt(m, 10);

      // Not a valid clock time (e.g. a score or ratio): leave it as written
      if (hour > 23 || min > 59) {
        return match;
      }
      if (min === 0) {
        return `${hour} Uhr`;
      }
      if (min === 15) {
        return `viertel nach ${hour}`;
      }

      // "halb" / "viertel vor" refer to the following hour; there is no hour 24,
      // so 23:30 and 23:45 fall through to the plain form below.
      const nextHour = hour + 1;
      if (nextHour < 24) {
        if (min === 30) {
          return `halb ${nextHour}`;
        }
        if (min === 45) {
          return `viertel vor ${nextHour}`;
        }
      }
      return `${hour} Uhr ${min}`;
    }
  );

  // Clean up multiple newlines and spaces (paragraph breaks become sentence breaks)
  result = result.replace(/\n{2,}/g, '. ');
  result = result.replace(/\n/g, ' ');
  result = result.replace(/\s{2,}/g, ' ');

  // Clean up punctuation
  result = result.replace(/\s+([.,!?])/g, '$1');
  result = result.replace(/([.,!?])\s*([.,!?])/g, '$1');

  // Removed leading content (header, emoji, code block) can leave stray punctuation
  result = result.replace(/^[\s.,!?]+/, '');

  return result.trim();
}
|
||||
|
||||
private async sendWelcomeMessage(roomId: string, userId: string) {
|
||||
try {
|
||||
await this.sendMessage(roomId, WELCOME_TEXT);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue