mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-14 22:01:09 +02:00
feat(matrix-mana-bot): add smart voice formatting (Phase 3)
- Create VoiceFormatterService for intelligent speech formatting - Convert numbers to German words (eins, zwei, drei...) - Convert times to natural speech (10:00 → zehn Uhr, 14:30 → halb drei) - Convert dates to German format (15.02. → 15. Februar) - Format task metadata (!p1 → mit höchster Priorität, @heute → fällig heute) - Summarize long lists (top 3 + "und X weitere") - Convert numbered lists to ordinals (Erstens, Zweitens, Drittens) - Smart truncation at sentence boundaries - Remove inline prepareTextForSpeech(), use formatter service Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7a2e037cd5
commit
e892e8db35
3 changed files with 378 additions and 67 deletions
|
|
@ -3,6 +3,7 @@ import { ConfigService } from '@nestjs/config';
|
|||
import { BaseMatrixService, MatrixBotConfig, MatrixRoomEvent } from '@manacore/matrix-bot-common';
|
||||
import { CommandRouterService, CommandContext } from './command-router.service';
|
||||
import { VoiceService } from '../voice/voice.service';
|
||||
import { VoiceFormatterService } from '../voice/voice-formatter.service';
|
||||
import { HELP_TEXT, WELCOME_TEXT, BOT_INTRODUCTION } from '../config/configuration';
|
||||
|
||||
@Injectable()
|
||||
|
|
@ -14,7 +15,8 @@ export class MatrixService extends BaseMatrixService {
|
|||
@Inject(forwardRef(() => CommandRouterService))
|
||||
private commandRouter: CommandRouterService,
|
||||
@Inject(forwardRef(() => VoiceService))
|
||||
private voiceService: VoiceService
|
||||
private voiceService: VoiceService,
|
||||
private voiceFormatter: VoiceFormatterService
|
||||
) {
|
||||
super(configService);
|
||||
this.voiceEnabled = configService.get('voice.enabled') !== false;
|
||||
|
|
@ -196,19 +198,15 @@ export class MatrixService extends BaseMatrixService {
|
|||
userId: string
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Prepare text for speech (remove markdown, emojis, etc.)
|
||||
const speechText = this.prepareTextForSpeech(text);
|
||||
// Format text for natural German speech
|
||||
const speechText = this.voiceFormatter.format(text);
|
||||
|
||||
// Skip if text is too short or empty
|
||||
if (!speechText || speechText.length < 5) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip if text is very long (summarize would be better)
|
||||
if (speechText.length > 800) {
|
||||
this.logger.debug(`Text too long for audio (${speechText.length} chars), skipping`);
|
||||
return;
|
||||
}
|
||||
this.logger.debug(`Formatted for speech: ${speechText.length} chars`);
|
||||
|
||||
// Generate audio
|
||||
const audioBuffer = await this.voiceService.synthesize(speechText, userId);
|
||||
|
|
@ -234,63 +232,6 @@ export class MatrixService extends BaseMatrixService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare text for text-to-speech
|
||||
* Removes markdown formatting, excessive whitespace, and formats for natural speech
|
||||
*/
|
||||
private prepareTextForSpeech(text: string): string {
|
||||
let result = text;
|
||||
|
||||
// Remove code blocks
|
||||
result = result.replace(/```[\s\S]*?```/g, '');
|
||||
result = result.replace(/`[^`]+`/g, '');
|
||||
|
||||
// Remove markdown formatting
|
||||
result = result.replace(/\*\*(.+?)\*\*/g, '$1'); // Bold
|
||||
result = result.replace(/\*(.+?)\*/g, '$1'); // Italic
|
||||
result = result.replace(/~~(.+?)~~/g, '$1'); // Strikethrough
|
||||
result = result.replace(/^#+\s*/gm, ''); // Headers
|
||||
|
||||
// Remove common emojis (keep some for context)
|
||||
result = result.replace(/[📋📅⏱️🔮💡❌✅🎤🔊☀️💪🔔]/g, '');
|
||||
|
||||
// Convert bullet points to natural speech
|
||||
result = result.replace(/^[•\-]\s*/gm, '');
|
||||
|
||||
// Convert numbered lists
|
||||
result = result.replace(/^\d+\.\s*/gm, '');
|
||||
|
||||
// Clean up time formats for German speech
|
||||
result = result.replace(/(\d{1,2}):(\d{2})/g, (_, h, m) => {
|
||||
const hour = parseInt(h);
|
||||
const min = parseInt(m);
|
||||
if (min === 0) {
|
||||
return `${hour} Uhr`;
|
||||
} else if (min === 30) {
|
||||
return `halb ${hour + 1}`;
|
||||
} else if (min === 15) {
|
||||
return `viertel nach ${hour}`;
|
||||
} else if (min === 45) {
|
||||
return `viertel vor ${hour + 1}`;
|
||||
}
|
||||
return `${hour} Uhr ${min}`;
|
||||
});
|
||||
|
||||
// Clean up multiple newlines and spaces
|
||||
result = result.replace(/\n{2,}/g, '. ');
|
||||
result = result.replace(/\n/g, ' ');
|
||||
result = result.replace(/\s{2,}/g, ' ');
|
||||
|
||||
// Remove URLs
|
||||
result = result.replace(/https?:\/\/[^\s]+/g, '');
|
||||
|
||||
// Clean up punctuation
|
||||
result = result.replace(/\s+([.,!?])/g, '$1');
|
||||
result = result.replace(/([.,!?])\s*([.,!?])/g, '$1');
|
||||
|
||||
return result.trim();
|
||||
}
|
||||
|
||||
private async sendWelcomeMessage(roomId: string, userId: string) {
|
||||
try {
|
||||
await this.sendMessage(roomId, WELCOME_TEXT);
|
||||
|
|
|
|||
369
services/matrix-mana-bot/src/voice/voice-formatter.service.ts
Normal file
369
services/matrix-mana-bot/src/voice/voice-formatter.service.ts
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
import { Injectable } from '@nestjs/common';
|
||||
|
||||
/**
|
||||
* Formats text responses for natural German speech synthesis.
|
||||
* Converts markdown, numbers, times, lists etc. to spoken language.
|
||||
*/
|
||||
@Injectable()
|
||||
export class VoiceFormatterService {
  /** Upper bound (in characters) for text handed to speech synthesis. */
  private readonly MAX_AUDIO_CHARS = 800;
  /** Number of list items that are read out before the rest is summarized. */
  private readonly MAX_LIST_ITEMS = 3;

  /** German month names, index 0 = January. */
  private static readonly MONTH_NAMES: readonly string[] = [
    'Januar',
    'Februar',
    'März',
    'April',
    'Mai',
    'Juni',
    'Juli',
    'August',
    'September',
    'Oktober',
    'November',
    'Dezember',
  ];

  /** German cardinal words, index = value (0..24, enough for clock hours). */
  private static readonly NUMBER_WORDS: readonly string[] = [
    'null',
    'eins',
    'zwei',
    'drei',
    'vier',
    'fünf',
    'sechs',
    'sieben',
    'acht',
    'neun',
    'zehn',
    'elf',
    'zwölf',
    'dreizehn',
    'vierzehn',
    'fünfzehn',
    'sechzehn',
    'siebzehn',
    'achtzehn',
    'neunzehn',
    'zwanzig',
    'einundzwanzig',
    'zweiundzwanzig',
    'dreiundzwanzig',
    'vierundzwanzig',
  ];

  /** German enumeration adverbs, index 0 = "Erstens". */
  private static readonly ORDINAL_WORDS: readonly string[] = [
    'Erstens',
    'Zweitens',
    'Drittens',
    'Viertens',
    'Fünftens',
    'Sechstens',
    'Siebtens',
    'Achtens',
    'Neuntens',
    'Zehntens',
  ];

  /**
   * Main entry point - formats text for TTS.
   *
   * Pipeline order matters:
   * - lists are handled while line structure and bullet markers still exist;
   * - Markdown links are unwrapped before bare URLs are dropped;
   * - URLs are dropped before the metadata/time/date/number passes, because
   *   those passes would otherwise rewrite parts of a URL (e.g. `#fragment`,
   *   `:8080`, `/15.02.2024/`) and insert spaces that make the URL regex stop
   *   early, leaking URL fragments into the spoken text.
   *
   * @param text Raw bot response (may contain Markdown, emojis, URLs).
   * @returns Text suitable for German speech synthesis, at most
   *          `MAX_AUDIO_CHARS` characters; empty string for blank input.
   */
  format(text: string): string {
    if (!text || text.trim().length === 0) {
      return '';
    }

    let result = text;

    // Code is never read out.
    result = this.removeCodeBlocks(result);
    // Lists need the original line structure.
    result = this.formatLists(result);
    result = this.removeMarkdown(result);
    result = this.removeUrls(result);
    // Task metadata (!p1, @heute, #projekt)
    result = this.formatTaskMetadata(result);
    result = this.formatTimes(result);
    result = this.formatDates(result);
    result = this.formatNumbers(result);
    result = this.removeEmojis(result);
    result = this.cleanupText(result);
    result = this.truncateIfNeeded(result);

    return result.trim();
  }

  /**
   * Format for confirmations (short, friendly).
   *
   * @param action What was done (inserted verbatim).
   * @param item What it was done to (inserted verbatim).
   */
  formatConfirmation(action: string, item: string): string {
    return `Erledigt. ${action} ${item}.`;
  }

  /**
   * Format for errors (clear, helpful). Emojis are stripped from the message.
   */
  formatError(message: string): string {
    const cleanMessage = this.removeEmojis(message).trim();
    return `Es gab ein Problem: ${cleanMessage}`;
  }

  /**
   * Format a spoken summary of a list.
   *
   * @param items Item labels, most important first.
   * @param itemType Plural noun for the items (e.g. "Aufgaben"). For a single
   *                 item a trailing "n" is stripped and the article "eine" is
   *                 used - NOTE(review): this only yields correct German for
   *                 feminine nouns with an -n plural; confirm against callers.
   */
  formatListSummary(items: string[], itemType: string): string {
    const count = items.length;

    if (count === 0) {
      return `Du hast keine ${itemType}.`;
    }

    if (count === 1) {
      return `Du hast eine ${itemType.replace(/n$/, '')}: ${items[0]}.`;
    }

    if (count <= this.MAX_LIST_ITEMS) {
      const lastItem = items[items.length - 1];
      const otherItems = items.slice(0, -1).join(', ');
      return `Du hast ${this.numberToWord(count)} ${itemType}: ${otherItems} und ${lastItem}.`;
    }

    // Summarize long lists
    const topItemsText = items.slice(0, this.MAX_LIST_ITEMS).join(', ');
    const remaining = count - this.MAX_LIST_ITEMS;
    return `Du hast ${this.numberToWord(count)} ${itemType}. Die wichtigsten: ${topItemsText}. ${this.remainingPhrase(remaining)}`;
  }

  // --- Private helper methods ---

  /** Removes fenced code blocks and inline code spans including their content. */
  private removeCodeBlocks(text: string): string {
    return text.replace(/```[\s\S]*?```/g, '').replace(/`[^`]+`/g, '');
  }

  /**
   * Turns bullet and numbered lists into sentences. Only the first
   * `MAX_LIST_ITEMS` entries are kept; the rest is summarized.
   * Numbered items are prefixed with "Erstens", "Zweitens", ...
   */
  private formatLists(text: string): string {
    const bulletListRegex = /(?:^[•\-\*]\s+.+$\n?)+/gm;
    const numberedListRegex = /(?:^\d+\.\s+.+$\n?)+/gm;

    const withBullets = text.replace(bulletListRegex, (block: string) =>
      this.speakListItems(this.extractListItems(block, /^[•\-\*]\s+/)),
    );

    return withBullets.replace(numberedListRegex, (block: string) =>
      this.speakListItems(
        this.extractListItems(block, /^\d+\.\s+/),
        (item, index) => `${this.ordinalWord(index + 1)}, ${item}`,
      ),
    );
  }

  /** Splits a matched list block into trimmed, non-empty item texts. */
  private extractListItems(block: string, marker: RegExp): string[] {
    return block
      .split('\n')
      .map((line) => line.replace(marker, '').trim())
      .filter((line) => line.length > 0);
  }

  /** Joins the leading list items into sentences and summarizes the rest. */
  private speakListItems(
    items: string[],
    render: (item: string, index: number) => string = (item) => item,
  ): string {
    const spoken = items.slice(0, this.MAX_LIST_ITEMS).map(render).join('. ');
    const remaining = items.length - this.MAX_LIST_ITEMS;
    return remaining > 0 ? `${spoken}. ${this.remainingPhrase(remaining)} ` : `${spoken}. `;
  }

  /**
   * Spoken "and N more". A single remaining item needs the inflected
   * "eine weitere"; the bare cardinal "eins weitere" is not valid German.
   */
  private remainingPhrase(remaining: number): string {
    return remaining === 1 ? 'Und eine weitere.' : `Und ${this.numberToWord(remaining)} weitere.`;
  }

  /** Strips Markdown syntax but keeps the visible text. */
  private removeMarkdown(text: string): string {
    let result = text;

    // Bold
    result = result.replace(/\*\*(.+?)\*\*/g, '$1');
    // Italic
    result = result.replace(/\*(.+?)\*/g, '$1');
    // Strikethrough
    result = result.replace(/~~(.+?)~~/g, '$1');
    // Headers: the marker must be followed by a space or tab (as in
    // CommonMark). Without that requirement a leading "#projekt" tag would be
    // stripped here and never reach formatTaskMetadata.
    result = result.replace(/^#{1,6}[ \t]+/gm, '');
    // Links [text](url)
    result = result.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
    // Block quotes
    result = result.replace(/^>\s*/gm, '');

    return result;
  }

  /** Expands task shorthand (`!p1`, `@heute`, `#projekt`) into spoken German. */
  private formatTaskMetadata(text: string): string {
    let result = text;

    // Priority: !p1, !p2, !p3, !p4
    result = result.replace(/!p1\b/gi, 'mit höchster Priorität');
    result = result.replace(/!p2\b/gi, 'mit hoher Priorität');
    result = result.replace(/!p3\b/gi, 'mit normaler Priorität');
    result = result.replace(/!p4\b/gi, 'mit niedriger Priorität');

    // Due dates: @heute, @morgen, @übermorgen
    result = result.replace(/@heute\b/gi, 'fällig heute');
    result = result.replace(/@morgen\b/gi, 'fällig morgen');
    result = result.replace(/@übermorgen\b/gi, 'fällig übermorgen');

    // Projects: #projektname -> im Projekt projektname
    result = result.replace(/#(\w+)/g, 'im Projekt $1');

    return result;
  }

  /**
   * Converts `H:MM` / `HH:MM` (optionally followed by "Uhr") to spoken German.
   *
   * - Full hours and arbitrary minutes keep the 24-hour value
   *   ("vierzehn Uhr", "vierzehn Uhr zwanzig").
   * - The colloquial forms "halb", "viertel nach", "viertel vor" use the
   *   12-hour clock (14:30 -> "halb drei", 23:45 -> "viertel vor zwölf").
   * - Strings that are not valid clock times (hour > 24, minute > 59, or part
   *   of a longer digit run) are left untouched.
   */
  private formatTimes(text: string): string {
    return text.replace(
      /(?<!\d)(\d{1,2}):(\d{2})(?!\d)(?:\s*Uhr)?/g,
      (match: string, h: string, m: string): string => {
        const hour = Number.parseInt(h, 10);
        const minute = Number.parseInt(m, 10);

        const isClockTime = (hour <= 23 && minute <= 59) || (hour === 24 && minute === 0);
        if (!isClockTime) {
          return match;
        }

        switch (minute) {
          case 0:
            return `${this.hourWord(hour)} Uhr`;
          case 15:
            return `viertel nach ${this.numberToWord(this.toTwelveHour(hour))}`;
          case 30:
            return `halb ${this.numberToWord(this.toTwelveHour(hour + 1))}`;
          case 45:
            return `viertel vor ${this.numberToWord(this.toTwelveHour(hour + 1))}`;
          default:
            return `${this.hourWord(hour)} Uhr ${this.numberToWord(minute)}`;
        }
      },
    );
  }

  /** Maps any hour count onto the 1..12 dial used by colloquial time phrases. */
  private toTwelveHour(hour: number): number {
    const onDial = hour % 12;
    return onDial === 0 ? 12 : onDial;
  }

  /** Hour word as used directly before "Uhr" ("ein Uhr", not "eins Uhr"). */
  private hourWord(hour: number): string {
    return hour === 1 ? 'ein' : this.numberToWord(hour);
  }

  /**
   * Converts German numeric dates (`DD.MM.`, `DD.MM.YY`, `DD.MM.YYYY`) to
   * "D. Monat [Jahr]". Anything that is not a plausible date (day outside
   * 1..31, month outside 1..12, or embedded in a longer dotted number such as
   * an IP address or version string) is left unchanged instead of losing its
   * month part.
   */
  private formatDates(text: string): string {
    return text.replace(
      /(?<![\d.])(\d{1,2})\.(\d{1,2})\.(\d{4}|\d{2})?(?!\d)/g,
      (match: string, d: string, m: string, y: string | undefined): string => {
        const day = Number.parseInt(d, 10);
        const monthName = VoiceFormatterService.MONTH_NAMES[Number.parseInt(m, 10) - 1];

        if (day < 1 || day > 31 || monthName === undefined) {
          return match;
        }
        return y ? `${day}. ${monthName} ${y}` : `${day}. ${monthName}`;
      },
    );
  }

  /**
   * Converts small standalone numbers (1-12) to words; larger numbers are fine
   * as digits for speech synthesis.
   *
   * Skipped on purpose:
   * - digits that are part of a decimal / dotted number ("2.5", "1,5"), which
   *   would otherwise be split into two separate words;
   * - the day of a date produced by formatDates ("5. Februar"), which must
   *   stay an ordinal and not become the cardinal "fünf. Februar".
   */
  private formatNumbers(text: string): string {
    const standaloneNumber =
      /(?<!\d[.,])\b(\d+)\b(?![.,]\d)(?!\.\s(?:Januar|Februar|März|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)\b)/g;

    return text.replace(standaloneNumber, (match: string, digits: string): string => {
      const value = Number.parseInt(digits, 10);
      return value >= 1 && value <= 12 ? this.numberToWord(value) : match;
    });
  }

  /** German cardinal for 0..24; any other value is returned as digits. */
  private numberToWord(n: number): string {
    return VoiceFormatterService.NUMBER_WORDS[n] ?? n.toString();
  }

  /** "Erstens".."Zehntens" for 1..10; any other value is returned as "n.". */
  private ordinalWord(n: number): string {
    return VoiceFormatterService.ORDINAL_WORDS[n - 1] ?? `${n}.`;
  }

  /**
   * Removes emojis and pictographs: U+1F300-1F9FF, U+2600-26FF, U+2700-27BF,
   * the stopwatch U+23F1 and the emoji variation selector U+FE0F.
   */
  private removeEmojis(text: string): string {
    return text.replace(
      /[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{23F1}\u{FE0F}]/gu,
      '',
    );
  }

  /** Drops bare http(s) URLs up to the next whitespace. */
  private removeUrls(text: string): string {
    return text.replace(/https?:\/\/[^\s]+/g, '');
  }

  /** Normalizes whitespace and punctuation left over from the other passes. */
  private cleanupText(text: string): string {
    let result = text;

    // Paragraph breaks become sentence breaks
    result = result.replace(/\n{2,}/g, '. ');
    // Single newlines to space
    result = result.replace(/\n/g, ' ');
    // Multiple spaces to single
    result = result.replace(/\s{2,}/g, ' ');
    // Remove space before punctuation
    result = result.replace(/\s+([.,!?;:])/g, '$1');
    // Collapse a whole run of punctuation to its first mark ("..." -> ".")
    result = result.replace(/([.,!?;:])(?:\s*[.,!?;:])+/g, '$1');
    // Ensure space after punctuation
    result = result.replace(/([.,!?;:])([A-Za-zÄÖÜäöüß])/g, '$1 $2');
    // Remove leading/trailing punctuation from text
    result = result.replace(/^[.,!?;:\s]+/, '');
    result = result.replace(/[.,!?;:\s]+$/, '');

    return result;
  }

  /**
   * Shortens text to at most `MAX_AUDIO_CHARS` characters (including the
   * appended "Und so weiter."), preferring a sentence boundary in the second
   * half of the allowed span and falling back to a word boundary.
   */
  private truncateIfNeeded(text: string): string {
    if (text.length <= this.MAX_AUDIO_CHARS) {
      return text;
    }

    const suffix = 'Und so weiter.';
    // Reserve room for the longest possible tail (". " + suffix) so the
    // result never exceeds the limit.
    const budget = this.MAX_AUDIO_CHARS - (suffix.length + 2);
    const truncated = text.slice(0, budget);

    const lastSentenceEnd = Math.max(
      truncated.lastIndexOf('. '),
      truncated.lastIndexOf('! '),
      truncated.lastIndexOf('? '),
    );
    if (lastSentenceEnd > budget * 0.5) {
      return `${truncated.slice(0, lastSentenceEnd + 1)} ${suffix}`;
    }

    // Fallback: cut at a word boundary and avoid ",." style endings.
    const lastSpace = truncated.lastIndexOf(' ');
    const head = (lastSpace > 0 ? truncated.slice(0, lastSpace) : truncated).replace(
      /[.,!?;:\s]+$/,
      '',
    );
    return `${head}. ${suffix}`;
  }
}
|
||||
|
|
@ -1,8 +1,9 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { VoiceService } from './voice.service';
|
||||
import { VoiceFormatterService } from './voice-formatter.service';
|
||||
|
||||
@Module({
|
||||
providers: [VoiceService],
|
||||
exports: [VoiceService],
|
||||
providers: [VoiceService, VoiceFormatterService],
|
||||
exports: [VoiceService, VoiceFormatterService],
|
||||
})
|
||||
export class VoiceModule {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue