mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-17 14:49:39 +02:00
Replace 21 separate NestJS Matrix bot processes (~2.1 GB RAM, ~4.2 GB Docker images) with a single Go binary using plugin architecture (8.6 MB binary, ~30 MB RAM). New services: - services/mana-matrix-bot/ — Go Matrix bot with 21 plugins (mautrix-go, Redis sessions) - services/mana-api-gateway-go/ — Go API gateway (rate limiting, API keys, credit billing) Deleted: - 21 services/matrix-*-bot/ directories - packages/bot-services/ and packages/matrix-bot-common/ - Legacy deploy scripts and CI build jobs Updated: - docker-compose.macmini.yml: new Go services, legacy bots removed - CI/CD: change detection + build jobs for Go services - Root package.json: new dev:matrix, build:matrix, test:matrix scripts Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
253 lines
7.1 KiB
Go
253 lines
7.1 KiB
Go
package tts
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"strconv"
|
|
"sync"
|
|
|
|
"github.com/manacore/mana-matrix-bot/internal/plugin"
|
|
"github.com/manacore/mana-matrix-bot/internal/services"
|
|
)
|
|
|
|
func init() {
|
|
plugin.Register("tts", func() plugin.Plugin { return &TTSPlugin{} })
|
|
}
|
|
|
|
// UserSettings is the set of text-to-speech preferences stored per user.
type UserSettings struct {
	// Voice is the identifier of the voice used for synthesis.
	Voice string
	// Speed is the playback speed multiplier chosen by the user.
	Speed float64
}
|
|
|
|
// TTSPlugin implements the Matrix text-to-speech bot.
|
|
type TTSPlugin struct {
|
|
voice *services.VoiceClient
|
|
router *plugin.CommandRouter
|
|
detector *plugin.KeywordDetector
|
|
maxLen int
|
|
|
|
mu sync.RWMutex
|
|
settings map[string]*UserSettings
|
|
}
|
|
|
|
// Name returns the identifier of this plugin, "tts".
func (p *TTSPlugin) Name() string {
	const pluginName = "tts"
	return pluginName
}
|
|
|
|
func (p *TTSPlugin) Init(_ context.Context, cfg plugin.PluginConfig) error {
|
|
ttsURL := cfg.Extra["tts_url"]
|
|
if ttsURL == "" {
|
|
ttsURL = "http://localhost:3022"
|
|
}
|
|
|
|
p.voice = services.NewVoiceClient("", ttsURL)
|
|
p.settings = make(map[string]*UserSettings)
|
|
p.maxLen = 500
|
|
|
|
p.router = plugin.NewCommandRouter()
|
|
p.router.Handle("!help", p.cmdHelp)
|
|
p.router.Handle("!hilfe", p.cmdHelp)
|
|
p.router.Handle("!voice", p.cmdVoice)
|
|
p.router.Handle("!stimme", p.cmdVoice)
|
|
p.router.Handle("!voices", p.cmdVoices)
|
|
p.router.Handle("!stimmen", p.cmdVoices)
|
|
p.router.Handle("!speed", p.cmdSpeed)
|
|
p.router.Handle("!geschwindigkeit", p.cmdSpeed)
|
|
p.router.Handle("!status", p.cmdStatus)
|
|
|
|
p.detector = plugin.NewKeywordDetector(append(plugin.CommonKeywords,
|
|
plugin.KeywordCommand{Keywords: []string{"stimme", "stimme ändern"}, Command: "voice"},
|
|
plugin.KeywordCommand{Keywords: []string{"stimmen", "verfügbare stimmen"}, Command: "voices"},
|
|
plugin.KeywordCommand{Keywords: []string{"geschwindigkeit", "tempo"}, Command: "speed"},
|
|
))
|
|
|
|
slog.Info("tts plugin initialized", "url", ttsURL)
|
|
return nil
|
|
}
|
|
|
|
func (p *TTSPlugin) Commands() []plugin.CommandDef {
|
|
return []plugin.CommandDef{
|
|
{Patterns: []string{"!voice [name]", "!stimme"}, Description: "Stimme ändern", Category: "Einstellungen"},
|
|
{Patterns: []string{"!voices", "!stimmen"}, Description: "Verfügbare Stimmen", Category: "Einstellungen"},
|
|
{Patterns: []string{"!speed [0.5-2.0]"}, Description: "Geschwindigkeit", Category: "Einstellungen"},
|
|
{Patterns: []string{"!status"}, Description: "Aktuelle Einstellungen", Category: "System"},
|
|
}
|
|
}
|
|
|
|
func (p *TTSPlugin) HandleTextMessage(ctx context.Context, mc *plugin.MessageContext) error {
|
|
// Try command router first
|
|
matched, err := p.router.Route(mc)
|
|
if matched {
|
|
return err
|
|
}
|
|
|
|
// Try keywords
|
|
cmd := p.detector.Detect(mc.Body)
|
|
switch cmd {
|
|
case "help":
|
|
return p.cmdHelp(mc, "")
|
|
case "voice":
|
|
return p.cmdVoice(mc, "")
|
|
case "voices":
|
|
return p.cmdVoices(mc, "")
|
|
case "speed":
|
|
return p.cmdSpeed(mc, "")
|
|
}
|
|
|
|
// Default: treat as text to synthesize
|
|
return p.synthesize(mc, mc.Body)
|
|
}
|
|
|
|
// --- Synthesis ---
|
|
|
|
func (p *TTSPlugin) synthesize(mc *plugin.MessageContext, text string) error {
|
|
ctx := context.Background()
|
|
|
|
if len(text) > p.maxLen {
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID,
|
|
fmt.Sprintf("❌ Text zu lang (%d Zeichen). Maximum: %d Zeichen.", len(text), p.maxLen))
|
|
return nil
|
|
}
|
|
|
|
settings := p.getSettings(mc.Sender)
|
|
|
|
audioData, err := p.voice.Synthesize(ctx, text, settings.Voice)
|
|
if err != nil {
|
|
slog.Error("tts synthesis failed", "error", err)
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, "❌ Sprachsynthese fehlgeschlagen.")
|
|
return nil
|
|
}
|
|
|
|
// Upload audio to Matrix
|
|
mxcURL, err := mc.Client.UploadMedia(ctx, audioData, "audio/wav", "speech.wav")
|
|
if err != nil {
|
|
slog.Error("upload audio failed", "error", err)
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, "❌ Audio-Upload fehlgeschlagen.")
|
|
return nil
|
|
}
|
|
|
|
// Send audio message
|
|
_, err = mc.Client.SendAudio(ctx, mc.RoomID, mxcURL, "speech.wav", len(audioData))
|
|
if err != nil {
|
|
slog.Error("send audio failed", "error", err)
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, "❌ Audio konnte nicht gesendet werden.")
|
|
return nil
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// --- Command Handlers ---
|
|
|
|
func (p *TTSPlugin) cmdVoice(mc *plugin.MessageContext, args string) error {
|
|
ctx := context.Background()
|
|
|
|
if args == "" {
|
|
settings := p.getSettings(mc.Sender)
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID,
|
|
fmt.Sprintf("**Aktuelle Stimme:** `%s`\n\n**Verwendung:** `!voice [name]`\n\nZeige alle: `!voices`", settings.Voice))
|
|
return nil
|
|
}
|
|
|
|
p.mu.Lock()
|
|
settings := p.getSettings(mc.Sender)
|
|
settings.Voice = args
|
|
p.settings[mc.Sender] = settings
|
|
p.mu.Unlock()
|
|
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, fmt.Sprintf("✅ Stimme auf `%s` gesetzt.", args))
|
|
return nil
|
|
}
|
|
|
|
func (p *TTSPlugin) cmdVoices(mc *plugin.MessageContext, _ string) error {
|
|
ctx := context.Background()
|
|
|
|
// Known voices (static list — the TTS service auto-discovers)
|
|
help := `**Verfügbare Stimmen:**
|
|
|
|
**Kokoro (Englisch, schnell):**
|
|
• ` + "`af_heart`" + ` — Weiblich (Standard)
|
|
• ` + "`af_bella`" + ` — Weiblich
|
|
• ` + "`am_michael`" + ` — Männlich
|
|
• ` + "`bm_daniel`" + ` — Männlich
|
|
• ` + "`bf_emma`" + ` — Weiblich
|
|
|
|
**Piper (Deutsch, lokal):**
|
|
• ` + "`de_kerstin`" + ` — Deutsch Frau
|
|
• ` + "`de_thorsten`" + ` — Deutsch Mann
|
|
|
|
Wechseln mit: ` + "`!voice [name]`"
|
|
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, help)
|
|
return nil
|
|
}
|
|
|
|
func (p *TTSPlugin) cmdSpeed(mc *plugin.MessageContext, args string) error {
|
|
ctx := context.Background()
|
|
|
|
if args == "" {
|
|
settings := p.getSettings(mc.Sender)
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID,
|
|
fmt.Sprintf("**Aktuelle Geschwindigkeit:** %.1fx\n\n**Verwendung:** `!speed [0.5-2.0]`", settings.Speed))
|
|
return nil
|
|
}
|
|
|
|
speed, err := strconv.ParseFloat(args, 64)
|
|
if err != nil || speed < 0.5 || speed > 2.0 {
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, "❌ Geschwindigkeit muss zwischen 0.5 und 2.0 liegen.\n\nBeispiel: `!speed 1.2`")
|
|
return nil
|
|
}
|
|
|
|
p.mu.Lock()
|
|
settings := p.getSettings(mc.Sender)
|
|
settings.Speed = speed
|
|
p.settings[mc.Sender] = settings
|
|
p.mu.Unlock()
|
|
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, fmt.Sprintf("✅ Geschwindigkeit auf %.1fx gesetzt.", speed))
|
|
return nil
|
|
}
|
|
|
|
func (p *TTSPlugin) cmdStatus(mc *plugin.MessageContext, _ string) error {
|
|
ctx := context.Background()
|
|
settings := p.getSettings(mc.Sender)
|
|
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID,
|
|
fmt.Sprintf("**Aktuelle Einstellungen:**\n\nStimme: `%s`\nGeschwindigkeit: %.1fx\nMax. Textlänge: %d Zeichen",
|
|
settings.Voice, settings.Speed, p.maxLen))
|
|
return nil
|
|
}
|
|
|
|
func (p *TTSPlugin) cmdHelp(mc *plugin.MessageContext, _ string) error {
|
|
ctx := context.Background()
|
|
help := `**🔊 TTS Bot - Text zu Sprache**
|
|
|
|
Sende eine **Textnachricht** und ich lese sie vor!
|
|
|
|
**Einstellungen:**
|
|
• ` + "`!voice af_heart`" + ` — Stimme wechseln
|
|
• ` + "`!voices`" + ` — Alle Stimmen anzeigen
|
|
• ` + "`!speed 1.2`" + ` — Geschwindigkeit (0.5-2.0)
|
|
• ` + "`!status`" + ` — Aktuelle Einstellungen
|
|
|
|
**Max. Textlänge:** 500 Zeichen`
|
|
|
|
mc.Client.SendReply(ctx, mc.RoomID, mc.EventID, help)
|
|
return nil
|
|
}
|
|
|
|
// --- Settings ---
|
|
|
|
func (p *TTSPlugin) getSettings(userID string) *UserSettings {
|
|
p.mu.RLock()
|
|
settings, ok := p.settings[userID]
|
|
p.mu.RUnlock()
|
|
|
|
if !ok {
|
|
return &UserSettings{
|
|
Voice: "af_heart",
|
|
Speed: 1.0,
|
|
}
|
|
}
|
|
return settings
|
|
}
|