managarten/services/mana-matrix-bot/internal/services/voice.go
Till JS 819568c3df feat(infra): consolidate 21 Matrix bots into Go binary + add Go API gateway
Replace 21 separate NestJS Matrix bot processes (~2.1 GB RAM, ~4.2 GB Docker images)
with a single Go binary using plugin architecture (8.6 MB binary, ~30 MB RAM).

New services:
- services/mana-matrix-bot/ — Go Matrix bot with 21 plugins (mautrix-go, Redis sessions)
- services/mana-api-gateway-go/ — Go API gateway (rate limiting, API keys, credit billing)

Deleted:
- 21 services/matrix-*-bot/ directories
- packages/bot-services/ and packages/matrix-bot-common/
- Legacy deploy scripts and CI build jobs

Updated:
- docker-compose.macmini.yml: new Go services, legacy bots removed
- CI/CD: change detection + build jobs for Go services
- Root package.json: new dev:matrix, build:matrix, test:matrix scripts

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 21:03:00 +01:00

118 lines
2.9 KiB
Go

package services
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"time"
)
// VoiceClient is an HTTP client for the speech-to-text (STT) and
// text-to-speech (TTS) Python services.
type VoiceClient struct {
	// sttURL and ttsURL are the base URLs of the two services; the endpoint
	// paths ("/transcribe", "/synthesize") are appended to them per request.
	sttURL, ttsURL string
	// httpClient performs the requests to both services.
	httpClient *http.Client
}
// TranscriptionResult is the JSON payload decoded from a successful STT
// response. Fields absent from the response keep their zero values.
type TranscriptionResult struct {
// Text is the value of the "text" key: the transcribed speech.
Text string `json:"text"`
// Language is the value of the "language" key.
// NOTE(review): presumably a language code reported by the service — confirm.
Language string `json:"language"`
// Duration is the value of the "duration" key.
// NOTE(review): presumably the audio length in seconds — confirm against the STT service.
Duration float64 `json:"duration"`
}
// NewVoiceClient returns a VoiceClient that sends transcription requests to
// sttURL and synthesis requests to ttsURL. Both go through one HTTP client
// with a 120-second overall timeout.
func NewVoiceClient(sttURL, ttsURL string) *VoiceClient {
	// Transcription and synthesis can be slow, so allow a generous timeout.
	const requestTimeout = 2 * time.Minute

	vc := new(VoiceClient)
	vc.sttURL = sttURL
	vc.ttsURL = ttsURL
	vc.httpClient = &http.Client{Timeout: requestTimeout}
	return vc
}
// Transcribe uploads audioData to the STT service and returns the decoded
// transcription.
//
// The audio is sent as a multipart/form-data POST to <sttURL>/transcribe in a
// form file field named "file" (filename "audio.ogg"). When language is
// non-empty it is added as a "language" form field; otherwise the field is
// omitted. ctx is attached to the HTTP request.
//
// An error is returned if the form cannot be built, the request cannot be
// created or sent, the service answers with a status other than 200 OK (the
// error then carries the status code and the start of the response body), or
// the response body cannot be decoded as JSON into a TranscriptionResult.
func (c *VoiceClient) Transcribe(ctx context.Context, audioData []byte, language string) (*TranscriptionResult, error) {
	// Upper bound on how much of a failing response is copied into the error,
	// so a misbehaving service cannot blow up memory or log lines.
	const maxErrBody = 4 << 10

	var form bytes.Buffer
	mw := multipart.NewWriter(&form)

	part, err := mw.CreateFormFile("file", "audio.ogg")
	if err != nil {
		return nil, fmt.Errorf("create form file: %w", err)
	}
	if _, err := part.Write(audioData); err != nil {
		return nil, fmt.Errorf("write audio: %w", err)
	}
	if language != "" {
		if err := mw.WriteField("language", language); err != nil {
			return nil, fmt.Errorf("write language: %w", err)
		}
	}
	// Close writes the terminating boundary; the body is incomplete without it.
	if err := mw.Close(); err != nil {
		return nil, fmt.Errorf("close writer: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.sttURL+"/transcribe", &form)
	if err != nil {
		return nil, fmt.Errorf("create STT request: %w", err)
	}
	req.Header.Set("Content-Type", mw.FormDataContentType())

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("STT request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the status code alone is still a useful error if the
		// body cannot be read, so the read error is deliberately ignored.
		msg, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("STT error %d: %s", resp.StatusCode, string(msg))
	}

	var result TranscriptionResult
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, fmt.Errorf("decode STT response: %w", err)
	}
	return &result, nil
}
// Synthesize sends text to the TTS service and returns the raw response body,
// which is the synthesized audio.
//
// The request is a JSON POST to <ttsURL>/synthesize with a "text" key and,
// when voice is non-empty, a "voice" key. ctx is attached to the HTTP request.
//
// An error is returned if the payload cannot be encoded, the request cannot
// be created or sent, the service answers with a status other than 200 OK
// (the error then carries the status code and the start of the response
// body), or the response body cannot be read in full. On any error the
// returned slice is nil; partially read audio is never handed to the caller.
func (c *VoiceClient) Synthesize(ctx context.Context, text string, voice string) ([]byte, error) {
	// Upper bound on how much of a failing response is copied into the error,
	// so a misbehaving service cannot blow up memory or log lines.
	const maxErrBody = 4 << 10

	// omitempty drops "voice" from the payload when no voice was requested.
	payload := struct {
		Text  string `json:"text"`
		Voice string `json:"voice,omitempty"`
	}{Text: text, Voice: voice}

	data, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("marshal TTS request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.ttsURL+"/synthesize", bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("create TTS request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("TTS request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the status code alone is still a useful error if the
		// body cannot be read, so the read error is deliberately ignored.
		msg, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("TTS error %d: %s", resp.StatusCode, string(msg))
	}

	// io.ReadAll returns whatever it read before a failure; discard that so
	// callers never receive truncated audio alongside an error.
	audio, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read TTS response: %w", err)
	}
	return audio, nil
}