mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-18 17:41:23 +02:00
Add internationalization (DE + EN) to previously missing apps:
- todo: task management translations
- skilltree: skill/XP system translations
- nutriphi: nutrition tracking translations
- planta: plant care translations
- questions: research app translations
- matrix: chat client translations (layout integration)
Each app includes:
- svelte-i18n setup with SSR support
- localStorage persistence ({app}_locale pattern)
- i18n loading state in +layout.svelte
- German (default) and English translations
Updated CONSISTENCY_REPORT.md to mark i18n task as complete.
Also includes:
- mana-tts service placeholder files
187 lines
5.8 KiB
Python
187 lines
5.8 KiB
Python
"""
|
|
Kokoro TTS Service for fast preset voice synthesis.
|
|
Uses mlx-audio's Kokoro implementation optimized for Apple Silicon.
|
|
"""
|
|
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
|
|
import numpy as np
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Global singleton for lazy initialization: the loaded model and the
# identifier it was loaded from. Both are None until get_kokoro_model()
# has loaded a model successfully.
_kokoro_model = None
_kokoro_model_name = None

# Default model: identifier used when a caller does not pass model_name.
DEFAULT_KOKORO_MODEL = "mlx-community/Kokoro-82M-bf16"
|
|
|
|
# Available Kokoro voices, keyed by voice ID; each value is a human-readable
# description. ID prefixes:
#   af = American Female, am = American Male,
#   bf = British Female,  bm = British Male
KOKORO_VOICES: dict[str, str] = {
    # American Female voices
    "af_heart": "American Female - Heart (warm, emotional)",
    "af_alloy": "American Female - Alloy (neutral, professional)",
    "af_aoede": "American Female - Aoede (clear, articulate)",
    "af_bella": "American Female - Bella (friendly, approachable)",
    "af_jessica": "American Female - Jessica (confident, clear)",
    "af_kore": "American Female - Kore (calm, measured)",
    "af_nicole": "American Female - Nicole (bright, energetic)",
    "af_nova": "American Female - Nova (modern, dynamic)",
    "af_river": "American Female - River (smooth, flowing)",
    "af_sarah": "American Female - Sarah (warm, conversational)",
    "af_sky": "American Female - Sky (light, airy)",
    # American Male voices
    "am_adam": "American Male - Adam (deep, authoritative)",
    "am_echo": "American Male - Echo (resonant, clear)",
    "am_eric": "American Male - Eric (professional, neutral)",
    "am_fenrir": "American Male - Fenrir (strong, commanding)",
    "am_liam": "American Male - Liam (friendly, casual)",
    "am_michael": "American Male - Michael (warm, trustworthy)",
    "am_onyx": "American Male - Onyx (deep, smooth)",
    "am_puck": "American Male - Puck (playful, light)",
    # British Female voices
    "bf_alice": "British Female - Alice (refined, elegant)",
    "bf_emma": "British Female - Emma (clear, professional)",
    "bf_isabella": "British Female - Isabella (sophisticated, warm)",
    "bf_lily": "British Female - Lily (soft, gentle)",
    # British Male voices
    "bm_daniel": "British Male - Daniel (classic, authoritative)",
    "bm_fable": "British Male - Fable (storyteller, expressive)",
    "bm_george": "British Male - George (traditional, clear)",
    "bm_lewis": "British Male - Lewis (modern, approachable)",
}

# Voice used by default, and the fallback when an unknown voice ID is requested.
DEFAULT_VOICE = "af_heart"
|
|
|
|
|
|
@dataclass
class KokoroResult:
    """Result from Kokoro TTS synthesis."""

    # Synthesized audio samples. synthesize_kokoro() fills this with the
    # concatenation of all generated chunks, converted to float32.
    audio: np.ndarray
    # Sample rate reported by the model for the generated audio.
    sample_rate: int
    # Voice ID that was actually used (after any fallback to the default).
    voice: str
    # Length of `audio` divided by `sample_rate`.
    duration: float
|
|
|
|
|
|
def get_kokoro_model(model_name: str = DEFAULT_KOKORO_MODEL):
    """
    Get or create Kokoro model instance (singleton pattern).

    The loaded model is cached in module globals together with the name it
    was loaded from. A call with the cached name returns the cached
    instance; a call with a different name loads that model and, on
    success, replaces the cached one.

    Args:
        model_name: HuggingFace model identifier

    Returns:
        Kokoro model instance, as returned by ``mlx_audio.tts.load``

    Raises:
        RuntimeError: If an ImportError occurs while importing or loading;
            the original ImportError is attached as ``__cause__``.
        Exception: Any other loading error is logged and re-raised unchanged.
    """
    global _kokoro_model, _kokoro_model_name

    # Return existing model if same model name
    if _kokoro_model is not None and _kokoro_model_name == model_name:
        return _kokoro_model

    logger.info(f"Loading Kokoro model: {model_name}")

    try:
        # Imported here rather than at module top, so importing this module
        # does not itself require mlx-audio.
        from mlx_audio.tts import load

        _kokoro_model = load(model_name)
        _kokoro_model_name = model_name
        logger.info("Kokoro model loaded successfully")
        return _kokoro_model

    except ImportError as e:
        # load() is inside the same try, so an ImportError raised while
        # loading also ends up here. Chaining with `from e` keeps the real
        # cause visible next to the generic install hint.
        logger.error(f"Failed to import mlx_audio: {e}")
        raise RuntimeError(
            "mlx-audio not installed. Run: pip install mlx-audio"
        ) from e
    except Exception as e:
        logger.error(f"Failed to load Kokoro model: {e}")
        raise
|
|
|
|
|
|
def is_kokoro_loaded() -> bool:
    """Check if Kokoro model is currently loaded.

    Returns:
        True when the module-level model cache holds an instance, regardless
        of which model name it was loaded from; False otherwise.
    """
    return _kokoro_model is not None
|
|
|
|
|
|
def get_available_voices() -> dict[str, str]:
    """Get dictionary of available Kokoro voices.

    Returns:
        A shallow copy of KOKORO_VOICES (voice ID -> description), so callers
        can modify the result without affecting the module-level table.
    """
    return KOKORO_VOICES.copy()
|
|
|
|
|
|
async def synthesize_kokoro(
    text: str,
    voice: str = DEFAULT_VOICE,
    speed: float = 1.0,
    model_name: str = DEFAULT_KOKORO_MODEL,
) -> KokoroResult:
    """
    Synthesize speech using Kokoro TTS.

    Args:
        text: Text to synthesize
        voice: Voice ID from KOKORO_VOICES; an unknown ID is replaced by
            DEFAULT_VOICE and a warning is logged
        speed: Speech speed multiplier; clamped to the range 0.5-2.0
        model_name: HuggingFace model identifier

    Returns:
        KokoroResult with audio data

    Raises:
        RuntimeError: If generation yields no audio or fails for any other
            reason; the underlying exception is attached as ``__cause__``.
            Errors from get_kokoro_model() are raised before the wrapped
            section and propagate as-is.
    """
    # NOTE(review): this coroutine contains no `await`; model loading and
    # generation run synchronously, so the event loop is blocked until the
    # call returns. Confirm this is acceptable for the callers.

    # Validate voice
    if voice not in KOKORO_VOICES:
        logger.warning(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'")
        voice = DEFAULT_VOICE

    # Clamp speed to valid range
    speed = max(0.5, min(2.0, speed))

    # Get model
    model = get_kokoro_model(model_name)

    logger.info(f"Synthesizing with Kokoro: voice={voice}, speed={speed}, text_length={len(text)}")

    try:
        # Generate audio using mlx-audio's generate method
        # Returns a generator of GenerationResult objects
        result_gen = model.generate(
            text=text,
            voice=voice,
            speed=speed,
        )

        # Collect all audio chunks from the generator
        audio_chunks = []
        sample_rate = 24000  # Default, will be updated from result

        for result in result_gen:
            # Each result has audio, sample_rate, audio_duration (string)
            sample_rate = result.sample_rate

            # Convert MLX array to numpy
            audio_chunks.append(np.array(result.audio, dtype=np.float32))

        if not audio_chunks:
            raise RuntimeError("No audio generated")

        # Concatenate all chunks
        full_audio = np.concatenate(audio_chunks)

        # Calculate duration from audio length.
        # Assumes each chunk is a 1-D array of samples, so len() is the
        # sample count - TODO confirm against mlx-audio's output shape.
        total_duration = len(full_audio) / sample_rate

        logger.info(f"Kokoro synthesis complete: duration={total_duration:.2f}s")

        return KokoroResult(
            audio=full_audio,
            sample_rate=sample_rate,
            voice=voice,
            duration=total_duration,
        )

    except Exception as e:
        logger.error(f"Kokoro synthesis failed: {e}")
        # Chain explicitly so the original failure stays attached as the
        # cause of the RuntimeError that callers see.
        raise RuntimeError(f"TTS synthesis failed: {e}") from e
|