From 5a0815708c5799f1e538024b49ccab935e9b509e Mon Sep 17 00:00:00 2001 From: Till-JS <101404291+Till-JS@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:47:58 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=90=20feat:=20add=20i18n=20support=20t?= =?UTF-8?q?o=206=20web=20apps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add internationalization (DE + EN) to previously missing apps: - todo: task management translations - skilltree: skill/XP system translations - nutriphi: nutrition tracking translations - planta: plant care translations - questions: research app translations - matrix: chat client translations (layout integration) Each app includes: - svelte-i18n setup with SSR support - localStorage persistence ({app}_locale pattern) - i18n loading state in +layout.svelte - German (default) and English translations Updated CONSISTENCY_REPORT.md to mark i18n task as complete. Also includes: - mana-tts service placeholder files --- .../matrix/apps/web/src/routes/+layout.svelte | 4 +- apps/nutriphi/apps/web/src/lib/i18n/index.ts | 49 ++ .../apps/web/src/lib/i18n/locales/de.json | 89 +++ .../apps/web/src/lib/i18n/locales/en.json | 89 +++ .../apps/web/src/routes/+layout.svelte | 12 +- apps/planta/apps/web/src/lib/i18n/index.ts | 49 ++ .../apps/web/src/lib/i18n/locales/de.json | 87 +++ .../apps/web/src/lib/i18n/locales/en.json | 87 +++ .../planta/apps/web/src/routes/+layout.svelte | 7 +- apps/questions/apps/web/src/lib/i18n/index.ts | 49 ++ .../apps/web/src/lib/i18n/locales/de.json | 96 +++ .../apps/web/src/lib/i18n/locales/en.json | 96 +++ .../apps/web/src/routes/+layout.svelte | 5 +- apps/skilltree/apps/web/package.json | 1 + apps/skilltree/apps/web/src/lib/i18n/index.ts | 49 ++ .../apps/web/src/lib/i18n/locales/de.json | 83 +++ .../apps/web/src/lib/i18n/locales/en.json | 83 +++ .../apps/web/src/routes/+layout.svelte | 9 +- apps/todo/apps/web/src/lib/i18n/index.ts | 49 ++ .../apps/web/src/lib/i18n/locales/de.json | 104 +++ 
.../apps/web/src/lib/i18n/locales/en.json | 104 +++ apps/todo/apps/web/src/routes/+layout.svelte | 5 +- docs/CONSISTENCY_REPORT.md | 90 ++- scripts/mac-mini/setup-tts.sh | 172 +++++ services/mana-tts/CLAUDE.md | 100 +++ services/mana-tts/README.md | 237 +++++++ services/mana-tts/app/__init__.py | 0 services/mana-tts/app/audio_utils.py | 224 +++++++ services/mana-tts/app/f5_service.py | 208 ++++++ services/mana-tts/app/kokoro_service.py | 187 ++++++ services/mana-tts/app/main.py | 625 ++++++++++++++++++ services/mana-tts/app/voice_manager.py | 275 ++++++++ services/mana-tts/requirements.txt | 22 + services/mana-tts/setup.sh | 150 +++++ services/mana-tts/voices/.gitkeep | 0 35 files changed, 3440 insertions(+), 56 deletions(-) create mode 100644 apps/nutriphi/apps/web/src/lib/i18n/index.ts create mode 100644 apps/nutriphi/apps/web/src/lib/i18n/locales/de.json create mode 100644 apps/nutriphi/apps/web/src/lib/i18n/locales/en.json create mode 100644 apps/planta/apps/web/src/lib/i18n/index.ts create mode 100644 apps/planta/apps/web/src/lib/i18n/locales/de.json create mode 100644 apps/planta/apps/web/src/lib/i18n/locales/en.json create mode 100644 apps/questions/apps/web/src/lib/i18n/index.ts create mode 100644 apps/questions/apps/web/src/lib/i18n/locales/de.json create mode 100644 apps/questions/apps/web/src/lib/i18n/locales/en.json create mode 100644 apps/skilltree/apps/web/src/lib/i18n/index.ts create mode 100644 apps/skilltree/apps/web/src/lib/i18n/locales/de.json create mode 100644 apps/skilltree/apps/web/src/lib/i18n/locales/en.json create mode 100644 apps/todo/apps/web/src/lib/i18n/index.ts create mode 100644 apps/todo/apps/web/src/lib/i18n/locales/de.json create mode 100644 apps/todo/apps/web/src/lib/i18n/locales/en.json create mode 100755 scripts/mac-mini/setup-tts.sh create mode 100644 services/mana-tts/CLAUDE.md create mode 100644 services/mana-tts/README.md create mode 100644 services/mana-tts/app/__init__.py create mode 100644 
services/mana-tts/app/audio_utils.py create mode 100644 services/mana-tts/app/f5_service.py create mode 100644 services/mana-tts/app/kokoro_service.py create mode 100644 services/mana-tts/app/main.py create mode 100644 services/mana-tts/app/voice_manager.py create mode 100644 services/mana-tts/requirements.txt create mode 100755 services/mana-tts/setup.sh create mode 100644 services/mana-tts/voices/.gitkeep diff --git a/apps/matrix/apps/web/src/routes/+layout.svelte b/apps/matrix/apps/web/src/routes/+layout.svelte index 8927b04cc..44996b54e 100644 --- a/apps/matrix/apps/web/src/routes/+layout.svelte +++ b/apps/matrix/apps/web/src/routes/+layout.svelte @@ -1,7 +1,9 @@ - Mana Matrix + {$t('app.name')} diff --git a/apps/nutriphi/apps/web/src/lib/i18n/index.ts b/apps/nutriphi/apps/web/src/lib/i18n/index.ts new file mode 100644 index 000000000..7a7186976 --- /dev/null +++ b/apps/nutriphi/apps/web/src/lib/i18n/index.ts @@ -0,0 +1,49 @@ +import { browser } from '$app/environment'; +import { init, register, locale, waitLocale } from 'svelte-i18n'; + +// List of supported locales +export const supportedLocales = ['de', 'en'] as const; +export type SupportedLocale = (typeof supportedLocales)[number]; + +// Default locale +const defaultLocale = 'de'; + +// Register all available locales +register('de', () => import('./locales/de.json')); +register('en', () => import('./locales/en.json')); + +// Get initial locale from browser or localStorage +function getInitialLocale(): SupportedLocale { + if (browser) { + // Check localStorage first + const stored = localStorage.getItem('nutriphi_locale'); + if (stored && supportedLocales.includes(stored as SupportedLocale)) { + return stored as SupportedLocale; + } + + // Fall back to browser language + const browserLang = navigator.language.split('-')[0]; + if (supportedLocales.includes(browserLang as SupportedLocale)) { + return browserLang as SupportedLocale; + } + } + + return defaultLocale; +} + +// Initialize i18n at module scope 
(required for SSR) +init({ + fallbackLocale: defaultLocale, + initialLocale: getInitialLocale(), +}); + +// Set locale and persist to localStorage +export function setLocale(newLocale: SupportedLocale) { + locale.set(newLocale); + if (browser) { + localStorage.setItem('nutriphi_locale', newLocale); + } +} + +// Wait for locale to be loaded (useful for SSR) +export { waitLocale }; diff --git a/apps/nutriphi/apps/web/src/lib/i18n/locales/de.json b/apps/nutriphi/apps/web/src/lib/i18n/locales/de.json new file mode 100644 index 000000000..2a9a3503c --- /dev/null +++ b/apps/nutriphi/apps/web/src/lib/i18n/locales/de.json @@ -0,0 +1,89 @@ +{ + "app": { + "name": "NutriPhi", + "loading": "Laden...", + "tagline": "Ernährung verstehen" + }, + "nav": { + "dashboard": "Dashboard", + "meals": "Mahlzeiten", + "goals": "Ziele", + "favorites": "Favoriten", + "stats": "Statistiken", + "settings": "Einstellungen" + }, + "meal": { + "add": "Mahlzeit hinzufügen", + "edit": "Mahlzeit bearbeiten", + "delete": "Mahlzeit löschen", + "photo": "Foto aufnehmen", + "text": "Beschreiben", + "analyzing": "Analysiere...", + "noMeals": "Noch keine Mahlzeiten", + "breakfast": "Frühstück", + "lunch": "Mittagessen", + "dinner": "Abendessen", + "snack": "Snack" + }, + "nutrition": { + "calories": "Kalorien", + "protein": "Protein", + "carbs": "Kohlenhydrate", + "fat": "Fett", + "fiber": "Ballaststoffe", + "sugar": "Zucker", + "kcal": "kcal", + "grams": "g" + }, + "goals": { + "daily": "Tagesziele", + "setGoals": "Ziele setzen", + "calories": "Kalorien-Ziel", + "protein": "Protein-Ziel", + "carbs": "Kohlenhydrate-Ziel", + "fat": "Fett-Ziel", + "progress": "Fortschritt" + }, + "stats": { + "today": "Heute", + "week": "Diese Woche", + "remaining": "Verbleibend", + "consumed": "Verzehrt", + "average": "Durchschnitt" + }, + "favorites": { + "add": "Zu Favoriten", + "remove": "Aus Favoriten entfernen", + "noFavorites": "Keine Favoriten", + "useAgain": "Erneut verwenden" + }, + "auth": { + "login": 
"Anmelden", + "logout": "Abmelden", + "register": "Registrieren" + }, + "common": { + "save": "Speichern", + "cancel": "Abbrechen", + "delete": "Löschen", + "edit": "Bearbeiten", + "add": "Hinzufügen", + "close": "Schließen", + "search": "Suchen", + "error": "Fehler", + "success": "Erfolgreich", + "loading": "Laden..." + }, + "errors": { + "loadMeals": "Mahlzeiten konnten nicht geladen werden", + "analyzeFailed": "Analyse fehlgeschlagen", + "saveFailed": "Speichern fehlgeschlagen", + "loadGoals": "Ziele konnten nicht geladen werden" + }, + "success": { + "mealAdded": "Mahlzeit hinzugefügt", + "mealDeleted": "Mahlzeit gelöscht", + "goalsSaved": "Ziele gespeichert", + "favoriteAdded": "Zu Favoriten hinzugefügt" + } +} diff --git a/apps/nutriphi/apps/web/src/lib/i18n/locales/en.json b/apps/nutriphi/apps/web/src/lib/i18n/locales/en.json new file mode 100644 index 000000000..237785dbe --- /dev/null +++ b/apps/nutriphi/apps/web/src/lib/i18n/locales/en.json @@ -0,0 +1,89 @@ +{ + "app": { + "name": "NutriPhi", + "loading": "Loading...", + "tagline": "Understand nutrition" + }, + "nav": { + "dashboard": "Dashboard", + "meals": "Meals", + "goals": "Goals", + "favorites": "Favorites", + "stats": "Statistics", + "settings": "Settings" + }, + "meal": { + "add": "Add meal", + "edit": "Edit meal", + "delete": "Delete meal", + "photo": "Take photo", + "text": "Describe", + "analyzing": "Analyzing...", + "noMeals": "No meals yet", + "breakfast": "Breakfast", + "lunch": "Lunch", + "dinner": "Dinner", + "snack": "Snack" + }, + "nutrition": { + "calories": "Calories", + "protein": "Protein", + "carbs": "Carbohydrates", + "fat": "Fat", + "fiber": "Fiber", + "sugar": "Sugar", + "kcal": "kcal", + "grams": "g" + }, + "goals": { + "daily": "Daily goals", + "setGoals": "Set goals", + "calories": "Calorie goal", + "protein": "Protein goal", + "carbs": "Carbohydrate goal", + "fat": "Fat goal", + "progress": "Progress" + }, + "stats": { + "today": "Today", + "week": "This week", + "remaining": 
"Remaining", + "consumed": "Consumed", + "average": "Average" + }, + "favorites": { + "add": "Add to favorites", + "remove": "Remove from favorites", + "noFavorites": "No favorites", + "useAgain": "Use again" + }, + "auth": { + "login": "Login", + "logout": "Logout", + "register": "Register" + }, + "common": { + "save": "Save", + "cancel": "Cancel", + "delete": "Delete", + "edit": "Edit", + "add": "Add", + "close": "Close", + "search": "Search", + "error": "Error", + "success": "Success", + "loading": "Loading..." + }, + "errors": { + "loadMeals": "Failed to load meals", + "analyzeFailed": "Analysis failed", + "saveFailed": "Failed to save", + "loadGoals": "Failed to load goals" + }, + "success": { + "mealAdded": "Meal added", + "mealDeleted": "Meal deleted", + "goalsSaved": "Goals saved", + "favoriteAdded": "Added to favorites" + } +} diff --git a/apps/nutriphi/apps/web/src/routes/+layout.svelte b/apps/nutriphi/apps/web/src/routes/+layout.svelte index 3d96339b6..9660c3bf1 100644 --- a/apps/nutriphi/apps/web/src/routes/+layout.svelte +++ b/apps/nutriphi/apps/web/src/routes/+layout.svelte @@ -1,5 +1,7 @@ - NutriPhi - Ernährung verstehen + {$t('app.name')} - {$t('app.tagline')} -{@render children()} +{#if $i18nLoading} +
+

{$t('app.loading')}

+
+{:else} + {@render children()} +{/if} diff --git a/apps/planta/apps/web/src/lib/i18n/index.ts b/apps/planta/apps/web/src/lib/i18n/index.ts new file mode 100644 index 000000000..5ede2bc4e --- /dev/null +++ b/apps/planta/apps/web/src/lib/i18n/index.ts @@ -0,0 +1,49 @@ +import { browser } from '$app/environment'; +import { init, register, locale, waitLocale } from 'svelte-i18n'; + +// List of supported locales +export const supportedLocales = ['de', 'en'] as const; +export type SupportedLocale = (typeof supportedLocales)[number]; + +// Default locale +const defaultLocale = 'de'; + +// Register all available locales +register('de', () => import('./locales/de.json')); +register('en', () => import('./locales/en.json')); + +// Get initial locale from browser or localStorage +function getInitialLocale(): SupportedLocale { + if (browser) { + // Check localStorage first + const stored = localStorage.getItem('planta_locale'); + if (stored && supportedLocales.includes(stored as SupportedLocale)) { + return stored as SupportedLocale; + } + + // Fall back to browser language + const browserLang = navigator.language.split('-')[0]; + if (supportedLocales.includes(browserLang as SupportedLocale)) { + return browserLang as SupportedLocale; + } + } + + return defaultLocale; +} + +// Initialize i18n at module scope (required for SSR) +init({ + fallbackLocale: defaultLocale, + initialLocale: getInitialLocale(), +}); + +// Set locale and persist to localStorage +export function setLocale(newLocale: SupportedLocale) { + locale.set(newLocale); + if (browser) { + localStorage.setItem('planta_locale', newLocale); + } +} + +// Wait for locale to be loaded (useful for SSR) +export { waitLocale }; diff --git a/apps/planta/apps/web/src/lib/i18n/locales/de.json b/apps/planta/apps/web/src/lib/i18n/locales/de.json new file mode 100644 index 000000000..32b68bec5 --- /dev/null +++ b/apps/planta/apps/web/src/lib/i18n/locales/de.json @@ -0,0 +1,87 @@ +{ + "app": { + "name": "Planta", + "loading": 
"Laden...", + "tagline": "Pflanzenpflege leicht gemacht" + }, + "nav": { + "plants": "Pflanzen", + "watering": "Gießen", + "identify": "Identifizieren", + "settings": "Einstellungen" + }, + "plant": { + "add": "Pflanze hinzufügen", + "edit": "Pflanze bearbeiten", + "delete": "Pflanze löschen", + "name": "Name", + "species": "Art", + "location": "Standort", + "noPlants": "Noch keine Pflanzen", + "addFirst": "Füge deine erste Pflanze hinzu", + "careNotes": "Pflegehinweise", + "health": "Gesundheit" + }, + "health": { + "healthy": "Gesund", + "needsAttention": "Braucht Aufmerksamkeit", + "sick": "Krank" + }, + "watering": { + "water": "Gießen", + "watered": "Gegossen", + "lastWatered": "Zuletzt gegossen", + "nextWatering": "Nächstes Gießen", + "daysUntil": "in {days} Tagen", + "overdue": "Überfällig", + "today": "Heute gießen", + "noWatering": "Keine Pflanzen zum Gießen" + }, + "identify": { + "takePhoto": "Foto aufnehmen", + "analyzing": "Analysiere...", + "identified": "Identifiziert", + "confidence": "Sicherheit", + "tips": "Pflegetipps" + }, + "light": { + "low": "Wenig Licht", + "medium": "Mittleres Licht", + "bright": "Helles Licht", + "direct": "Direktes Sonnenlicht" + }, + "humidity": { + "low": "Niedrig", + "medium": "Mittel", + "high": "Hoch" + }, + "auth": { + "login": "Anmelden", + "logout": "Abmelden", + "register": "Registrieren" + }, + "common": { + "save": "Speichern", + "cancel": "Abbrechen", + "delete": "Löschen", + "edit": "Bearbeiten", + "add": "Hinzufügen", + "close": "Schließen", + "search": "Suchen", + "error": "Fehler", + "success": "Erfolgreich", + "loading": "Laden..." 
+ }, + "errors": { + "loadPlants": "Pflanzen konnten nicht geladen werden", + "identifyFailed": "Identifizierung fehlgeschlagen", + "saveFailed": "Speichern fehlgeschlagen", + "uploadFailed": "Upload fehlgeschlagen" + }, + "success": { + "plantAdded": "Pflanze hinzugefügt", + "plantDeleted": "Pflanze gelöscht", + "plantWatered": "Pflanze gegossen", + "photoUploaded": "Foto hochgeladen" + } +} diff --git a/apps/planta/apps/web/src/lib/i18n/locales/en.json b/apps/planta/apps/web/src/lib/i18n/locales/en.json new file mode 100644 index 000000000..965b08da6 --- /dev/null +++ b/apps/planta/apps/web/src/lib/i18n/locales/en.json @@ -0,0 +1,87 @@ +{ + "app": { + "name": "Planta", + "loading": "Loading...", + "tagline": "Plant care made easy" + }, + "nav": { + "plants": "Plants", + "watering": "Watering", + "identify": "Identify", + "settings": "Settings" + }, + "plant": { + "add": "Add plant", + "edit": "Edit plant", + "delete": "Delete plant", + "name": "Name", + "species": "Species", + "location": "Location", + "noPlants": "No plants yet", + "addFirst": "Add your first plant", + "careNotes": "Care notes", + "health": "Health" + }, + "health": { + "healthy": "Healthy", + "needsAttention": "Needs attention", + "sick": "Sick" + }, + "watering": { + "water": "Water", + "watered": "Watered", + "lastWatered": "Last watered", + "nextWatering": "Next watering", + "daysUntil": "in {days} days", + "overdue": "Overdue", + "today": "Water today", + "noWatering": "No plants to water" + }, + "identify": { + "takePhoto": "Take photo", + "analyzing": "Analyzing...", + "identified": "Identified", + "confidence": "Confidence", + "tips": "Care tips" + }, + "light": { + "low": "Low light", + "medium": "Medium light", + "bright": "Bright light", + "direct": "Direct sunlight" + }, + "humidity": { + "low": "Low", + "medium": "Medium", + "high": "High" + }, + "auth": { + "login": "Login", + "logout": "Logout", + "register": "Register" + }, + "common": { + "save": "Save", + "cancel": "Cancel", + 
"delete": "Delete", + "edit": "Edit", + "add": "Add", + "close": "Close", + "search": "Search", + "error": "Error", + "success": "Success", + "loading": "Loading..." + }, + "errors": { + "loadPlants": "Failed to load plants", + "identifyFailed": "Identification failed", + "saveFailed": "Failed to save", + "uploadFailed": "Upload failed" + }, + "success": { + "plantAdded": "Plant added", + "plantDeleted": "Plant deleted", + "plantWatered": "Plant watered", + "photoUploaded": "Photo uploaded" + } +} diff --git a/apps/planta/apps/web/src/routes/+layout.svelte b/apps/planta/apps/web/src/routes/+layout.svelte index 1d97fe43e..ca76de25c 100644 --- a/apps/planta/apps/web/src/routes/+layout.svelte +++ b/apps/planta/apps/web/src/routes/+layout.svelte @@ -1,12 +1,15 @@ -{#if loading} +{#if !appReady}
-

Laden...

+

{$t('common.loading')}

{:else} diff --git a/apps/questions/apps/web/src/lib/i18n/index.ts b/apps/questions/apps/web/src/lib/i18n/index.ts new file mode 100644 index 000000000..6510856ca --- /dev/null +++ b/apps/questions/apps/web/src/lib/i18n/index.ts @@ -0,0 +1,49 @@ +import { browser } from '$app/environment'; +import { init, register, locale, waitLocale } from 'svelte-i18n'; + +// List of supported locales +export const supportedLocales = ['de', 'en'] as const; +export type SupportedLocale = (typeof supportedLocales)[number]; + +// Default locale +const defaultLocale = 'de'; + +// Register all available locales +register('de', () => import('./locales/de.json')); +register('en', () => import('./locales/en.json')); + +// Get initial locale from browser or localStorage +function getInitialLocale(): SupportedLocale { + if (browser) { + // Check localStorage first + const stored = localStorage.getItem('questions_locale'); + if (stored && supportedLocales.includes(stored as SupportedLocale)) { + return stored as SupportedLocale; + } + + // Fall back to browser language + const browserLang = navigator.language.split('-')[0]; + if (supportedLocales.includes(browserLang as SupportedLocale)) { + return browserLang as SupportedLocale; + } + } + + return defaultLocale; +} + +// Initialize i18n at module scope (required for SSR) +init({ + fallbackLocale: defaultLocale, + initialLocale: getInitialLocale(), +}); + +// Set locale and persist to localStorage +export function setLocale(newLocale: SupportedLocale) { + locale.set(newLocale); + if (browser) { + localStorage.setItem('questions_locale', newLocale); + } +} + +// Wait for locale to be loaded (useful for SSR) +export { waitLocale }; diff --git a/apps/questions/apps/web/src/lib/i18n/locales/de.json b/apps/questions/apps/web/src/lib/i18n/locales/de.json new file mode 100644 index 000000000..2f8bbadc0 --- /dev/null +++ b/apps/questions/apps/web/src/lib/i18n/locales/de.json @@ -0,0 +1,96 @@ +{ + "app": { + "name": "Questions", + "loading": 
"Laden...", + "tagline": "KI-gestützte Recherche" + }, + "nav": { + "questions": "Fragen", + "collections": "Sammlungen", + "research": "Recherche", + "settings": "Einstellungen" + }, + "question": { + "create": "Frage erstellen", + "edit": "Frage bearbeiten", + "delete": "Frage löschen", + "title": "Frage", + "description": "Beschreibung", + "status": "Status", + "priority": "Priorität", + "noQuestions": "Noch keine Fragen", + "addFirst": "Stelle deine erste Frage" + }, + "status": { + "open": "Offen", + "researching": "Wird recherchiert", + "answered": "Beantwortet", + "archived": "Archiviert" + }, + "priority": { + "low": "Niedrig", + "medium": "Mittel", + "high": "Hoch" + }, + "collection": { + "create": "Sammlung erstellen", + "edit": "Sammlung bearbeiten", + "delete": "Sammlung löschen", + "name": "Name", + "color": "Farbe", + "noCollections": "Keine Sammlungen" + }, + "research": { + "start": "Recherche starten", + "inProgress": "Recherche läuft...", + "depth": "Recherchetiefe", + "quick": "Schnell", + "standard": "Standard", + "deep": "Tiefgehend", + "sources": "Quellen", + "summary": "Zusammenfassung", + "keyPoints": "Kernpunkte", + "followUp": "Weiterführende Fragen" + }, + "answer": { + "create": "Antwort erstellen", + "edit": "Antwort bearbeiten", + "accept": "Antwort akzeptieren", + "rate": "Bewerten", + "noAnswer": "Noch keine Antwort" + }, + "source": { + "view": "Quelle ansehen", + "extract": "Inhalt extrahieren", + "noSources": "Keine Quellen gefunden" + }, + "auth": { + "login": "Anmelden", + "logout": "Abmelden", + "register": "Registrieren" + }, + "common": { + "save": "Speichern", + "cancel": "Abbrechen", + "delete": "Löschen", + "edit": "Bearbeiten", + "add": "Hinzufügen", + "close": "Schließen", + "search": "Suchen", + "error": "Fehler", + "success": "Erfolgreich", + "loading": "Laden..." 
+ }, + "errors": { + "loadQuestions": "Fragen konnten nicht geladen werden", + "researchFailed": "Recherche fehlgeschlagen", + "saveFailed": "Speichern fehlgeschlagen", + "loadSources": "Quellen konnten nicht geladen werden" + }, + "success": { + "questionCreated": "Frage erstellt", + "questionDeleted": "Frage gelöscht", + "researchStarted": "Recherche gestartet", + "answerAccepted": "Antwort akzeptiert" + } +} diff --git a/apps/questions/apps/web/src/lib/i18n/locales/en.json b/apps/questions/apps/web/src/lib/i18n/locales/en.json new file mode 100644 index 000000000..78e29de13 --- /dev/null +++ b/apps/questions/apps/web/src/lib/i18n/locales/en.json @@ -0,0 +1,96 @@ +{ + "app": { + "name": "Questions", + "loading": "Loading...", + "tagline": "AI-powered research" + }, + "nav": { + "questions": "Questions", + "collections": "Collections", + "research": "Research", + "settings": "Settings" + }, + "question": { + "create": "Create question", + "edit": "Edit question", + "delete": "Delete question", + "title": "Question", + "description": "Description", + "status": "Status", + "priority": "Priority", + "noQuestions": "No questions yet", + "addFirst": "Ask your first question" + }, + "status": { + "open": "Open", + "researching": "Researching", + "answered": "Answered", + "archived": "Archived" + }, + "priority": { + "low": "Low", + "medium": "Medium", + "high": "High" + }, + "collection": { + "create": "Create collection", + "edit": "Edit collection", + "delete": "Delete collection", + "name": "Name", + "color": "Color", + "noCollections": "No collections" + }, + "research": { + "start": "Start research", + "inProgress": "Research in progress...", + "depth": "Research depth", + "quick": "Quick", + "standard": "Standard", + "deep": "Deep", + "sources": "Sources", + "summary": "Summary", + "keyPoints": "Key points", + "followUp": "Follow-up questions" + }, + "answer": { + "create": "Create answer", + "edit": "Edit answer", + "accept": "Accept answer", + "rate": "Rate", + 
"noAnswer": "No answer yet" + }, + "source": { + "view": "View source", + "extract": "Extract content", + "noSources": "No sources found" + }, + "auth": { + "login": "Login", + "logout": "Logout", + "register": "Register" + }, + "common": { + "save": "Save", + "cancel": "Cancel", + "delete": "Delete", + "edit": "Edit", + "add": "Add", + "close": "Close", + "search": "Search", + "error": "Error", + "success": "Success", + "loading": "Loading..." + }, + "errors": { + "loadQuestions": "Failed to load questions", + "researchFailed": "Research failed", + "saveFailed": "Failed to save", + "loadSources": "Failed to load sources" + }, + "success": { + "questionCreated": "Question created", + "questionDeleted": "Question deleted", + "researchStarted": "Research started", + "answerAccepted": "Answer accepted" + } +} diff --git a/apps/questions/apps/web/src/routes/+layout.svelte b/apps/questions/apps/web/src/routes/+layout.svelte index 8e870768c..5d9247706 100644 --- a/apps/questions/apps/web/src/routes/+layout.svelte +++ b/apps/questions/apps/web/src/routes/+layout.svelte @@ -1,6 +1,8 @@ -{#if loading} +{#if !appReady} {:else}
diff --git a/apps/skilltree/apps/web/package.json b/apps/skilltree/apps/web/package.json index dc15e0cc4..d767bdc41 100644 --- a/apps/skilltree/apps/web/package.json +++ b/apps/skilltree/apps/web/package.json @@ -41,6 +41,7 @@ "@manacore/shared-theme": "workspace:*", "@manacore/shared-utils": "workspace:*", "idb": "^8.0.0", + "svelte-i18n": "^4.0.1", "uuid": "^11.0.0" }, "type": "module" diff --git a/apps/skilltree/apps/web/src/lib/i18n/index.ts b/apps/skilltree/apps/web/src/lib/i18n/index.ts new file mode 100644 index 000000000..301bd9990 --- /dev/null +++ b/apps/skilltree/apps/web/src/lib/i18n/index.ts @@ -0,0 +1,49 @@ +import { browser } from '$app/environment'; +import { init, register, locale, waitLocale } from 'svelte-i18n'; + +// List of supported locales +export const supportedLocales = ['de', 'en'] as const; +export type SupportedLocale = (typeof supportedLocales)[number]; + +// Default locale +const defaultLocale = 'de'; + +// Register all available locales +register('de', () => import('./locales/de.json')); +register('en', () => import('./locales/en.json')); + +// Get initial locale from browser or localStorage +function getInitialLocale(): SupportedLocale { + if (browser) { + // Check localStorage first + const stored = localStorage.getItem('skilltree_locale'); + if (stored && supportedLocales.includes(stored as SupportedLocale)) { + return stored as SupportedLocale; + } + + // Fall back to browser language + const browserLang = navigator.language.split('-')[0]; + if (supportedLocales.includes(browserLang as SupportedLocale)) { + return browserLang as SupportedLocale; + } + } + + return defaultLocale; +} + +// Initialize i18n at module scope (required for SSR) +init({ + fallbackLocale: defaultLocale, + initialLocale: getInitialLocale(), +}); + +// Set locale and persist to localStorage +export function setLocale(newLocale: SupportedLocale) { + locale.set(newLocale); + if (browser) { + localStorage.setItem('skilltree_locale', newLocale); + } +} + +// 
Wait for locale to be loaded (useful for SSR) +export { waitLocale }; diff --git a/apps/skilltree/apps/web/src/lib/i18n/locales/de.json b/apps/skilltree/apps/web/src/lib/i18n/locales/de.json new file mode 100644 index 000000000..5634bad3c --- /dev/null +++ b/apps/skilltree/apps/web/src/lib/i18n/locales/de.json @@ -0,0 +1,83 @@ +{ + "app": { + "name": "SkillTree", + "loading": "Laden...", + "tagline": "Level Up Your Life" + }, + "nav": { + "skills": "Skills", + "activities": "Aktivitäten", + "stats": "Statistiken", + "settings": "Einstellungen" + }, + "skill": { + "create": "Skill erstellen", + "edit": "Skill bearbeiten", + "delete": "Skill löschen", + "name": "Name", + "description": "Beschreibung", + "branch": "Kategorie", + "level": "Level", + "xp": "XP", + "totalXp": "Gesamt-XP", + "noSkills": "Noch keine Skills", + "addFirst": "Füge deinen ersten Skill hinzu" + }, + "branch": { + "intellect": "Intellekt", + "body": "Körper", + "creativity": "Kreativität", + "social": "Soziales", + "practical": "Praktisches", + "mindset": "Mindset" + }, + "level": { + "unknown": "Unbekannt", + "beginner": "Anfänger", + "intermediate": "Fortgeschritten", + "competent": "Kompetent", + "expert": "Experte", + "master": "Meister" + }, + "activity": { + "log": "Aktivität loggen", + "recent": "Letzte Aktivitäten", + "xpEarned": "+{xp} XP", + "noActivities": "Noch keine Aktivitäten" + }, + "stats": { + "totalXp": "Gesamt-XP", + "totalSkills": "Skills", + "highestLevel": "Höchstes Level", + "streak": "Streak" + }, + "auth": { + "login": "Anmelden", + "logout": "Abmelden", + "register": "Registrieren" + }, + "common": { + "save": "Speichern", + "cancel": "Abbrechen", + "delete": "Löschen", + "edit": "Bearbeiten", + "add": "Hinzufügen", + "close": "Schließen", + "search": "Suchen", + "error": "Fehler", + "success": "Erfolgreich", + "loading": "Laden..." 
+ }, + "errors": { + "loadSkills": "Skills konnten nicht geladen werden", + "createSkill": "Skill konnte nicht erstellt werden", + "updateSkill": "Skill konnte nicht aktualisiert werden", + "deleteSkill": "Skill konnte nicht gelöscht werden" + }, + "success": { + "skillCreated": "Skill erstellt", + "skillUpdated": "Skill aktualisiert", + "skillDeleted": "Skill gelöscht", + "xpAdded": "XP hinzugefügt" + } +} diff --git a/apps/skilltree/apps/web/src/lib/i18n/locales/en.json b/apps/skilltree/apps/web/src/lib/i18n/locales/en.json new file mode 100644 index 000000000..2456fd5a2 --- /dev/null +++ b/apps/skilltree/apps/web/src/lib/i18n/locales/en.json @@ -0,0 +1,83 @@ +{ + "app": { + "name": "SkillTree", + "loading": "Loading...", + "tagline": "Level Up Your Life" + }, + "nav": { + "skills": "Skills", + "activities": "Activities", + "stats": "Statistics", + "settings": "Settings" + }, + "skill": { + "create": "Create skill", + "edit": "Edit skill", + "delete": "Delete skill", + "name": "Name", + "description": "Description", + "branch": "Category", + "level": "Level", + "xp": "XP", + "totalXp": "Total XP", + "noSkills": "No skills yet", + "addFirst": "Add your first skill" + }, + "branch": { + "intellect": "Intellect", + "body": "Body", + "creativity": "Creativity", + "social": "Social", + "practical": "Practical", + "mindset": "Mindset" + }, + "level": { + "unknown": "Unknown", + "beginner": "Beginner", + "intermediate": "Intermediate", + "competent": "Competent", + "expert": "Expert", + "master": "Master" + }, + "activity": { + "log": "Log activity", + "recent": "Recent activities", + "xpEarned": "+{xp} XP", + "noActivities": "No activities yet" + }, + "stats": { + "totalXp": "Total XP", + "totalSkills": "Skills", + "highestLevel": "Highest Level", + "streak": "Streak" + }, + "auth": { + "login": "Login", + "logout": "Logout", + "register": "Register" + }, + "common": { + "save": "Save", + "cancel": "Cancel", + "delete": "Delete", + "edit": "Edit", + "add": "Add", + 
"close": "Close", + "search": "Search", + "error": "Error", + "success": "Success", + "loading": "Loading..." + }, + "errors": { + "loadSkills": "Failed to load skills", + "createSkill": "Failed to create skill", + "updateSkill": "Failed to update skill", + "deleteSkill": "Failed to delete skill" + }, + "success": { + "skillCreated": "Skill created", + "skillUpdated": "Skill updated", + "skillDeleted": "Skill deleted", + "xpAdded": "XP added" + } +} diff --git a/apps/skilltree/apps/web/src/routes/+layout.svelte b/apps/skilltree/apps/web/src/routes/+layout.svelte index 5b68950b5..748f897c9 100644 --- a/apps/skilltree/apps/web/src/routes/+layout.svelte +++ b/apps/skilltree/apps/web/src/routes/+layout.svelte @@ -1,12 +1,15 @@ - SkillTree - Level Up Your Life + {$t('app.name')} - {$t('app.tagline')} -{#if loading} +{#if !appReady}
🌳
-
Loading SkillTree...
+
{$t('app.loading')}
{:else} diff --git a/apps/todo/apps/web/src/lib/i18n/index.ts b/apps/todo/apps/web/src/lib/i18n/index.ts new file mode 100644 index 000000000..69a10dd40 --- /dev/null +++ b/apps/todo/apps/web/src/lib/i18n/index.ts @@ -0,0 +1,49 @@ +import { browser } from '$app/environment'; +import { init, register, locale, waitLocale } from 'svelte-i18n'; + +// List of supported locales +export const supportedLocales = ['de', 'en'] as const; +export type SupportedLocale = (typeof supportedLocales)[number]; + +// Default locale +const defaultLocale = 'de'; + +// Register all available locales +register('de', () => import('./locales/de.json')); +register('en', () => import('./locales/en.json')); + +// Get initial locale from browser or localStorage +function getInitialLocale(): SupportedLocale { + if (browser) { + // Check localStorage first + const stored = localStorage.getItem('todo_locale'); + if (stored && supportedLocales.includes(stored as SupportedLocale)) { + return stored as SupportedLocale; + } + + // Fall back to browser language + const browserLang = navigator.language.split('-')[0]; + if (supportedLocales.includes(browserLang as SupportedLocale)) { + return browserLang as SupportedLocale; + } + } + + return defaultLocale; +} + +// Initialize i18n at module scope (required for SSR) +init({ + fallbackLocale: defaultLocale, + initialLocale: getInitialLocale(), +}); + +// Set locale and persist to localStorage +export function setLocale(newLocale: SupportedLocale) { + locale.set(newLocale); + if (browser) { + localStorage.setItem('todo_locale', newLocale); + } +} + +// Wait for locale to be loaded (useful for SSR) +export { waitLocale }; diff --git a/apps/todo/apps/web/src/lib/i18n/locales/de.json b/apps/todo/apps/web/src/lib/i18n/locales/de.json new file mode 100644 index 000000000..44f98227e --- /dev/null +++ b/apps/todo/apps/web/src/lib/i18n/locales/de.json @@ -0,0 +1,104 @@ +{ + "app": { + "name": "Todo", + "loading": "Laden..." 
+ }, + "nav": { + "inbox": "Eingang", + "today": "Heute", + "upcoming": "Anstehend", + "projects": "Projekte", + "labels": "Labels", + "completed": "Erledigt", + "settings": "Einstellungen", + "feedback": "Feedback" + }, + "task": { + "title": "Titel", + "description": "Beschreibung", + "dueDate": "Fällig am", + "dueTime": "Uhrzeit", + "priority": "Priorität", + "project": "Projekt", + "labels": "Labels", + "subtasks": "Teilaufgaben", + "reminder": "Erinnerung", + "repeat": "Wiederholen", + "addTask": "Aufgabe hinzufügen", + "editTask": "Aufgabe bearbeiten", + "deleteTask": "Aufgabe löschen", + "completeTask": "Aufgabe erledigen", + "uncompleteTask": "Als unerledigt markieren", + "noTasks": "Keine Aufgaben", + "noTasksToday": "Keine Aufgaben für heute", + "noTasksUpcoming": "Keine anstehenden Aufgaben" + }, + "project": { + "create": "Projekt erstellen", + "edit": "Projekt bearbeiten", + "delete": "Projekt löschen", + "name": "Name", + "color": "Farbe", + "icon": "Symbol", + "archive": "Archivieren", + "noProjects": "Keine Projekte" + }, + "label": { + "create": "Label erstellen", + "edit": "Label bearbeiten", + "delete": "Label löschen", + "name": "Name", + "color": "Farbe", + "noLabels": "Keine Labels" + }, + "priority": { + "urgent": "Dringend", + "high": "Hoch", + "medium": "Normal", + "low": "Niedrig" + }, + "repeat": { + "none": "Nicht wiederholen", + "daily": "Täglich", + "weekly": "Wöchentlich", + "monthly": "Monatlich", + "yearly": "Jährlich" + }, + "auth": { + "login": "Anmelden", + "logout": "Abmelden", + "register": "Registrieren", + "email": "E-Mail", + "password": "Passwort", + "forgotPassword": "Passwort vergessen?" 
+ }, + "common": { + "save": "Speichern", + "cancel": "Abbrechen", + "delete": "Löschen", + "edit": "Bearbeiten", + "add": "Hinzufügen", + "close": "Schließen", + "search": "Suchen", + "error": "Fehler", + "success": "Erfolgreich", + "loading": "Laden...", + "noResults": "Keine Ergebnisse" + }, + "errors": { + "loadTasks": "Aufgaben konnten nicht geladen werden", + "createTask": "Aufgabe konnte nicht erstellt werden", + "updateTask": "Aufgabe konnte nicht aktualisiert werden", + "deleteTask": "Aufgabe konnte nicht gelöscht werden", + "loadProjects": "Projekte konnten nicht geladen werden", + "loadLabels": "Labels konnten nicht geladen werden" + }, + "success": { + "taskCreated": "Aufgabe erstellt", + "taskUpdated": "Aufgabe aktualisiert", + "taskDeleted": "Aufgabe gelöscht", + "taskCompleted": "Aufgabe erledigt", + "projectCreated": "Projekt erstellt", + "labelCreated": "Label erstellt" + } +} diff --git a/apps/todo/apps/web/src/lib/i18n/locales/en.json b/apps/todo/apps/web/src/lib/i18n/locales/en.json new file mode 100644 index 000000000..3038a61f0 --- /dev/null +++ b/apps/todo/apps/web/src/lib/i18n/locales/en.json @@ -0,0 +1,104 @@ +{ + "app": { + "name": "Todo", + "loading": "Loading..." 
+ }, + "nav": { + "inbox": "Inbox", + "today": "Today", + "upcoming": "Upcoming", + "projects": "Projects", + "labels": "Labels", + "completed": "Completed", + "settings": "Settings", + "feedback": "Feedback" + }, + "task": { + "title": "Title", + "description": "Description", + "dueDate": "Due date", + "dueTime": "Time", + "priority": "Priority", + "project": "Project", + "labels": "Labels", + "subtasks": "Subtasks", + "reminder": "Reminder", + "repeat": "Repeat", + "addTask": "Add task", + "editTask": "Edit task", + "deleteTask": "Delete task", + "completeTask": "Complete task", + "uncompleteTask": "Mark as incomplete", + "noTasks": "No tasks", + "noTasksToday": "No tasks for today", + "noTasksUpcoming": "No upcoming tasks" + }, + "project": { + "create": "Create project", + "edit": "Edit project", + "delete": "Delete project", + "name": "Name", + "color": "Color", + "icon": "Icon", + "archive": "Archive", + "noProjects": "No projects" + }, + "label": { + "create": "Create label", + "edit": "Edit label", + "delete": "Delete label", + "name": "Name", + "color": "Color", + "noLabels": "No labels" + }, + "priority": { + "urgent": "Urgent", + "high": "High", + "medium": "Normal", + "low": "Low" + }, + "repeat": { + "none": "Don't repeat", + "daily": "Daily", + "weekly": "Weekly", + "monthly": "Monthly", + "yearly": "Yearly" + }, + "auth": { + "login": "Login", + "logout": "Logout", + "register": "Register", + "email": "Email", + "password": "Password", + "forgotPassword": "Forgot password?" 
+ }, + "common": { + "save": "Save", + "cancel": "Cancel", + "delete": "Delete", + "edit": "Edit", + "add": "Add", + "close": "Close", + "search": "Search", + "error": "Error", + "success": "Success", + "loading": "Loading...", + "noResults": "No results" + }, + "errors": { + "loadTasks": "Failed to load tasks", + "createTask": "Failed to create task", + "updateTask": "Failed to update task", + "deleteTask": "Failed to delete task", + "loadProjects": "Failed to load projects", + "loadLabels": "Failed to load labels" + }, + "success": { + "taskCreated": "Task created", + "taskUpdated": "Task updated", + "taskDeleted": "Task deleted", + "taskCompleted": "Task completed", + "projectCreated": "Project created", + "labelCreated": "Label created" + } +} diff --git a/apps/todo/apps/web/src/routes/+layout.svelte b/apps/todo/apps/web/src/routes/+layout.svelte index 78045c14d..41fb3b9bb 100644 --- a/apps/todo/apps/web/src/routes/+layout.svelte +++ b/apps/todo/apps/web/src/routes/+layout.svelte @@ -1,6 +1,8 @@ -{#if loading} +{#if !appReady} {:else}
diff --git a/docs/CONSISTENCY_REPORT.md b/docs/CONSISTENCY_REPORT.md index f7e6add37..b497c4c76 100644 --- a/docs/CONSISTENCY_REPORT.md +++ b/docs/CONSISTENCY_REPORT.md @@ -11,8 +11,8 @@ Nach eingehender Analyse aller Web-Apps im Monorepo wurden folgende Bereiche auf |---------|------------|-----------|--------| | Dependencies & Versionen | ✅ Gut | ~~Hoch~~ | ✅ Erledigt | | Toast System | ✅ Gut | ~~Hoch~~ | ✅ Erledigt | -| API Client Patterns | ⚠️ Mittel | **Hoch** | 🚧 In Arbeit | -| i18n Implementation | ⚠️ Mittel | Mittel | Offen | +| API Client Patterns | ✅ Gut | ~~Hoch~~ | ✅ Erledigt | +| i18n Implementation | ✅ Gut | ~~Mittel~~ | ✅ Erledigt | | Auth Implementation | ✅ Gut | Niedrig | - | | Styling & Tailwind | ✅ Sehr gut | Niedrig | - | | Komponenten & Layouts | ⚠️ Mittel | Mittel | Offen | @@ -23,7 +23,8 @@ Nach eingehender Analyse aller Web-Apps im Monorepo wurden folgende Bereiche auf 2. ✅ **SvelteKit, Svelte, TypeScript Versionen vereinheitlicht** - Alle 15 Web-Apps auf gleicher Version 3. ✅ **Toast System zentralisiert** - `@manacore/shared-ui` Toast für 6 Apps (calendar, chat, clock, contacts, picture, storage) 4. ✅ **lucide-svelte entfernt** - shared-ui nutzt jetzt nur noch `@manacore/shared-icons` -5. 🚧 **@manacore/shared-api-client Package erstellt** - Clock App als erstes migriert +5. ✅ **@manacore/shared-api-client Package erstellt** - 10 Apps migriert (clock, todo, contacts, storage, calendar, picture, nutriphi, planta, questions, skilltree) +6. ✅ **i18n zu 6 Apps hinzugefügt** - todo, skilltree, nutriphi, planta, questions, matrix (jeweils DE + EN) --- @@ -48,12 +49,12 @@ Nach eingehender Analyse aller Web-Apps im Monorepo wurden folgende Bereiche auf --- -## 2. API Client Patterns 🚧 +## 2. 
API Client Patterns ✅ -> **Status: In Arbeit (29.01.2026)** +> **Status: Erledigt (29.01.2026)** > - ✅ `@manacore/shared-api-client` Package erstellt -> - ✅ Clock App migriert (Proof of Concept) -> - ⏳ Verbleibende Apps: calendar, chat, contacts, manadeck, manacore, nutriphi, picture, planta, presi, questions, skilltree, storage, todo +> - ✅ 10 Apps migriert: clock, todo, contacts, storage, calendar, picture, nutriphi, planta, questions, skilltree +> - ⏭️ Nicht migriert (komplexe Custom-Logik): chat, manadeck, manacore, presi ### Kritische Inkonsistenzen (vor Migration) @@ -107,45 +108,42 @@ Nach eingehender Analyse aller Web-Apps im Monorepo wurden folgende Bereiche auf --- -## 3. i18n Implementation +## 3. i18n Implementation ✅ -### Status +> **Status: Erledigt (29.01.2026)** +> - ✅ 6 Apps mit i18n hinzugefügt: todo, skilltree, nutriphi, planta, questions, matrix +> - ⏭️ Nicht migriert: zitare (unvollständiges Web-App Setup) -#### Apps MIT i18n (10) +### Apps MIT i18n (15) -| App | Sprachen | -|-----|----------| -| chat | DE, EN, IT, FR, ES | -| picture | DE, EN, IT, FR, ES | -| calendar | DE, EN, IT, FR, ES | -| presi | DE, EN, IT, FR, ES | -| manadeck | DE, EN, IT, FR, ES | -| manacore | DE, EN, IT, FR, ES | -| contacts | DE, EN | -| storage | DE, EN | -| clock | DE, EN | +| App | Sprachen | localStorage Key | +|-----|----------|------------------| +| chat | DE, EN, IT, FR, ES | `chat_locale` | +| picture | DE, EN, IT, FR, ES | `picture_locale` | +| calendar | DE, EN, IT, FR, ES | `calendar_locale` | +| presi | DE, EN, IT, FR, ES | `presi_locale` | +| manadeck | DE, EN, IT, FR, ES | `manadeck_locale` | +| manacore | DE, EN, IT, FR, ES | `manacore_locale` | +| contacts | DE, EN | `contacts_locale` | +| storage | DE, EN | `storage_locale` | +| clock | DE, EN | `clock_locale` | +| todo | DE, EN | `todo_locale` | +| skilltree | DE, EN | `skilltree_locale` | +| nutriphi | DE, EN | `nutriphi_locale` | +| planta | DE, EN | `planta_locale` | +| questions | DE, EN | 
`questions_locale` | +| matrix | DE, EN | `matrix_locale` | -#### Apps OHNE i18n (7) +### Apps OHNE i18n (1) -- zitare -- skilltree -- planta -- nutriphi -- todo -- matrix -- questions +- zitare (Web-App nicht vollständig eingerichtet) -### Inkonsistenzen +### Durchgeführte Änderungen -- Verschiedene localStorage Keys: `chat_locale`, `picture_locale`, `locale` -- Unterschiedliche Sprachanzahl (2-5 Sprachen) -- Manacore Landing nutzt Custom-Implementation statt svelte-i18n - -### Empfehlungen - -1. **i18n zu allen 7 fehlenden Apps hinzufügen** -2. **Storage Key vereinheitlichen** auf `{app}_locale` -3. **Mindestens DE + EN** für alle Apps +- ✅ Einheitlicher localStorage Key Pattern: `{app}_locale` +- ✅ Mindestens DE + EN für alle neuen Apps +- ✅ Konsistentes `svelte-i18n` Setup mit SSR-Support +- ✅ i18n-Loading State in +layout.svelte integriert --- @@ -247,18 +245,17 @@ Alle Apps nutzen **Mana Core Auth** mit `@manacore/shared-auth`. | ~~Toast System vereinheitlichen~~ | ✅ Erledigt | | ~~Dependencies aktualisieren~~ | ✅ Erledigt | | ~~lucide-svelte aus shared-ui entfernen~~ | ✅ Erledigt | +| ~~API Client Package erstellen~~ | ✅ Erledigt (10 Apps migriert) | +| ~~i18n zu 6 Apps hinzufügen~~ | ✅ Erledigt | ### 🔴 Hohe Priorität -| Aufgabe | Aufwand | Impact | -|---------|---------|--------| -| API Client Package erstellen | Hoch | Alle Apps | +_(Keine offenen Aufgaben mit hoher Priorität)_ ### 🟡 Mittlere Priorität | Aufgabe | Aufwand | Impact | |---------|---------|--------| -| i18n zu 7 Apps hinzufügen | Mittel | Internationalisierung | | AuthGateModal in Shared Package | Niedrig | Code-Reduktion | | Global Error Handler extrahieren | Niedrig | Error UX | @@ -273,10 +270,11 @@ Alle Apps nutzen **Mana Core Auth** mit `@manacore/shared-auth`. ## Nächste Schritte -1. **API Client Package** als nächstes angehen (höchster Impact) -2. **i18n** zu fehlenden 7 Apps hinzufügen +1. ~~**API Client Package** als nächstes angehen (höchster Impact)~~ ✅ Erledigt +2. 
~~**i18n** zu fehlenden Apps hinzufügen~~ ✅ Erledigt (6 Apps) 3. **AuthGateModal** in Shared Package extrahieren -4. Schrittweise weitere Punkte abarbeiten +4. **Global Error Handler** extrahieren +5. Schrittweise weitere Punkte abarbeiten --- diff --git a/scripts/mac-mini/setup-tts.sh b/scripts/mac-mini/setup-tts.sh new file mode 100755 index 000000000..2cb8732ca --- /dev/null +++ b/scripts/mac-mini/setup-tts.sh @@ -0,0 +1,184 @@
+#!/bin/bash
+# Setup script for Mana TTS as a launchd service on Mac Mini
+# Run this on the Mac Mini server to install and start the TTS service
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
+SERVICE_DIR="$REPO_DIR/services/mana-tts"
+PLIST_NAME="com.manacore.tts"
+PLIST_PATH="$HOME/Library/LaunchAgents/$PLIST_NAME.plist"
+
+echo "=========================================="
+echo "Mana TTS - Mac Mini Setup"
+echo "=========================================="
+echo ""
+echo "Service directory: $SERVICE_DIR"
+echo "Plist path: $PLIST_PATH"
+echo ""
+
+# Verify service directory exists
+if [[ ! -d "$SERVICE_DIR" ]]; then
+    echo "Error: Service directory not found: $SERVICE_DIR"
+    exit 1
+fi
+
+# Run main setup if venv doesn't exist
+if [[ ! -d "$SERVICE_DIR/.venv" ]]; then
+    echo "Virtual environment not found. Running setup..."
+    echo ""
+    "$SERVICE_DIR/setup.sh"
+    echo ""
+fi
+
+# Create LaunchAgents directory if needed
+mkdir -p "$HOME/Library/LaunchAgents"
+
+# Unload existing service if running
+if launchctl list | grep -q "$PLIST_NAME"; then
+    echo "Stopping existing service..."
+    launchctl unload "$PLIST_PATH" 2>/dev/null || true
+fi
+
+# Create plist file
+# The heredoc delimiter is unquoted so $PLIST_NAME and $SERVICE_DIR expand here
+echo "Creating launchd plist..."
+cat > "$PLIST_PATH" << EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+    <key>Label</key>
+    <string>$PLIST_NAME</string>
+
+    <key>ProgramArguments</key>
+    <array>
+        <string>$SERVICE_DIR/.venv/bin/uvicorn</string>
+        <string>app.main:app</string>
+        <string>--host</string>
+        <string>0.0.0.0</string>
+        <string>--port</string>
+        <string>3022</string>
+    </array>
+
+    <key>WorkingDirectory</key>
+    <string>$SERVICE_DIR</string>
+
+    <key>EnvironmentVariables</key>
+    <dict>
+        <key>PATH</key>
+        <string>/opt/homebrew/bin:$SERVICE_DIR/.venv/bin:/usr/local/bin:/usr/bin:/bin</string>
+        <key>PORT</key>
+        <string>3022</string>
+        <key>PRELOAD_MODELS</key>
+        <string>false</string>
+        <key>MAX_TEXT_LENGTH</key>
+        <string>1000</string>
+        <key>CORS_ORIGINS</key>
+        <string>https://mana.how,https://chat.mana.how,https://todo.mana.how,https://zitare.mana.how</string>
+    </dict>
+
+    <key>RunAtLoad</key>
+    <true/>
+
+    <key>KeepAlive</key>
+    <dict>
+        <key>SuccessfulExit</key>
+        <false/>
+        <key>Crashed</key>
+        <true/>
+    </dict>
+
+    <key>ThrottleInterval</key>
+    <integer>10</integer>
+
+    <key>StandardOutPath</key>
+    <string>/tmp/manacore-tts.log</string>
+
+    <key>StandardErrorPath</key>
+    <string>/tmp/manacore-tts.error.log</string>
+</dict>
+</plist>
+EOF
+
+echo "Plist created: $PLIST_PATH"
+
+# Load service
+echo ""
+echo "Loading service..."
+launchctl load "$PLIST_PATH"
+
+# Wait for startup
+echo "Waiting for service to start..."
+sleep 3
+
+# Check if running
+if launchctl list | grep -q "$PLIST_NAME"; then
+    echo "Service loaded successfully!"
+else
+    echo "Warning: Service may not have loaded correctly."
+    echo "Check logs: tail -f /tmp/manacore-tts.log"
+fi
+
+# Health check: poll a few times because startup time varies
+echo ""
+echo "Running health check..."
+
+HEALTH_URL="http://localhost:3022/health"
+HEALTH_BODY=""
+for attempt in 1 2 3 4 5; do
+    # '|| true': a refused connection must not abort the script under 'set -e'
+    HEALTH_BODY="$(curl -s --max-time 5 "$HEALTH_URL" || true)"
+    if echo "$HEALTH_BODY" | grep -q "healthy"; then
+        break
+    fi
+    sleep 2
+done
+
+if echo "$HEALTH_BODY" | grep -q "healthy"; then
+    echo "Health check passed!"
+    echo ""
+    # Print the raw body if it is not valid JSON instead of aborting
+    echo "$HEALTH_BODY" | python3 -m json.tool || echo "$HEALTH_BODY"
+else
+    echo "Health check failed. Service may still be starting."
+    echo "Try again in a few seconds: curl http://localhost:3022/health"
+fi
+
+echo ""
+echo "=========================================="
+echo "Setup Complete!"
+echo "=========================================="
+echo ""
+echo "Service management commands:"
+echo ""
+echo " # View logs"
+echo " tail -f /tmp/manacore-tts.log"
+echo ""
+echo " # Stop service"
+echo " launchctl unload $PLIST_PATH"
+echo ""
+echo " # Start service"
+echo " launchctl load $PLIST_PATH"
+echo ""
+echo " # Restart service"
+echo " launchctl unload $PLIST_PATH && launchctl load $PLIST_PATH"
+echo ""
+echo " # Check status"
+echo " launchctl list | grep $PLIST_NAME"
+echo ""
+echo "Test endpoints:"
+echo ""
+echo " # Health check"
+echo " curl http://localhost:3022/health"
+echo ""
+echo " # List voices"
+echo " curl http://localhost:3022/voices"
+echo ""
+echo " # Synthesize with Kokoro"
+echo " curl -X POST http://localhost:3022/synthesize/kokoro \\"
+echo " -H 'Content-Type: application/json' \\"
+echo " -d '{\"text\": \"Hello world\", \"voice\": \"af_heart\"}' \\"
+echo " --output test.wav"
+echo ""
diff --git a/services/mana-tts/CLAUDE.md b/services/mana-tts/CLAUDE.md new file mode 100644 index 000000000..471f7eb4c --- /dev/null +++ b/services/mana-tts/CLAUDE.md @@ -0,0 +1,100 @@ +# CLAUDE.md - Mana TTS Service + +## Service Overview + +Text-to-Speech microservice using MLX-optimized models for Apple Silicon: + +- **Port**: 3022 +- **Framework**: Python + FastAPI +- **Models**: Kokoro-82M (fast), F5-TTS (voice cloning) + +## Commands + +```bash +# Setup +./setup.sh + +# Development +source .venv/bin/activate +uvicorn app.main:app --host 0.0.0.0 --port 3022 --reload + +# Production (Mac Mini) +../../scripts/mac-mini/setup-tts.sh + +# Test +curl http://localhost:3022/health +curl -X POST http://localhost:3022/synthesize/kokoro \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello world", "voice": "af_heart"}' \ + --output test.wav +``` + +## File Structure + +``` +services/mana-tts/ +├── app/ +│ ├── __init__.py +│ ├── main.py # FastAPI endpoints +│ ├── kokoro_service.py # Kokoro TTS (preset voices) +│ ├── f5_service.py # F5-TTS (voice
cloning) +│ ├── voice_manager.py # Custom voice registry +│ └── audio_utils.py # Audio format conversion +├── voices/ # Custom voice storage +├── mlx_models/ # Model cache +├── setup.sh # Setup script +├── requirements.txt +└── README.md +``` + +## API Endpoints + +| Endpoint | Method | Purpose | +|----------|--------|---------| +| `/health` | GET | Health check | +| `/models` | GET | Model info | +| `/voices` | GET | List all voices | +| `/voices` | POST | Register custom voice | +| `/voices/{id}` | DELETE | Delete custom voice | +| `/synthesize/kokoro` | POST | Kokoro synthesis | +| `/synthesize` | POST | F5-TTS voice cloning | +| `/synthesize/auto` | POST | Auto-select model | + +## Models + +### Kokoro-82M +- ~300 MB download +- 30+ preset voices +- Fast inference +- No reference audio needed + +### F5-TTS +- ~6 GB download +- Voice cloning capability +- Requires reference audio + transcript +- Higher quality, slower + +## Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `3022` | Service port | +| `PRELOAD_MODELS` | `false` | Load on startup | +| `MAX_TEXT_LENGTH` | `1000` | Max chars | +| `CORS_ORIGINS` | (production URLs) | CORS config | + +## Key Dependencies + +- `fastapi` - Web framework +- `f5-tts-mlx` - Voice cloning model +- `mlx-audio` - Kokoro implementation +- `mlx` - Apple Silicon ML framework +- `soundfile` - Audio I/O +- `pydub` - MP3 conversion + +## Development Notes + +- Models load lazily on first request (unless `PRELOAD_MODELS=true`) +- Custom voices stored in `voices/` with reference audio + transcript +- Singleton pattern for model instances +- Audio returned as raw bytes with headers for metadata diff --git a/services/mana-tts/README.md b/services/mana-tts/README.md new file mode 100644 index 000000000..58359e426 --- /dev/null +++ b/services/mana-tts/README.md @@ -0,0 +1,237 @@ +# Mana TTS + +Text-to-Speech microservice with voice cloning support, optimized for Apple Silicon. 
+ +## Features + +- **Kokoro TTS**: Fast preset voices (~300 MB model) +- **F5-TTS**: Voice cloning with reference audio (~6 GB model) +- **MLX Optimized**: Runs efficiently on Apple Silicon +- **REST API**: FastAPI with OpenAPI documentation + +## Quick Start + +### Setup + +```bash +# Run setup script +./setup.sh + +# Or manually +python3.11 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +### Start Service + +```bash +source .venv/bin/activate +uvicorn app.main:app --host 0.0.0.0 --port 3022 +``` + +### Test + +```bash +# Health check +curl http://localhost:3022/health + +# Synthesize with Kokoro +curl -X POST http://localhost:3022/synthesize/kokoro \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello world", "voice": "af_heart"}' \ + --output test.wav + +# Play audio (macOS) +afplay test.wav +``` + +## API Endpoints + +### Health & Info + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/health` | GET | Health check | +| `/models` | GET | Available models | +| `/voices` | GET | All available voices | + +### Synthesis + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/synthesize/kokoro` | POST | Kokoro preset voices | +| `/synthesize` | POST | F5-TTS voice cloning | +| `/synthesize/auto` | POST | Auto-select model | + +### Voice Management + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/voices` | POST | Register custom voice | +| `/voices/{id}` | DELETE | Delete custom voice | + +## Synthesis Examples + +### Kokoro (Fast Preset Voices) + +```bash +curl -X POST http://localhost:3022/synthesize/kokoro \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Welcome to Mana TTS, your personal voice synthesis service.", + "voice": "af_heart", + "speed": 1.0, + "output_format": "wav" + }' \ + --output output.wav +``` + +### F5-TTS (Voice Cloning) + +```bash +# With reference audio upload +curl -X POST 
http://localhost:3022/synthesize \ + -F "text=Hello, this is a cloned voice speaking." \ + -F "reference_audio=@reference.wav" \ + -F "reference_text=This is what the reference audio says." \ + -F "output_format=wav" \ + --output cloned.wav + +# With registered voice +curl -X POST http://localhost:3022/synthesize \ + -F "text=Hello from my registered voice." \ + -F "voice_id=my_custom_voice" \ + --output output.wav +``` + +### Auto-Select + +```bash +# Uses Kokoro for preset voices, F5-TTS for custom +curl -X POST http://localhost:3022/synthesize/auto \ + -H "Content-Type: application/json" \ + -d '{"text": "Auto-selected synthesis", "voice": "af_bella"}' \ + --output output.wav +``` + +## Available Kokoro Voices + +### American Female +- `af_heart` - Warm, emotional (default) +- `af_alloy` - Neutral, professional +- `af_bella` - Friendly, approachable +- `af_jessica` - Confident, clear +- `af_nicole` - Bright, energetic +- `af_nova` - Modern, dynamic +- `af_sarah` - Warm, conversational +- ... and more + +### American Male +- `am_adam` - Deep, authoritative +- `am_echo` - Resonant, clear +- `am_eric` - Professional, neutral +- `am_michael` - Warm, trustworthy +- ... and more + +### British Female +- `bf_alice` - Refined, elegant +- `bf_emma` - Clear, professional +- `bf_lily` - Soft, gentle + +### British Male +- `bm_daniel` - Classic, authoritative +- `bm_fable` - Storyteller, expressive +- `bm_george` - Traditional, clear + +## Voice Registration + +Register a custom voice for F5-TTS voice cloning: + +```bash +curl -X POST http://localhost:3022/voices \ + -F "voice_id=my_voice" \ + -F "name=My Custom Voice" \ + -F "description=A sample voice for testing" \ + -F "transcript=Hello, this is the text spoken in the reference audio." 
\ + -F "reference_audio=@my_reference.wav" +``` + +Pre-defined voices can also be placed in the `voices/` directory: + +``` +voices/ +└── my_voice/ + ├── reference.wav # Reference audio (required) + ├── transcript.txt # Transcript of reference (required) + └── metadata.json # Name and description (optional) +``` + +## Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `3022` | API port | +| `PRELOAD_MODELS` | `false` | Load models on startup | +| `MAX_TEXT_LENGTH` | `1000` | Max characters per request | +| `CORS_ORIGINS` | `https://mana.how,...` | Allowed CORS origins | +| `F5_MODEL` | `lucasnewman/f5-tts-mlx` | F5-TTS model | +| `KOKORO_MODEL` | `mlx-community/Kokoro-82M-bf16` | Kokoro model | + +## Mac Mini Deployment + +```bash +# Install and start as launchd service +../../scripts/mac-mini/setup-tts.sh + +# Service management +launchctl list | grep com.manacore.tts +launchctl unload ~/Library/LaunchAgents/com.manacore.tts.plist +launchctl load ~/Library/LaunchAgents/com.manacore.tts.plist + +# View logs +tail -f /tmp/manacore-tts.log +``` + +## Requirements + +- Python 3.10+ +- macOS with Apple Silicon (recommended) +- ~7 GB disk space for models +- 16 GB RAM recommended +- ffmpeg (for MP3 output) + +## Troubleshooting + +### Models Not Loading + +```bash +# Check MLX installation +python -c "import mlx; print(mlx.__version__)" + +# Check mlx-audio +python -c "import mlx_audio; print('OK')" + +# Check f5-tts-mlx +python -c "from f5_tts_mlx import F5TTS; print('OK')" +``` + +### MP3 Output Not Working + +```bash +# Install ffmpeg +brew install ffmpeg + +# Verify +ffmpeg -version +``` + +### Memory Issues + +- Reduce `MAX_TEXT_LENGTH` for less memory usage +- Set `PRELOAD_MODELS=false` for lazy loading +- F5-TTS requires ~6 GB, Kokoro ~500 MB + +## API Documentation + +When running, visit http://localhost:3022/docs for interactive API documentation. 
diff --git a/services/mana-tts/app/__init__.py b/services/mana-tts/app/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/services/mana-tts/app/audio_utils.py b/services/mana-tts/app/audio_utils.py new file mode 100644 index 000000000..ce84144c1 --- /dev/null +++ b/services/mana-tts/app/audio_utils.py @@ -0,0 +1,251 @@
+"""
+Audio conversion utilities for the TTS service.
+Handles format conversion between WAV and MP3.
+"""
+
+import io
+import logging
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import soundfile as sf
+
+logger = logging.getLogger(__name__)
+
+# Supported output formats
+SUPPORTED_FORMATS = ["wav", "mp3"]
+DEFAULT_FORMAT = "wav"
+DEFAULT_SAMPLE_RATE = 24000
+DEFAULT_MP3_BITRATE = "192k"
+
+
+def audio_to_wav_bytes(
+    audio_data: np.ndarray,
+    sample_rate: int = DEFAULT_SAMPLE_RATE,
+) -> bytes:
+    """
+    Convert numpy audio array to WAV bytes.
+
+    Args:
+        audio_data: Audio samples as numpy array
+        sample_rate: Sample rate in Hz
+
+    Returns:
+        WAV file as bytes
+    """
+    buffer = io.BytesIO()
+    sf.write(buffer, audio_data, sample_rate, format="WAV")
+    return buffer.getvalue()
+
+
+def _encode_mp3(wav_bytes: bytes, bitrate: str) -> Optional[bytes]:
+    """
+    Encode WAV bytes as MP3, or return None when encoding is not possible.
+
+    Failures (pydub missing, export error) are logged instead of raised so
+    that callers can choose their own fallback.
+    """
+    try:
+        from pydub import AudioSegment
+    except ImportError:
+        logger.error("pydub not installed, falling back to WAV")
+        return None
+
+    try:
+        audio_segment = AudioSegment.from_wav(io.BytesIO(wav_bytes))
+        buffer = io.BytesIO()
+        audio_segment.export(buffer, format="mp3", bitrate=bitrate)
+        return buffer.getvalue()
+    except Exception as e:
+        logger.error(f"MP3 conversion failed: {e}, falling back to WAV")
+        return None
+
+
+def audio_to_mp3_bytes(
+    audio_data: np.ndarray,
+    sample_rate: int = DEFAULT_SAMPLE_RATE,
+    bitrate: str = DEFAULT_MP3_BITRATE,
+) -> bytes:
+    """
+    Convert numpy audio array to MP3 bytes.
+    Requires ffmpeg to be installed.
+
+    If MP3 encoding is not possible, the WAV bytes are returned instead.
+    Use convert_audio() when the caller needs the matching content type.
+
+    Args:
+        audio_data: Audio samples as numpy array
+        sample_rate: Sample rate in Hz
+        bitrate: MP3 bitrate (e.g., "128k", "192k", "320k")
+
+    Returns:
+        MP3 file as bytes (WAV bytes if encoding failed)
+    """
+    wav_bytes = audio_to_wav_bytes(audio_data, sample_rate)
+    mp3_bytes = _encode_mp3(wav_bytes, bitrate)
+    return wav_bytes if mp3_bytes is None else mp3_bytes
+
+
+def convert_audio(
+    audio_data: np.ndarray,
+    sample_rate: int = DEFAULT_SAMPLE_RATE,
+    output_format: str = DEFAULT_FORMAT,
+) -> tuple[bytes, str]:
+    """
+    Convert audio data to the specified format.
+
+    Unsupported formats, and MP3 requests that cannot be encoded, fall back
+    to WAV. The returned content type always describes the returned bytes.
+
+    Args:
+        audio_data: Audio samples as numpy array
+        sample_rate: Sample rate in Hz
+        output_format: Output format ("wav" or "mp3")
+
+    Returns:
+        Tuple of (audio bytes, content type)
+    """
+    output_format = output_format.lower()
+
+    if output_format not in SUPPORTED_FORMATS:
+        logger.warning(f"Unsupported format '{output_format}', using WAV")
+        output_format = "wav"
+
+    wav_bytes = audio_to_wav_bytes(audio_data, sample_rate)
+
+    if output_format == "mp3":
+        mp3_bytes = _encode_mp3(wav_bytes, DEFAULT_MP3_BITRATE)
+        if mp3_bytes is not None:
+            return mp3_bytes, "audio/mpeg"
+        # Encoding failed: label the WAV fallback as WAV, not as audio/mpeg
+
+    return wav_bytes, "audio/wav"
+
+
+def get_content_type(format: str) -> str:
+    """Get MIME content type for audio format (unknown formats map to WAV)."""
+    content_types = {
+        "wav": "audio/wav",
+        "mp3": "audio/mpeg",
+    }
+    return content_types.get(format.lower(), "audio/wav")
+
+
+def load_reference_audio(
+    file_path: str | Path,
+) -> tuple[np.ndarray, int]:
+    """
+    Load reference audio file for voice cloning.
+
+    Args:
+        file_path: Path to the audio file
+
+    Returns:
+        Tuple of (audio data as numpy array, sample rate)
+    """
+    audio_data, sample_rate = sf.read(file_path)
+
+    # Multi-channel input is averaged down to a single channel
+    if audio_data.ndim > 1:
+        audio_data = np.mean(audio_data, axis=1)
+
+    return audio_data, sample_rate
+
+
+def resample_audio(
+    audio_data: np.ndarray,
+    original_sr: int,
+    target_sr: int = DEFAULT_SAMPLE_RATE,
+) -> np.ndarray:
+    """
+    Resample audio to target sample rate.
+
+    Args:
+        audio_data: Audio samples as numpy array
+        original_sr: Original sample rate
+        target_sr: Target sample rate
+
+    Returns:
+        Resampled audio data (returned unchanged if no resampling is needed)
+    """
+    # Nothing to do for matching rates or empty input
+    if original_sr == target_sr or len(audio_data) == 0:
+        return audio_data
+
+    from scipy import signal
+
+    # Scale the sample count by the ratio of the two rates
+    num_samples = int(len(audio_data) * target_sr / original_sr)
+    resampled = signal.resample(audio_data, num_samples)
+
+    return resampled.astype(np.float32)
+
+
+def normalize_audio(
+    audio_data: np.ndarray,
+    target_db: float = -3.0,
+) -> np.ndarray:
+    """
+    Normalize audio to target dB level.
+
+    Args:
+        audio_data: Audio samples as numpy array
+        target_db: Target peak level in dB
+
+    Returns:
+        Normalized audio data (unchanged for empty or all-zero input)
+    """
+    # np.max raises on an empty array, so handle that case first
+    if audio_data.size == 0:
+        return audio_data
+
+    # Calculate current peak
+    peak = np.max(np.abs(audio_data))
+
+    if peak == 0:
+        return audio_data
+
+    # Calculate target peak from dB
+    target_peak = 10 ** (target_db / 20)
+
+    # Apply gain
+    gain = target_peak / peak
+    return audio_data * gain
+
+
+def save_temp_audio(
+    audio_bytes: bytes,
+    suffix: str = ".wav",
+) -> str:
+    """
+    Save audio bytes to a temporary file.
+
+    The file is not deleted automatically; pass the returned path to
+    cleanup_temp_file() when it is no longer needed.
+
+    Args:
+        audio_bytes: Audio data as bytes
+        suffix: File extension
+
+    Returns:
+        Path to temporary file
+    """
+    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
+        tmp.write(audio_bytes)
+        return tmp.name
+
+
+def cleanup_temp_file(file_path: str) -> None:
+    """
+    Clean up a temporary file.
+
+    Args:
+        file_path: Path to the file to delete
+    """
+    try:
+        Path(file_path).unlink(missing_ok=True)
+    except Exception as e:
+        # Cleanup is best-effort: never raise, but leave a trace in the log
+        logger.debug(f"Could not delete temp file {file_path}: {e}")
diff --git a/services/mana-tts/app/f5_service.py b/services/mana-tts/app/f5_service.py new file mode 100644 index 000000000..d6494c212 --- /dev/null +++ b/services/mana-tts/app/f5_service.py @@ -0,0 +1,227 @@
+"""
+F5-TTS Service for voice cloning synthesis.
+Uses f5-tts-mlx optimized for Apple Silicon.
+"""
+
+import logging
+import os
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+# Global singleton for lazy initialization
+_f5_model = None
+_f5_model_name = None
+
+# Default model
+DEFAULT_F5_MODEL = os.getenv("F5_MODEL", "lucasnewman/f5-tts-mlx")
+
+# Default generation parameters
+DEFAULT_DURATION = 10.0  # seconds
+DEFAULT_STEPS = 32
+DEFAULT_CFG_STRENGTH = 2.0
+DEFAULT_SWAY_COEF = -1.0
+DEFAULT_SPEED = 1.0
+
+
+@dataclass
+class F5Result:
+    """Result from F5-TTS synthesis."""
+
+    audio: np.ndarray
+    sample_rate: int
+    duration: float
+    voice_id: Optional[str] = None
+
+
+def get_f5_model(model_name: str = DEFAULT_F5_MODEL):
+    """
+    Get or create F5-TTS model instance (singleton pattern).
+
+    Requesting a different model name replaces the cached instance.
+
+    Args:
+        model_name: HuggingFace model identifier
+
+    Returns:
+        F5TTS model instance
+
+    Raises:
+        RuntimeError: If f5-tts-mlx is not installed
+    """
+    global _f5_model, _f5_model_name
+
+    # Return existing model if same model name
+    if _f5_model is not None and _f5_model_name == model_name:
+        return _f5_model
+
+    logger.info(f"Loading F5-TTS model: {model_name}")
+
+    try:
+        from f5_tts_mlx import F5TTS
+
+        # NOTE(review): constructor signature is not visible here; confirm
+        # that the installed f5-tts-mlx accepts F5TTS(model_name=...)
+        _f5_model = F5TTS(model_name=model_name)
+        _f5_model_name = model_name
+        logger.info("F5-TTS model loaded successfully")
+        return _f5_model
+
+    except ImportError as e:
+        logger.error(f"Failed to import f5_tts_mlx: {e}")
+        # Chain the cause so the original import failure stays in the traceback
+        raise RuntimeError(
+            "f5-tts-mlx not installed. Run: pip install f5-tts-mlx"
+        ) from e
+    except Exception as e:
+        logger.error(f"Failed to load F5-TTS model: {e}")
+        raise
+
+
+def is_f5_loaded() -> bool:
+    """Check if F5-TTS model is currently loaded."""
+    return _f5_model is not None
+
+
+async def synthesize_f5(
+    text: str,
+    reference_audio_path: str,
+    reference_text: str,
+    duration: Optional[float] = None,
+    steps: int = DEFAULT_STEPS,
+    cfg_strength: float = DEFAULT_CFG_STRENGTH,
+    sway_coef: float = DEFAULT_SWAY_COEF,
+    speed: float = DEFAULT_SPEED,
+    model_name: str = DEFAULT_F5_MODEL,
+) -> F5Result:
+    """
+    Synthesize speech using F5-TTS with voice cloning.
+
+    Args:
+        text: Text to synthesize
+        reference_audio_path: Path to reference audio file
+        reference_text: Transcript of the reference audio
+        duration: Target duration in seconds (auto-calculated if None)
+        steps: Number of diffusion steps
+        cfg_strength: Classifier-free guidance strength
+        sway_coef: Sway sampling coefficient
+        speed: Speech speed multiplier
+        model_name: HuggingFace model identifier
+
+    Returns:
+        F5Result with audio data
+
+    Raises:
+        RuntimeError: If f5-tts-mlx is not installed or synthesis fails
+    """
+    # Get model
+    model = get_f5_model(model_name)
+
+    logger.info(
+        f"Synthesizing with F5-TTS: text_length={len(text)}, "
+        f"ref_audio={reference_audio_path}, steps={steps}"
+    )
+
+    try:
+        # NOTE(review): generate() is called synchronously inside this
+        # coroutine, so the event loop is presumably blocked until it returns.
+        # Consider asyncio.to_thread() once MLX thread-safety is confirmed.
+        audio, sample_rate = model.generate(
+            text=text,
+            ref_audio_path=reference_audio_path,
+            ref_audio_text=reference_text,
+            duration=duration,
+            steps=steps,
+            cfg_strength=cfg_strength,
+            sway_coef=sway_coef,
+            speed=speed,
+        )
+
+        # Calculate duration
+        audio_duration = len(audio) / sample_rate
+
+        logger.info(f"F5-TTS synthesis complete: duration={audio_duration:.2f}s")
+
+        return F5Result(
+            audio=audio,
+            sample_rate=sample_rate,
+            duration=audio_duration,
+        )
+
+    except Exception as e:
+        logger.error(f"F5-TTS synthesis failed: {e}")
+        raise RuntimeError(f"Voice cloning synthesis failed: {e}") from e
+
+
+async def synthesize_f5_from_bytes(
+    text: str,
+    reference_audio_bytes: bytes,
+    reference_text: str,
+    audio_extension: str = ".wav",
+    **kwargs,
+) -> F5Result:
+    """
+    Synthesize speech using F5-TTS with reference audio as bytes.
+
+    Args:
+        text: Text to synthesize
+        reference_audio_bytes: Reference audio as bytes
+        reference_text: Transcript of the reference audio
+        audio_extension: File extension for temp file
+        **kwargs: Additional arguments passed to synthesize_f5
+
+    Returns:
+        F5Result with audio data
+    """
+    # synthesize_f5 takes a file path, so spill the bytes to a temp file
+    with tempfile.NamedTemporaryFile(
+        suffix=audio_extension,
+        delete=False,
+    ) as tmp:
+        tmp.write(reference_audio_bytes)
+        tmp_path = tmp.name
+
+    try:
+        return await synthesize_f5(
+            text=text,
+            reference_audio_path=tmp_path,
+            reference_text=reference_text,
+            **kwargs,
+        )
+    finally:
+        # Always remove the temp file; a cleanup failure must not mask the
+        # synthesis result or error
+        try:
+            Path(tmp_path).unlink(missing_ok=True)
+        except Exception as e:
+            logger.debug(f"Could not delete temp file {tmp_path}: {e}")
+
+
+def estimate_duration(text: str, speed: float = 1.0) -> float:
+    """
+    Estimate audio duration from text.
+
+    Args:
+        text: Text to synthesize
+        speed: Speech speed multiplier (must be greater than 0)
+
+    Returns:
+        Estimated duration in seconds
+
+    Raises:
+        ValueError: If speed is not positive
+    """
+    if speed <= 0:
+        raise ValueError(f"speed must be positive, got {speed}")
+
+    # Rough estimate: ~150 words per minute at normal speed
+    # Average word length: ~5 characters
+    words = len(text) / 5
+    minutes = words / 150
+    seconds = minutes * 60
+    return seconds / speed
diff --git a/services/mana-tts/app/kokoro_service.py b/services/mana-tts/app/kokoro_service.py new file mode 100644 index 000000000..2ce42d2ac --- /dev/null +++ b/services/mana-tts/app/kokoro_service.py @@ -0,0 +1,187 @@ +""" +Kokoro TTS Service for fast preset voice synthesis. +Uses mlx-audio's Kokoro implementation optimized for Apple Silicon.
+""" + +import logging +from dataclasses import dataclass +from typing import Optional + +import numpy as np + +logger = logging.getLogger(__name__) + +# Global singleton for lazy initialization +_kokoro_model = None +_kokoro_model_name = None + +# Default model +DEFAULT_KOKORO_MODEL = "mlx-community/Kokoro-82M-bf16" + +# Available Kokoro voices (American Female/Male, British Female/Male) +KOKORO_VOICES = { + # American Female voices + "af_heart": "American Female - Heart (warm, emotional)", + "af_alloy": "American Female - Alloy (neutral, professional)", + "af_aoede": "American Female - Aoede (clear, articulate)", + "af_bella": "American Female - Bella (friendly, approachable)", + "af_jessica": "American Female - Jessica (confident, clear)", + "af_kore": "American Female - Kore (calm, measured)", + "af_nicole": "American Female - Nicole (bright, energetic)", + "af_nova": "American Female - Nova (modern, dynamic)", + "af_river": "American Female - River (smooth, flowing)", + "af_sarah": "American Female - Sarah (warm, conversational)", + "af_sky": "American Female - Sky (light, airy)", + # American Male voices + "am_adam": "American Male - Adam (deep, authoritative)", + "am_echo": "American Male - Echo (resonant, clear)", + "am_eric": "American Male - Eric (professional, neutral)", + "am_fenrir": "American Male - Fenrir (strong, commanding)", + "am_liam": "American Male - Liam (friendly, casual)", + "am_michael": "American Male - Michael (warm, trustworthy)", + "am_onyx": "American Male - Onyx (deep, smooth)", + "am_puck": "American Male - Puck (playful, light)", + # British Female voices + "bf_alice": "British Female - Alice (refined, elegant)", + "bf_emma": "British Female - Emma (clear, professional)", + "bf_isabella": "British Female - Isabella (sophisticated, warm)", + "bf_lily": "British Female - Lily (soft, gentle)", + # British Male voices + "bm_daniel": "British Male - Daniel (classic, authoritative)", + "bm_fable": "British Male - Fable (storyteller, 
expressive)", + "bm_george": "British Male - George (traditional, clear)", + "bm_lewis": "British Male - Lewis (modern, approachable)", +} + +DEFAULT_VOICE = "af_heart" + + +@dataclass +class KokoroResult: + """Result from Kokoro TTS synthesis.""" + + audio: np.ndarray + sample_rate: int + voice: str + duration: float + + +def get_kokoro_model(model_name: str = DEFAULT_KOKORO_MODEL): + """ + Get or create Kokoro model instance (singleton pattern). + + Args: + model_name: HuggingFace model identifier + + Returns: + Kokoro model instance + """ + global _kokoro_model, _kokoro_model_name + + # Return existing model if same model name + if _kokoro_model is not None and _kokoro_model_name == model_name: + return _kokoro_model + + logger.info(f"Loading Kokoro model: {model_name}") + + try: + from mlx_audio.tts import load + + _kokoro_model = load(model_name) + _kokoro_model_name = model_name + logger.info("Kokoro model loaded successfully") + return _kokoro_model + + except ImportError as e: + logger.error(f"Failed to import mlx_audio: {e}") + raise RuntimeError( + "mlx-audio not installed. Run: pip install mlx-audio" + ) + except Exception as e: + logger.error(f"Failed to load Kokoro model: {e}") + raise + + +def is_kokoro_loaded() -> bool: + """Check if Kokoro model is currently loaded.""" + return _kokoro_model is not None + + +def get_available_voices() -> dict[str, str]: + """Get dictionary of available Kokoro voices.""" + return KOKORO_VOICES.copy() + + +async def synthesize_kokoro( + text: str, + voice: str = DEFAULT_VOICE, + speed: float = 1.0, + model_name: str = DEFAULT_KOKORO_MODEL, +) -> KokoroResult: + """ + Synthesize speech using Kokoro TTS. 
+ + Args: + text: Text to synthesize + voice: Voice ID from KOKORO_VOICES + speed: Speech speed multiplier (0.5-2.0) + model_name: HuggingFace model identifier + + Returns: + KokoroResult with audio data + """ + # Validate voice + if voice not in KOKORO_VOICES: + logger.warning(f"Unknown voice '{voice}', using default '{DEFAULT_VOICE}'") + voice = DEFAULT_VOICE + + # Clamp speed to valid range + speed = max(0.5, min(2.0, speed)) + + # Get model + model = get_kokoro_model(model_name) + + logger.info(f"Synthesizing with Kokoro: voice={voice}, speed={speed}, text_length={len(text)}") + + try: + # Generate audio using mlx-audio's generate method + # Returns a generator of GenerationResult objects + result_gen = model.generate( + text=text, + voice=voice, + speed=speed, + ) + + # Collect all audio chunks from the generator + audio_chunks = [] + sample_rate = 24000 # Default, will be updated from result + + for result in result_gen: + # Each result has audio, sample_rate, audio_duration (string) + sample_rate = result.sample_rate + + # Convert MLX array to numpy + audio_np = np.array(result.audio, dtype=np.float32) + audio_chunks.append(audio_np) + + # Concatenate all chunks + if audio_chunks: + full_audio = np.concatenate(audio_chunks) + else: + raise RuntimeError("No audio generated") + + # Calculate duration from audio length + total_duration = len(full_audio) / sample_rate + + logger.info(f"Kokoro synthesis complete: duration={total_duration:.2f}s") + + return KokoroResult( + audio=full_audio, + sample_rate=sample_rate, + voice=voice, + duration=total_duration, + ) + + except Exception as e: + logger.error(f"Kokoro synthesis failed: {e}") + raise RuntimeError(f"TTS synthesis failed: {e}") diff --git a/services/mana-tts/app/main.py b/services/mana-tts/app/main.py new file mode 100644 index 000000000..4c33e1071 --- /dev/null +++ b/services/mana-tts/app/main.py @@ -0,0 +1,625 @@ +""" +Mana TTS - Text-to-Speech Microservice + +Provides TTS synthesis using: +- Kokoro: 
Fast preset voices +- F5-TTS: Voice cloning with reference audio + +Optimized for Apple Silicon (MLX). +""" + +import logging +import os +from contextlib import asynccontextmanager +from pathlib import Path +from typing import Optional + +from fastapi import FastAPI, HTTPException, UploadFile, File, Form, Response +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field + +from .audio_utils import convert_audio, SUPPORTED_FORMATS, cleanup_temp_file, save_temp_audio +from .kokoro_service import ( + synthesize_kokoro, + get_kokoro_model, + is_kokoro_loaded, + KOKORO_VOICES, + DEFAULT_VOICE as DEFAULT_KOKORO_VOICE, + DEFAULT_KOKORO_MODEL, +) +from .f5_service import ( + synthesize_f5, + synthesize_f5_from_bytes, + get_f5_model, + is_f5_loaded, + DEFAULT_F5_MODEL, +) +from .voice_manager import get_voice_manager, CustomVoice + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + +# Configuration from environment +PORT = int(os.getenv("PORT", "3022")) +PRELOAD_MODELS = os.getenv("PRELOAD_MODELS", "false").lower() == "true" +MAX_TEXT_LENGTH = int(os.getenv("MAX_TEXT_LENGTH", "1000")) +CORS_ORIGINS = os.getenv( + "CORS_ORIGINS", + "https://mana.how,https://chat.mana.how,https://todo.mana.how,http://localhost:5173", +).split(",") + +# Supported audio extensions for uploads +SUPPORTED_AUDIO_EXTENSIONS = {".wav", ".mp3", ".m4a", ".flac", ".ogg"} + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Application lifespan manager for startup/shutdown.""" + logger.info(f"Starting Mana TTS service on port {PORT}") + + # Initialize voice manager (scans voices directory) + voice_manager = get_voice_manager() + logger.info(f"Voice manager initialized with {len(voice_manager.list_voices())} custom voices") + + if PRELOAD_MODELS: + logger.info("Pre-loading models (PRELOAD_MODELS=true)...") + try: + get_kokoro_model() + 
logger.info("Kokoro model pre-loaded") + except Exception as e: + logger.warning(f"Failed to pre-load Kokoro: {e}") + + try: + get_f5_model() + logger.info("F5-TTS model pre-loaded") + except Exception as e: + logger.warning(f"Failed to pre-load F5-TTS: {e}") + else: + logger.info("Models will be loaded on first request (lazy loading)") + + yield + + logger.info("Shutting down Mana TTS service") + + +# Create FastAPI app +app = FastAPI( + title="Mana TTS", + description="Text-to-Speech service with voice cloning support", + version="1.0.0", + lifespan=lifespan, +) + +# CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=CORS_ORIGINS, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# ============================================================================ +# Request/Response Models +# ============================================================================ + + +class KokoroRequest(BaseModel): + """Request for Kokoro TTS synthesis.""" + + text: str = Field(..., description="Text to synthesize", max_length=5000) + voice: str = Field(DEFAULT_KOKORO_VOICE, description="Voice ID") + speed: float = Field(1.0, ge=0.5, le=2.0, description="Speech speed") + output_format: str = Field("wav", description="Output format (wav, mp3)") + + +class AutoRequest(BaseModel): + """Request for auto-selection TTS synthesis.""" + + text: str = Field(..., description="Text to synthesize", max_length=5000) + voice: Optional[str] = Field(None, description="Voice ID (Kokoro preset or registered)") + speed: float = Field(1.0, ge=0.5, le=2.0, description="Speech speed") + output_format: str = Field("wav", description="Output format (wav, mp3)") + + +class RegisterVoiceRequest(BaseModel): + """Request to register a new custom voice.""" + + voice_id: str = Field(..., description="Unique voice identifier", min_length=2, max_length=50) + name: str = Field(..., description="Display name") + description: str = Field("", description="Voice 
description") + transcript: str = Field(..., description="Transcript of the reference audio") + + +class HealthResponse(BaseModel): + """Health check response.""" + + status: str + service: str + models_loaded: dict + + +class ModelsResponse(BaseModel): + """Available models response.""" + + kokoro: dict + f5: dict + + +class VoiceInfo(BaseModel): + """Voice information.""" + + id: str + name: str + description: str + type: str # "kokoro" or "f5_custom" + + +class VoicesResponse(BaseModel): + """Available voices response.""" + + kokoro_voices: list[VoiceInfo] + custom_voices: list[VoiceInfo] + + +class VoiceRegisteredResponse(BaseModel): + """Response after registering a voice.""" + + voice_id: str + message: str + + +class VoiceDeletedResponse(BaseModel): + """Response after deleting a voice.""" + + voice_id: str + message: str + + +# ============================================================================ +# Health & Info Endpoints +# ============================================================================ + + +@app.get("/health", response_model=HealthResponse) +async def health_check(): + """Check service health and model status.""" + return HealthResponse( + status="healthy", + service="mana-tts", + models_loaded={ + "kokoro": is_kokoro_loaded(), + "f5": is_f5_loaded(), + }, + ) + + +@app.get("/models", response_model=ModelsResponse) +async def get_models(): + """Get information about available models.""" + return ModelsResponse( + kokoro={ + "name": "Kokoro-82M", + "description": "Fast TTS with preset voices", + "model_id": DEFAULT_KOKORO_MODEL, + "loaded": is_kokoro_loaded(), + "voice_count": len(KOKORO_VOICES), + }, + f5={ + "name": "F5-TTS", + "description": "Voice cloning with reference audio", + "model_id": DEFAULT_F5_MODEL, + "loaded": is_f5_loaded(), + "supports_cloning": True, + }, + ) + + +# ============================================================================ +# Voice Management Endpoints +# 
============================================================================ + + +@app.get("/voices", response_model=VoicesResponse) +async def get_voices(): + """Get all available voices.""" + # Kokoro preset voices + kokoro_voices = [ + VoiceInfo( + id=voice_id, + name=voice_id, + description=description, + type="kokoro", + ) + for voice_id, description in KOKORO_VOICES.items() + ] + + # Custom voices from voice manager + voice_manager = get_voice_manager() + custom_voices = [ + VoiceInfo( + id=voice.id, + name=voice.name, + description=voice.description, + type="f5_custom", + ) + for voice in voice_manager.list_voices() + ] + + return VoicesResponse( + kokoro_voices=kokoro_voices, + custom_voices=custom_voices, + ) + + +@app.post("/voices", response_model=VoiceRegisteredResponse) +async def register_voice( + voice_id: str = Form(..., description="Unique voice identifier"), + name: str = Form(..., description="Display name"), + description: str = Form("", description="Voice description"), + transcript: str = Form(..., description="Transcript of the reference audio"), + reference_audio: UploadFile = File(..., description="Reference audio file"), +): + """ + Register a new custom voice for F5-TTS voice cloning. + + Requires: + - Reference audio file (WAV, MP3, M4A, FLAC, OGG) + - Transcript of what is said in the audio + """ + # Validate file extension + if reference_audio.filename: + ext = Path(reference_audio.filename).suffix.lower() + if ext not in SUPPORTED_AUDIO_EXTENSIONS: + raise HTTPException( + status_code=400, + detail=f"Unsupported audio format. 
Use one of: {SUPPORTED_AUDIO_EXTENSIONS}", + ) + else: + ext = ".wav" + + # Read audio bytes + audio_bytes = await reference_audio.read() + + if len(audio_bytes) == 0: + raise HTTPException(status_code=400, detail="Audio file is empty") + + if len(audio_bytes) > 50 * 1024 * 1024: # 50 MB limit + raise HTTPException(status_code=400, detail="Audio file too large (max 50 MB)") + + # Register voice + voice_manager = get_voice_manager() + try: + voice_manager.register_voice( + voice_id=voice_id, + name=name, + description=description, + audio_bytes=audio_bytes, + transcript=transcript, + audio_extension=ext, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + return VoiceRegisteredResponse( + voice_id=voice_id, + message=f"Voice '{voice_id}' registered successfully", + ) + + +@app.delete("/voices/{voice_id}", response_model=VoiceDeletedResponse) +async def delete_voice(voice_id: str): + """Delete a registered custom voice.""" + voice_manager = get_voice_manager() + + if not voice_manager.delete_voice(voice_id): + raise HTTPException(status_code=404, detail=f"Voice '{voice_id}' not found") + + return VoiceDeletedResponse( + voice_id=voice_id, + message=f"Voice '{voice_id}' deleted successfully", + ) + + +# ============================================================================ +# Kokoro TTS Endpoint +# ============================================================================ + + +@app.post("/synthesize/kokoro") +async def synthesize_with_kokoro(request: KokoroRequest): + """ + Synthesize speech using Kokoro with preset voices. + + Fast synthesis with high-quality preset voices. 
+ """ + # Validate text length + if len(request.text) > MAX_TEXT_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Text exceeds maximum length of {MAX_TEXT_LENGTH} characters", + ) + + if not request.text.strip(): + raise HTTPException(status_code=400, detail="Text cannot be empty") + + # Validate output format + output_format = request.output_format.lower() + if output_format not in SUPPORTED_FORMATS: + raise HTTPException( + status_code=400, + detail=f"Unsupported format. Use one of: {SUPPORTED_FORMATS}", + ) + + try: + # Synthesize + result = await synthesize_kokoro( + text=request.text, + voice=request.voice, + speed=request.speed, + ) + + # Convert to requested format + audio_bytes, content_type = convert_audio( + result.audio, + result.sample_rate, + output_format, + ) + + # Return audio response + return Response( + content=audio_bytes, + media_type=content_type, + headers={ + "X-Voice": result.voice, + "X-Duration": str(result.duration), + "X-Sample-Rate": str(result.sample_rate), + }, + ) + + except RuntimeError as e: + raise HTTPException(status_code=500, detail=str(e)) + except Exception as e: + logger.error(f"Kokoro synthesis error: {e}") + raise HTTPException(status_code=500, detail=f"Synthesis failed: {e}") + + +# ============================================================================ +# F5-TTS Endpoint +# ============================================================================ + + +@app.post("/synthesize") +async def synthesize_with_f5( + text: str = Form(..., description="Text to synthesize"), + voice_id: Optional[str] = Form(None, description="Registered voice ID"), + reference_audio: Optional[UploadFile] = File(None, description="Reference audio for cloning"), + reference_text: Optional[str] = Form(None, description="Transcript of reference audio"), + output_format: str = Form("wav", description="Output format (wav, mp3)"), + speed: float = Form(1.0, ge=0.5, le=2.0, description="Speech speed"), + steps: int = Form(32, ge=8, 
le=64, description="Diffusion steps"), +): + """ + Synthesize speech using F5-TTS with voice cloning. + + Provide either: + - voice_id: Use a pre-registered voice + - reference_audio + reference_text: Clone voice from audio sample + """ + # Validate text + if len(text) > MAX_TEXT_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Text exceeds maximum length of {MAX_TEXT_LENGTH} characters", + ) + + if not text.strip(): + raise HTTPException(status_code=400, detail="Text cannot be empty") + + # Validate output format + output_format = output_format.lower() + if output_format not in SUPPORTED_FORMATS: + raise HTTPException( + status_code=400, + detail=f"Unsupported format. Use one of: {SUPPORTED_FORMATS}", + ) + + voice_manager = get_voice_manager() + ref_audio_path: Optional[str] = None + ref_text: Optional[str] = None + temp_file_path: Optional[str] = None + + try: + # Option 1: Use registered voice + if voice_id: + voice = voice_manager.get_voice(voice_id) + if not voice: + raise HTTPException( + status_code=404, + detail=f"Voice '{voice_id}' not found. Register it first or provide reference audio.", + ) + ref_audio_path = voice.audio_path + ref_text = voice.transcript + + # Option 2: Use uploaded reference audio + elif reference_audio and reference_text: + # Get file extension + ext = ".wav" + if reference_audio.filename: + ext = Path(reference_audio.filename).suffix.lower() + if ext not in SUPPORTED_AUDIO_EXTENSIONS: + raise HTTPException( + status_code=400, + detail=f"Unsupported audio format. 
Use one of: {SUPPORTED_AUDIO_EXTENSIONS}", + ) + + # Read and save to temp file + audio_bytes = await reference_audio.read() + if len(audio_bytes) == 0: + raise HTTPException(status_code=400, detail="Reference audio is empty") + + temp_file_path = save_temp_audio(audio_bytes, suffix=ext) + ref_audio_path = temp_file_path + ref_text = reference_text + + else: + raise HTTPException( + status_code=400, + detail="Provide either voice_id or reference_audio + reference_text", + ) + + # Synthesize with F5-TTS + result = await synthesize_f5( + text=text, + reference_audio_path=ref_audio_path, + reference_text=ref_text, + speed=speed, + steps=steps, + ) + + # Convert to requested format + audio_bytes, content_type = convert_audio( + result.audio, + result.sample_rate, + output_format, + ) + + # Return audio response + return Response( + content=audio_bytes, + media_type=content_type, + headers={ + "X-Model": "f5-tts", + "X-Voice-ID": voice_id or "custom", + "X-Duration": str(result.duration), + "X-Sample-Rate": str(result.sample_rate), + }, + ) + + except HTTPException: + raise + except RuntimeError as e: + raise HTTPException(status_code=500, detail=str(e)) + except Exception as e: + logger.error(f"F5-TTS synthesis error: {e}") + raise HTTPException(status_code=500, detail=f"Voice cloning synthesis failed: {e}") + finally: + # Clean up temp file + if temp_file_path: + cleanup_temp_file(temp_file_path) + + +# ============================================================================ +# Auto-Selection Endpoint +# ============================================================================ + + +@app.post("/synthesize/auto") +async def synthesize_auto(request: AutoRequest): + """ + Auto-select the best TTS model based on voice parameter. 
+ + - If voice is a Kokoro preset: Use Kokoro + - If voice is a registered custom voice: Use F5-TTS + - If no voice specified: Use Kokoro with default voice + """ + # Validate text + if len(request.text) > MAX_TEXT_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Text exceeds maximum length of {MAX_TEXT_LENGTH} characters", + ) + + if not request.text.strip(): + raise HTTPException(status_code=400, detail="Text cannot be empty") + + # Determine which model to use + voice = request.voice or DEFAULT_KOKORO_VOICE + + # Check if it's a Kokoro voice + if voice in KOKORO_VOICES: + kokoro_request = KokoroRequest( + text=request.text, + voice=voice, + speed=request.speed, + output_format=request.output_format, + ) + return await synthesize_with_kokoro(kokoro_request) + + # Check if it's a registered custom voice + voice_manager = get_voice_manager() + if voice_manager.voice_exists(voice): + # Use F5-TTS with registered voice + # Create a form-like context for the F5 endpoint + custom_voice = voice_manager.get_voice(voice) + try: + result = await synthesize_f5( + text=request.text, + reference_audio_path=custom_voice.audio_path, + reference_text=custom_voice.transcript, + speed=request.speed, + ) + + # Convert to requested format + output_format = request.output_format.lower() + audio_bytes, content_type = convert_audio( + result.audio, + result.sample_rate, + output_format, + ) + + return Response( + content=audio_bytes, + media_type=content_type, + headers={ + "X-Model": "f5-tts", + "X-Voice-ID": voice, + "X-Duration": str(result.duration), + "X-Sample-Rate": str(result.sample_rate), + }, + ) + except Exception as e: + logger.error(f"F5-TTS auto synthesis error: {e}") + raise HTTPException(status_code=500, detail=f"Voice synthesis failed: {e}") + + # Unknown voice - fall back to Kokoro with default + logger.warning(f"Unknown voice '{voice}', falling back to Kokoro default") + kokoro_request = KokoroRequest( + text=request.text, + voice=DEFAULT_KOKORO_VOICE, + 
speed=request.speed, + output_format=request.output_format, + ) + return await synthesize_with_kokoro(kokoro_request) + + +# ============================================================================ +# Error Handler +# ============================================================================ + + +@app.exception_handler(Exception) +async def global_exception_handler(request, exc): + """Handle uncaught exceptions.""" + logger.error(f"Unhandled exception: {exc}") + return Response( + content=f'{{"error": "Internal server error", "detail": "{str(exc)}"}}', + status_code=500, + media_type="application/json", + ) + + +# ============================================================================ +# Main +# ============================================================================ + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=PORT) diff --git a/services/mana-tts/app/voice_manager.py b/services/mana-tts/app/voice_manager.py new file mode 100644 index 000000000..c489bd4e2 --- /dev/null +++ b/services/mana-tts/app/voice_manager.py @@ -0,0 +1,275 @@ +""" +Voice Manager for registering and managing custom voices. +Handles pre-defined voices from the voices/ directory and runtime-registered voices. 
+""" + +import json +import logging +import os +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +# Base directory for voices +VOICES_DIR = Path(__file__).parent.parent / "voices" + +# Registry file for custom voices +REGISTRY_FILE = VOICES_DIR / "registry.json" + + +@dataclass +class CustomVoice: + """Custom voice registration.""" + + id: str + name: str + description: str + audio_path: str + transcript: str + created_at: str # ISO format timestamp + + +class VoiceManager: + """Manages custom voice registrations for F5-TTS.""" + + def __init__(self, voices_dir: Path = VOICES_DIR): + self.voices_dir = voices_dir + self.registry_file = voices_dir / "registry.json" + self._voices: dict[str, CustomVoice] = {} + self._load_registry() + self._scan_predefined_voices() + + def _load_registry(self) -> None: + """Load voice registry from disk.""" + if not self.registry_file.exists(): + logger.info("No voice registry found, starting fresh") + return + + try: + with open(self.registry_file, "r") as f: + data = json.load(f) + + for voice_id, voice_data in data.items(): + # Verify audio file exists + if Path(voice_data["audio_path"]).exists(): + self._voices[voice_id] = CustomVoice(**voice_data) + else: + logger.warning( + f"Voice '{voice_id}' audio file not found: {voice_data['audio_path']}" + ) + + logger.info(f"Loaded {len(self._voices)} custom voices from registry") + + except Exception as e: + logger.error(f"Failed to load voice registry: {e}") + + def _save_registry(self) -> None: + """Save voice registry to disk.""" + try: + data = { + voice_id: asdict(voice) + for voice_id, voice in self._voices.items() + } + with open(self.registry_file, "w") as f: + json.dump(data, f, indent=2) + logger.info("Voice registry saved") + except Exception as e: + logger.error(f"Failed to save voice registry: {e}") + + def _scan_predefined_voices(self) -> None: + """Scan voices directory for 
pre-defined voices.""" + if not self.voices_dir.exists(): + return + + # Look for voice directories with audio + transcript + for voice_dir in self.voices_dir.iterdir(): + if not voice_dir.is_dir(): + continue + + voice_id = voice_dir.name + if voice_id in self._voices: + continue # Already registered + + # Look for audio file + audio_file = None + for ext in [".wav", ".mp3", ".m4a", ".flac"]: + candidate = voice_dir / f"reference{ext}" + if candidate.exists(): + audio_file = candidate + break + + # Look for transcript + transcript_file = voice_dir / "transcript.txt" + if not transcript_file.exists(): + continue + + if not audio_file: + logger.warning(f"No reference audio found in {voice_dir}") + continue + + # Load transcript + try: + transcript = transcript_file.read_text().strip() + except Exception as e: + logger.warning(f"Failed to read transcript for {voice_id}: {e}") + continue + + # Load metadata if exists + metadata_file = voice_dir / "metadata.json" + name = voice_id + description = f"Pre-defined voice: {voice_id}" + + if metadata_file.exists(): + try: + with open(metadata_file, "r") as f: + metadata = json.load(f) + name = metadata.get("name", name) + description = metadata.get("description", description) + except Exception: + pass + + # Register pre-defined voice + from datetime import datetime + + self._voices[voice_id] = CustomVoice( + id=voice_id, + name=name, + description=description, + audio_path=str(audio_file), + transcript=transcript, + created_at=datetime.now().isoformat(), + ) + logger.info(f"Found pre-defined voice: {voice_id}") + + def register_voice( + self, + voice_id: str, + name: str, + description: str, + audio_bytes: bytes, + transcript: str, + audio_extension: str = ".wav", + ) -> CustomVoice: + """ + Register a new custom voice. 
+ + Args: + voice_id: Unique voice identifier + name: Display name + description: Voice description + audio_bytes: Reference audio data + transcript: Transcript of the reference audio + audio_extension: Audio file extension + + Returns: + Registered CustomVoice + + Raises: + ValueError: If voice_id already exists + """ + if voice_id in self._voices: + raise ValueError(f"Voice '{voice_id}' already exists") + + # Validate voice_id format + if not voice_id.replace("_", "").replace("-", "").isalnum(): + raise ValueError("Voice ID must be alphanumeric (with _ or -)") + + # Create voice directory + voice_dir = self.voices_dir / voice_id + voice_dir.mkdir(parents=True, exist_ok=True) + + # Save audio file + audio_path = voice_dir / f"reference{audio_extension}" + with open(audio_path, "wb") as f: + f.write(audio_bytes) + + # Save transcript + transcript_file = voice_dir / "transcript.txt" + with open(transcript_file, "w") as f: + f.write(transcript) + + # Create voice entry + from datetime import datetime + + voice = CustomVoice( + id=voice_id, + name=name, + description=description, + audio_path=str(audio_path), + transcript=transcript, + created_at=datetime.now().isoformat(), + ) + + # Save metadata + metadata_file = voice_dir / "metadata.json" + with open(metadata_file, "w") as f: + json.dump( + {"name": name, "description": description}, + f, + indent=2, + ) + + # Add to registry + self._voices[voice_id] = voice + self._save_registry() + + logger.info(f"Registered new voice: {voice_id}") + return voice + + def get_voice(self, voice_id: str) -> Optional[CustomVoice]: + """Get a voice by ID.""" + return self._voices.get(voice_id) + + def delete_voice(self, voice_id: str) -> bool: + """ + Delete a custom voice. 
+ + Args: + voice_id: Voice to delete + + Returns: + True if deleted, False if not found + """ + if voice_id not in self._voices: + return False + + voice = self._voices[voice_id] + + # Remove voice directory + voice_dir = self.voices_dir / voice_id + if voice_dir.exists(): + import shutil + + try: + shutil.rmtree(voice_dir) + except Exception as e: + logger.error(f"Failed to delete voice directory: {e}") + + # Remove from registry + del self._voices[voice_id] + self._save_registry() + + logger.info(f"Deleted voice: {voice_id}") + return True + + def list_voices(self) -> list[CustomVoice]: + """List all registered custom voices.""" + return list(self._voices.values()) + + def voice_exists(self, voice_id: str) -> bool: + """Check if a voice exists.""" + return voice_id in self._voices + + +# Global singleton instance +_voice_manager: Optional[VoiceManager] = None + + +def get_voice_manager() -> VoiceManager: + """Get the global VoiceManager instance.""" + global _voice_manager + if _voice_manager is None: + _voice_manager = VoiceManager() + return _voice_manager diff --git a/services/mana-tts/requirements.txt b/services/mana-tts/requirements.txt new file mode 100644 index 000000000..50cf6a88b --- /dev/null +++ b/services/mana-tts/requirements.txt @@ -0,0 +1,22 @@ +# Web Framework +fastapi>=0.115.0 +uvicorn[standard]>=0.34.0 +python-multipart>=0.0.20 + +# TTS Models (MLX optimized for Apple Silicon) +f5-tts-mlx>=0.2.6 +mlx-audio>=0.1.0 +mlx>=0.21.0 + +# Kokoro dependencies (phonemizer) +misaki[en]>=0.9.0 + +# Audio Processing +soundfile>=0.13.0 +scipy>=1.11.0 +numpy>=1.26.0 +pydub>=0.25.1 +tqdm>=4.67.0 + +# Utilities +aiofiles>=24.1.0 diff --git a/services/mana-tts/setup.sh b/services/mana-tts/setup.sh new file mode 100755 index 000000000..280bfa625 --- /dev/null +++ b/services/mana-tts/setup.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# Setup script for Mana TTS service +# Optimized for Apple Silicon (MLX) + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && 
# Environment preparation for the Mana TTS service: platform warnings,
# interpreter discovery, version gate, virtualenv creation and activation.
# Relies on SCRIPT_DIR (set at the top of this script) and runs under `set -e`.

VENV_DIR="$SCRIPT_DIR/.venv"
# Preferred interpreter version; used to look for a matching pythonX.Y binary.
PYTHON_VERSION="3.11"

echo "=========================================="
echo "Mana TTS Setup"
echo "=========================================="
echo ""

# Check platform
if [[ "$(uname)" != "Darwin" ]]; then
    echo "Warning: This service is optimized for macOS with Apple Silicon."
    echo "Some features may not work on other platforms."
    echo ""
fi

# Check for Apple Silicon
if [[ "$(uname -m)" != "arm64" ]]; then
    echo "Warning: This service is optimized for Apple Silicon (arm64)."
    echo "Performance may be reduced on Intel Macs."
    echo ""
fi

# Find Python: prefer the pinned version, otherwise fall back to python3.
if command -v "python$PYTHON_VERSION" &> /dev/null; then
    PYTHON_CMD="python$PYTHON_VERSION"
elif command -v python3 &> /dev/null; then
    PYTHON_CMD="python3"
else
    echo "Error: Python 3 not found. Please install Python 3.11 or later."
    exit 1
fi

echo "Using Python: $PYTHON_CMD"
"$PYTHON_CMD" --version
echo ""

# Check Python version
PYTHON_MAJOR=$("$PYTHON_CMD" -c "import sys; print(sys.version_info.major)")
PYTHON_MINOR=$("$PYTHON_CMD" -c "import sys; print(sys.version_info.minor)")

# Compare as a (major, minor) pair: the minor version only matters when the
# major version is exactly 3. Testing the minor on its own would wrongly
# reject any X.0-X.9 release with a major version above 3.
if (( PYTHON_MAJOR < 3 || (PYTHON_MAJOR == 3 && PYTHON_MINOR < 10) )); then
    echo "Error: Python 3.10 or later required. Found $PYTHON_MAJOR.$PYTHON_MINOR"
    exit 1
fi

# Create or recreate virtual environment
if [[ -d "$VENV_DIR" ]]; then
    echo "Virtual environment exists at $VENV_DIR"
    # `read` returns non-zero on EOF (e.g. stdin is not a terminal); without
    # the fallback, `set -e` would abort the script here. EOF is treated as
    # the default answer "No", i.e. the existing environment is kept.
    read -p "Recreate it? (y/N) " -n 1 -r || REPLY=""
    echo ""
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo "Removing existing virtual environment..."
        rm -rf "$VENV_DIR"
        echo "Creating new virtual environment..."
        "$PYTHON_CMD" -m venv "$VENV_DIR"
    fi
else
    echo "Creating virtual environment..."
    "$PYTHON_CMD" -m venv "$VENV_DIR"
fi

# Activate virtual environment
echo "Activating virtual environment..."
source "$VENV_DIR/bin/activate"

# Upgrade pip
echo ""
echo "Upgrading pip..."
pip install --upgrade pip

# Install dependencies
echo ""
echo "Installing dependencies..."
# Dependency installation and post-install verification. Runs inside the
# activated virtualenv, so `pip` and `python` refer to the venv's binaries.
pip install -r "$SCRIPT_DIR/requirements.txt"

# Install ffmpeg check (for MP3 support)
echo ""
echo "Checking for ffmpeg (required for MP3 output)..."
if command -v ffmpeg &> /dev/null; then
    # Use `command -v` for the path as well, matching the lookup above.
    echo "ffmpeg found: $(command -v ffmpeg)"
else
    echo "Warning: ffmpeg not found. MP3 output will not work."
    echo "Install with: brew install ffmpeg"
fi

# Verify installations
echo ""
echo "Verifying installations..."

# Test FastAPI (hard requirement: abort if it cannot be imported)
python -c "import fastapi; print(f'FastAPI {fastapi.__version__}')" || {
    echo "Error: FastAPI not installed correctly"
    exit 1
}

# Test soundfile (hard requirement: abort if it cannot be imported)
python -c "import soundfile; print(f'soundfile {soundfile.__version__}')" || {
    echo "Error: soundfile not installed correctly"
    exit 1
}

# Test MLX (on Apple Silicon)
# Import mlx.core so the native extension is actually loaded, and read the
# version from the installed distribution metadata instead of relying on a
# `__version__` attribute of the top-level `mlx` package.
# NOTE(review): the top-level package appears not to define `__version__`,
# which would make the old check warn on working installs - confirm.
if [[ "$(uname -m)" == "arm64" ]]; then
    python -c "import mlx.core; from importlib.metadata import version; print('MLX', version('mlx'))" || {
        echo "Warning: MLX not installed correctly. TTS may not work."
    }
fi

# Test mlx-audio (soft requirement: only warn; import errors are silenced)
python -c "import mlx_audio; print('mlx-audio installed')" 2>/dev/null || {
    echo "Warning: mlx-audio not imported successfully."
    echo "You may need to install it manually or models won't load."
}

# Create directories
echo ""
echo "Creating required directories..."
mkdir -p "$SCRIPT_DIR/voices"
mkdir -p "$SCRIPT_DIR/mlx_models"

echo ""
echo "=========================================="
echo "Setup Complete!"
# Close the "Setup Complete!" banner and print the next-step instructions.
# A single here-document replaces the run of echo calls; the delimiter is left
# unquoted on purpose so that $SCRIPT_DIR is expanded in the output.
cat <<EOF
==========================================

To start the service:

 cd $SCRIPT_DIR
 source .venv/bin/activate
 uvicorn app.main:app --host 0.0.0.0 --port 3022

Or for development with auto-reload:

 uvicorn app.main:app --host 0.0.0.0 --port 3022 --reload

Test the service:

 curl http://localhost:3022/health

For Mac Mini deployment, run:

 ./../../scripts/mac-mini/setup-tts.sh

EOF