mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-22 02:06:42 +02:00
- Add vllm_service.py as proxy to vLLM server for Voxtral 3B/4B - Add voxtral_api_service.py for Mistral API fallback - Update main.py with /transcribe/voxtral endpoint using vLLM - Add /transcribe/auto endpoint with automatic fallback chain - Create setup-vllm.sh and start-vllm-voxtral.sh scripts - Add launchd plist files for Mac Mini deployment - Add install-services.sh for automated service installation Architecture: - vLLM server runs Voxtral models on port 8100 - mana-stt proxies to vLLM with Mistral API fallback - Fallback chain: vLLM -> Mistral API Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
31 lines
825 B
Text
31 lines
825 B
Text
# ManaCore STT Service Configuration
|
|
# Copy to .env and adjust values as needed
|
|
|
|
# Server
|
|
PORT=3020
|
|
|
|
# Whisper (Lightning MLX)
|
|
WHISPER_MODEL=large-v3
|
|
|
|
# Voxtral (Local Models)
|
|
# Options: voxtral-mini-3b, voxtral-realtime-4b, voxtral-small-24b
|
|
VOXTRAL_MODEL=voxtral-realtime-4b
|
|
|
|
# Model Loading
|
|
# Set to true to preload models on startup (slower startup, faster first request)
|
|
PRELOAD_MODELS=false
|
|
|
|
# Load Management
|
|
# Maximum number of concurrent transcription requests before falling back to the API
|
|
MAX_CONCURRENT_REQUESTS=3
|
|
|
|
# API Fallback
|
|
# Enable automatic fallback to the Mistral API when the service is overloaded (see MAX_CONCURRENT_REQUESTS)
|
|
API_FALLBACK_ENABLED=true
|
|
|
|
# Mistral API key (required when API_FALLBACK_ENABLED=true; fallback cannot work while this is empty)
|
|
# Get your key at https://console.mistral.ai/
|
|
MISTRAL_API_KEY=
|
|
|
|
# CORS Origins (comma-separated)
|
|
CORS_ORIGINS=https://mana.how,https://chat.mana.how,http://localhost:5173
|