# managarten/services/mana-stt/.env.example

# ManaCore STT Service Configuration
# Copy to .env and adjust values as needed

# Server
# Port for the STT service
PORT=3020

# Whisper (Lightning MLX)
# Identifier of the Whisper model to use
WHISPER_MODEL=large-v3

# Voxtral (Local Models)
# Options: voxtral-mini-3b, voxtral-realtime-4b, voxtral-small-24b
VOXTRAL_MODEL=voxtral-realtime-4b

# WhisperX (CUDA GPU Server)
# Set to true to enable WhisperX for rich transcription (diarization, word alignment).
# Requires an NVIDIA GPU and requirements-cuda.txt.
USE_WHISPERX=false

# WhisperX batch size (higher = faster but uses more VRAM; 16 works well on an RTX 3090)
WHISPERX_BATCH_SIZE=16

# Device and compute type for CUDA (commented out here; uncomment to set them)
# WHISPER_DEVICE=cuda
# WHISPER_COMPUTE_TYPE=float16

# HuggingFace token for the pyannote speaker diarization models.
# Required for diarization. Accept the model terms at:
#   https://huggingface.co/pyannote/speaker-diarization-3.1
#   https://huggingface.co/pyannote/segmentation-3.0
HF_TOKEN=

# Model Loading
# Set to true to preload models on startup (slower startup, faster first request)
PRELOAD_MODELS=false

# Load Management
# Maximum number of concurrent transcription requests handled before the
# API fallback is used (see API_FALLBACK_ENABLED)
MAX_CONCURRENT_REQUESTS=3

# API Fallback
# Enable automatic fallback to the Mistral API when the service is overloaded.
# NOTE: when this is true, MISTRAL_API_KEY must also be set (it is blank in this template).
API_FALLBACK_ENABLED=true

# Mistral API key (required for API fallback)
# Get your key at https://console.mistral.ai/
MISTRAL_API_KEY=

# CORS origins (comma-separated list of origin URLs, including the scheme)
CORS_ORIGINS=https://mana.how,https://chat.mana.how,http://localhost:5173

# ===========================================
# Authentication
# ===========================================

# Enable API key authentication (default: true for production)
REQUIRE_AUTH=true

# API keys (comma-separated list of key:name pairs)
# Example: sk-abc123:myapp,sk-def456:testuser
API_KEYS=

# Internal API key (not rate limited; for internal services)
# Generate with: openssl rand -hex 32
INTERNAL_API_KEY=

# Rate Limiting
# Number of requests allowed per window, per API key
RATE_LIMIT_REQUESTS=60
# Length of the rate-limit window, in seconds
RATE_LIMIT_WINDOW=60