managarten/services/mana-stt/scripts/setup-vllm.sh
Till-JS 60394076e5 feat(mana-stt): add vLLM integration for Voxtral transcription
- Add vllm_service.py as proxy to vLLM server for Voxtral 3B/4B
- Add voxtral_api_service.py for Mistral API fallback
- Update main.py with /transcribe/voxtral endpoint using vLLM
- Add /transcribe/auto endpoint with automatic fallback chain
- Create setup-vllm.sh and start-vllm-voxtral.sh scripts
- Add launchd plist files for Mac Mini deployment
- Add install-services.sh for automated service installation

Architecture:
- vLLM server runs Voxtral models on port 8100
- mana-stt proxies to vLLM with Mistral API fallback
- Fallback chain: vLLM -> Mistral API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-11 16:10:00 +01:00

83 lines
2.5 KiB
Bash
Executable file

#!/bin/bash
# Setup vLLM for Voxtral on Mac Mini M4
#
# vLLM runs in CPU mode on macOS (no CUDA), but still provides
# the optimized inference pipeline for Voxtral models.
#
# Usage: ./scripts/setup-vllm.sh
#
# Creates a dedicated virtualenv at <service>/.venv-vllm and installs
# vllm[audio] (nightly wheels preferred, stable fallback) plus
# mistral-common[audio]. Requires python3 >= 3.10 on PATH.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SERVICE_DIR="$(dirname "$SCRIPT_DIR")"
VENV_DIR="$SERVICE_DIR/.venv-vllm"

echo "============================================"
echo "vLLM Setup for Voxtral on Mac Mini M4"
echo "============================================"
echo ""

# Fail with a clear message if python3 is missing entirely
# (otherwise set -e aborts with an opaque "command not found").
if ! command -v python3 >/dev/null 2>&1; then
    echo "Error: python3 not found on PATH" >&2
    exit 1
fi

# Check Python version (vLLM requires 3.10+)
PYTHON_VERSION="$(python3 --version 2>&1 | awk '{print $2}')"
PYTHON_MAJOR="$(echo "$PYTHON_VERSION" | cut -d. -f1)"
PYTHON_MINOR="$(echo "$PYTHON_VERSION" | cut -d. -f2)"
if [[ "$PYTHON_MAJOR" -lt 3 ]] || [[ "$PYTHON_MAJOR" -eq 3 && "$PYTHON_MINOR" -lt 10 ]]; then
    echo "Error: Python 3.10+ required (found $PYTHON_VERSION)" >&2
    exit 1
fi
echo "Python version: $PYTHON_VERSION"

# Create separate venv for vLLM (to avoid conflicts with whisper)
echo ""
echo "Creating virtual environment for vLLM..."
python3 -m venv "$VENV_DIR"
# Older venv activate scripts reference unset vars (e.g. $PS1), which
# would abort under set -u; relax -u just around the source.
set +u
# shellcheck disable=SC1091 -- venv is created just above
source "$VENV_DIR/bin/activate"
set -u

# Upgrade pip
pip install --upgrade pip --quiet

# Install vLLM with audio support
echo ""
echo "Installing vLLM with audio support..."
echo "This may take a few minutes..."

# Install uv for much faster package resolution/installation
pip install uv --quiet

# Prefer the nightly wheel index (best Voxtral support); fall back to
# the stable release if the nightly install fails.
uv pip install "vllm[audio]>=0.10.0" --extra-index-url https://wheels.vllm.ai/nightly 2>&1 || {
    echo "Nightly install failed, trying stable..."
    uv pip install "vllm[audio]>=0.10.0"
}

# mistral-common provides the Mistral tokenizer + audio preprocessing
uv pip install "mistral-common[audio]>=1.8.1"

echo ""
echo "============================================"
echo "Installation complete!"
echo "============================================"
echo ""
echo "To start Voxtral Mini 3B server:"
echo " source $VENV_DIR/bin/activate"
echo " vllm serve mistralai/Voxtral-Mini-3B-2507 \\"
echo " --tokenizer_mode mistral \\"
echo " --config_format mistral \\"
echo " --load_format mistral \\"
echo " --host 0.0.0.0 \\"
echo " --port 8100"
echo ""
echo "To start Voxtral Realtime 4B server:"
echo " source $VENV_DIR/bin/activate"
echo " vllm serve mistralai/Voxtral-Mini-4B-Realtime-2602 \\"
echo " --host 0.0.0.0 \\"
echo " --port 8100"
echo ""
echo "API Endpoint: http://localhost:8100/v1/audio/transcriptions"
echo ""
echo "Test with:"
echo " curl http://localhost:8100/v1/audio/transcriptions \\"
echo " -F file=@test.mp3 \\"
echo " -F model=mistralai/Voxtral-Mini-3B-2507 \\"
echo " -F language=de"