diff --git a/scripts/mac-mini/setup-stt.sh b/scripts/mac-mini/setup-stt.sh index e3b3b672e..eea3e4e1d 100755 --- a/scripts/mac-mini/setup-stt.sh +++ b/scripts/mac-mini/setup-stt.sh @@ -64,7 +64,7 @@ cat > "$PLIST_PATH" << EOF PORT 3020 WHISPER_MODEL - large-v3-turbo + large-v3 PRELOAD_MODELS false CORS_ORIGINS diff --git a/services/mana-stt/app/main.py b/services/mana-stt/app/main.py index a327a304e..717115f2f 100644 --- a/services/mana-stt/app/main.py +++ b/services/mana-stt/app/main.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) # Environment PORT = int(os.getenv("PORT", "3020")) -DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3-turbo") +DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3") PRELOAD_MODELS = os.getenv("PRELOAD_MODELS", "false").lower() == "true" CORS_ORIGINS = os.getenv( "CORS_ORIGINS", diff --git a/services/mana-stt/app/whisper_service.py b/services/mana-stt/app/whisper_service.py index a41556c01..db5e17ca9 100644 --- a/services/mana-stt/app/whisper_service.py +++ b/services/mana-stt/app/whisper_service.py @@ -24,7 +24,7 @@ class TranscriptionResult: segments: Optional[list] = None -def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12): +def get_whisper_model(model_name: str = "large-v3", batch_size: int = 12): """Get or create Whisper model instance (singleton pattern).""" global _whisper_model @@ -54,7 +54,7 @@ def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12): def transcribe_audio( audio_path: str, language: Optional[str] = None, - model_name: str = "large-v3-turbo", + model_name: str = "large-v3", ) -> TranscriptionResult: """ Transcribe audio file using Lightning Whisper MLX. @@ -105,7 +105,7 @@ async def transcribe_audio_bytes( audio_bytes: bytes, filename: str, language: Optional[str] = None, - model_name: str = "large-v3-turbo", + model_name: str = "large-v3", ) -> TranscriptionResult: """ Transcribe audio from bytes (for API uploads). @@ -142,20 +142,15 @@ async def transcribe_audio_bytes( pass -# Available models for reference +# Available models for Lightning Whisper MLX AVAILABLE_MODELS = [ "tiny", - "tiny.en", - "base", - "base.en", "small", - "small.en", + "base", "medium", - "medium.en", "large", "large-v2", - "large-v3", - "large-v3-turbo", # Recommended for Mac Mini + "large-v3", # Recommended for Mac Mini "distil-small.en", "distil-medium.en", "distil-large-v2", diff --git a/services/mana-stt/setup.sh b/services/mana-stt/setup.sh index 1df05dd9e..2392daa92 100755 --- a/services/mana-stt/setup.sh +++ b/services/mana-stt/setup.sh @@ -89,13 +89,13 @@ python -c "import mlx; print(f' MLX installed')" 2>/dev/null || echo " MLX n python -c "import fastapi; print(f' FastAPI {fastapi.__version__}')" echo "" -echo "6. Downloading Whisper model (large-v3-turbo)..." -echo " This will download ~1.6 GB on first run..." +echo "6. Downloading Whisper model (large-v3)..." +echo " This will download ~2.9 GB on first run..." # Pre-download the model python -c " from lightning_whisper_mlx import LightningWhisperMLX print(' Initializing Whisper model...') -whisper = LightningWhisperMLX(model='large-v3-turbo', batch_size=12) +whisper = LightningWhisperMLX(model='large-v3', batch_size=12) print(' Whisper model ready!') " || echo " Note: Model will be downloaded on first transcription request"