fix(stt): change default model to large-v3 (large-v3-turbo not supported by lightning-whisper-mlx)

This commit is contained in:
Till-JS 2026-01-27 01:36:49 +01:00
parent bf0fa04e7e
commit 92a700ac7e
4 changed files with 11 additions and 16 deletions

View file

@@ -64,7 +64,7 @@ cat > "$PLIST_PATH" << EOF
<key>PORT</key>
<string>3020</string>
<key>WHISPER_MODEL</key>
<string>large-v3-turbo</string>
<string>large-v3</string>
<key>PRELOAD_MODELS</key>
<string>false</string>
<key>CORS_ORIGINS</key>

View file

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
# Environment
PORT = int(os.getenv("PORT", "3020"))
DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3-turbo")
DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3")
PRELOAD_MODELS = os.getenv("PRELOAD_MODELS", "false").lower() == "true"
CORS_ORIGINS = os.getenv(
"CORS_ORIGINS",

View file

@@ -24,7 +24,7 @@ class TranscriptionResult:
segments: Optional[list] = None
def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12):
def get_whisper_model(model_name: str = "large-v3", batch_size: int = 12):
"""Get or create Whisper model instance (singleton pattern)."""
global _whisper_model
@@ -54,7 +54,7 @@ def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12):
def transcribe_audio(
audio_path: str,
language: Optional[str] = None,
model_name: str = "large-v3-turbo",
model_name: str = "large-v3",
) -> TranscriptionResult:
"""
Transcribe audio file using Lightning Whisper MLX.
@@ -105,7 +105,7 @@ async def transcribe_audio_bytes(
audio_bytes: bytes,
filename: str,
language: Optional[str] = None,
model_name: str = "large-v3-turbo",
model_name: str = "large-v3",
) -> TranscriptionResult:
"""
Transcribe audio from bytes (for API uploads).
@@ -142,20 +142,15 @@ async def transcribe_audio_bytes(
pass
# Available models for reference
# Available models for Lightning Whisper MLX
AVAILABLE_MODELS = [
"tiny",
"tiny.en",
"base",
"base.en",
"small",
"small.en",
"base",
"medium",
"medium.en",
"large",
"large-v2",
"large-v3",
"large-v3-turbo", # Recommended for Mac Mini
"large-v3", # Recommended for Mac Mini
"distil-small.en",
"distil-medium.en",
"distil-large-v2",

View file

@@ -89,13 +89,13 @@ python -c "import mlx; print(f' MLX installed')" 2>/dev/null || echo " MLX n
python -c "import fastapi; print(f' FastAPI {fastapi.__version__}')"
echo ""
echo "6. Downloading Whisper model (large-v3-turbo)..."
echo " This will download ~1.6 GB on first run..."
echo "6. Downloading Whisper model (large-v3)..."
echo " This will download ~2.9 GB on first run..."
# Pre-download the model
python -c "
from lightning_whisper_mlx import LightningWhisperMLX
print(' Initializing Whisper model...')
whisper = LightningWhisperMLX(model='large-v3-turbo', batch_size=12)
whisper = LightningWhisperMLX(model='large-v3', batch_size=12)
print(' Whisper model ready!')
" || echo " Note: Model will be downloaded on first transcription request"