mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-19 12:21:23 +02:00
fix(stt): change default model to large-v3 (large-v3-turbo not supported by lightning-whisper-mlx)
This commit is contained in:
parent
bf0fa04e7e
commit
92a700ac7e
4 changed files with 11 additions and 16 deletions
|
|
@@ -64,7 +64,7 @@ cat > "$PLIST_PATH" << EOF
|
||||||
<key>PORT</key>
|
<key>PORT</key>
|
||||||
<string>3020</string>
|
<string>3020</string>
|
||||||
<key>WHISPER_MODEL</key>
|
<key>WHISPER_MODEL</key>
|
||||||
<string>large-v3-turbo</string>
|
<string>large-v3</string>
|
||||||
<key>PRELOAD_MODELS</key>
|
<key>PRELOAD_MODELS</key>
|
||||||
<string>false</string>
|
<string>false</string>
|
||||||
<key>CORS_ORIGINS</key>
|
<key>CORS_ORIGINS</key>
|
||||||
|
|
|
||||||
|
|
@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Environment
|
# Environment
|
||||||
PORT = int(os.getenv("PORT", "3020"))
|
PORT = int(os.getenv("PORT", "3020"))
|
||||||
DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3-turbo")
|
DEFAULT_WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3")
|
||||||
PRELOAD_MODELS = os.getenv("PRELOAD_MODELS", "false").lower() == "true"
|
PRELOAD_MODELS = os.getenv("PRELOAD_MODELS", "false").lower() == "true"
|
||||||
CORS_ORIGINS = os.getenv(
|
CORS_ORIGINS = os.getenv(
|
||||||
"CORS_ORIGINS",
|
"CORS_ORIGINS",
|
||||||
|
|
|
||||||
|
|
@@ -24,7 +24,7 @@ class TranscriptionResult:
|
||||||
segments: Optional[list] = None
|
segments: Optional[list] = None
|
||||||
|
|
||||||
|
|
||||||
def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12):
|
def get_whisper_model(model_name: str = "large-v3", batch_size: int = 12):
|
||||||
"""Get or create Whisper model instance (singleton pattern)."""
|
"""Get or create Whisper model instance (singleton pattern)."""
|
||||||
global _whisper_model
|
global _whisper_model
|
||||||
|
|
||||||
|
|
@@ -54,7 +54,7 @@ def get_whisper_model(model_name: str = "large-v3-turbo", batch_size: int = 12):
|
||||||
def transcribe_audio(
|
def transcribe_audio(
|
||||||
audio_path: str,
|
audio_path: str,
|
||||||
language: Optional[str] = None,
|
language: Optional[str] = None,
|
||||||
model_name: str = "large-v3-turbo",
|
model_name: str = "large-v3",
|
||||||
) -> TranscriptionResult:
|
) -> TranscriptionResult:
|
||||||
"""
|
"""
|
||||||
Transcribe audio file using Lightning Whisper MLX.
|
Transcribe audio file using Lightning Whisper MLX.
|
||||||
|
|
@@ -105,7 +105,7 @@ async def transcribe_audio_bytes(
|
||||||
audio_bytes: bytes,
|
audio_bytes: bytes,
|
||||||
filename: str,
|
filename: str,
|
||||||
language: Optional[str] = None,
|
language: Optional[str] = None,
|
||||||
model_name: str = "large-v3-turbo",
|
model_name: str = "large-v3",
|
||||||
) -> TranscriptionResult:
|
) -> TranscriptionResult:
|
||||||
"""
|
"""
|
||||||
Transcribe audio from bytes (for API uploads).
|
Transcribe audio from bytes (for API uploads).
|
||||||
|
|
@@ -142,20 +142,15 @@ async def transcribe_audio_bytes(
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Available models for reference
|
# Available models for Lightning Whisper MLX
|
||||||
AVAILABLE_MODELS = [
|
AVAILABLE_MODELS = [
|
||||||
"tiny",
|
"tiny",
|
||||||
"tiny.en",
|
|
||||||
"base",
|
|
||||||
"base.en",
|
|
||||||
"small",
|
"small",
|
||||||
"small.en",
|
"base",
|
||||||
"medium",
|
"medium",
|
||||||
"medium.en",
|
|
||||||
"large",
|
"large",
|
||||||
"large-v2",
|
"large-v2",
|
||||||
"large-v3",
|
"large-v3", # Recommended for Mac Mini
|
||||||
"large-v3-turbo", # Recommended for Mac Mini
|
|
||||||
"distil-small.en",
|
"distil-small.en",
|
||||||
"distil-medium.en",
|
"distil-medium.en",
|
||||||
"distil-large-v2",
|
"distil-large-v2",
|
||||||
|
|
|
||||||
|
|
@@ -89,13 +89,13 @@ python -c "import mlx; print(f' MLX installed')" 2>/dev/null || echo " MLX n
|
||||||
python -c "import fastapi; print(f' FastAPI {fastapi.__version__}')"
|
python -c "import fastapi; print(f' FastAPI {fastapi.__version__}')"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "6. Downloading Whisper model (large-v3-turbo)..."
|
echo "6. Downloading Whisper model (large-v3)..."
|
||||||
echo " This will download ~1.6 GB on first run..."
|
echo " This will download ~2.9 GB on first run..."
|
||||||
# Pre-download the model
|
# Pre-download the model
|
||||||
python -c "
|
python -c "
|
||||||
from lightning_whisper_mlx import LightningWhisperMLX
|
from lightning_whisper_mlx import LightningWhisperMLX
|
||||||
print(' Initializing Whisper model...')
|
print(' Initializing Whisper model...')
|
||||||
whisper = LightningWhisperMLX(model='large-v3-turbo', batch_size=12)
|
whisper = LightningWhisperMLX(model='large-v3', batch_size=12)
|
||||||
print(' Whisper model ready!')
|
print(' Whisper model ready!')
|
||||||
" || echo " Note: Model will be downloaded on first transcription request"
|
" || echo " Note: Model will be downloaded on first transcription request"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue