mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-15 17:19:40 +02:00
- Add API key authentication to all GPU services (X-API-Key header)
- /health and /docs remain public (no key needed)
- Shared key configured via GPU_API_KEY env variable
- Add VRAM auto-unload for mana-image-gen (5min) and mana-stt (10min)
- FLUX.2 pipeline freed after idle, recovering ~13GB VRAM
- WhisperX models freed after idle, recovering ~3GB VRAM
- Install Piper TTS voices (Thorsten + Kerstin) for local German TTS
- Update @manacore/shared-gpu client to support apiKey parameter
- Add GPU_API_KEY to .env.development
- Document API auth and VRAM management in setup guide

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
62 lines
2.1 KiB
TypeScript
62 lines
2.1 KiB
TypeScript
import type { TranscriptionResult, TranscribeOptions, GpuServiceConfig } from './types';
|
|
import { resolveServiceUrl } from './resolve-url';
|
|
|
|
/**
 * HTTP client for the speech-to-text (STT) GPU service.
 *
 * Audio is uploaded to `POST {baseUrl}/transcribe` as multipart form data and
 * service status is read from `GET {baseUrl}/health`. When an API key is
 * configured it is sent in the `X-API-Key` header of transcription requests;
 * the health request is sent without it.
 */
export class SttClient {
	private readonly baseUrl: string;
	private readonly timeout: number;
	private readonly apiKey?: string;

	/**
	 * @param config - Service configuration. The base URL comes from
	 *   `resolveServiceUrl(config, 'stt')`, `timeout` (milliseconds) defaults to
	 *   60 000 when unset, and `apiKey` is optional.
	 */
	constructor(config: GpuServiceConfig) {
		this.baseUrl = resolveServiceUrl(config, 'stt');
		this.timeout = config.timeout ?? 60_000;
		this.apiKey = config.apiKey;
	}

	/**
	 * Transcribe audio with optional word timestamps and speaker diarization.
	 *
	 * @param audioBuffer - Audio data. A `Blob` is uploaded as-is; a `Buffer` is
	 *   copied into a new `Blob` first.
	 * @param filename - File name attached to the uploaded `file` form field.
	 * @param options - Optional settings. `language` and `model` are sent only
	 *   when truthy; `align` defaults to `true` and `diarize` to `false`;
	 *   `minSpeakers` / `maxSpeakers` are sent only when not null/undefined.
	 * @returns The parsed JSON body of a successful response.
	 * @throws Error with message `STT error <status>: <detail>` when the
	 *   response status is not OK. The request is aborted after the configured
	 *   timeout, in which case the abort error from `fetch` propagates.
	 */
	async transcribe(
		audioBuffer: Buffer | Blob,
		filename: string,
		options: TranscribeOptions = {}
	): Promise<TranscriptionResult> {
		const formData = new FormData();
		const blob =
			audioBuffer instanceof Blob ? audioBuffer : new Blob([new Uint8Array(audioBuffer)]);
		formData.append('file', blob, filename);

		if (options.language) formData.append('language', options.language);
		if (options.model) formData.append('model', options.model);
		formData.append('align', String(options.align ?? true));
		formData.append('diarize', String(options.diarize ?? false));
		if (options.minSpeakers != null) formData.append('min_speakers', String(options.minSpeakers));
		if (options.maxSpeakers != null) formData.append('max_speakers', String(options.maxSpeakers));

		const controller = new AbortController();
		const timer = setTimeout(() => controller.abort(), this.timeout);

		try {
			const response = await fetch(`${this.baseUrl}/transcribe`, {
				method: 'POST',
				// No Content-Type header: fetch sets the multipart boundary for FormData bodies.
				headers: this.apiKey ? { 'X-API-Key': this.apiKey } : {},
				body: formData,
				signal: controller.signal,
			});

			if (!response.ok) {
				// The error body may be missing, non-JSON, or carry a structured `detail`.
				const body: unknown = await response.json().catch(() => undefined);
				throw new Error(
					`STT error ${response.status}: ${SttClient.describeError(body, response.statusText)}`
				);
			}

			return (await response.json()) as TranscriptionResult;
		} finally {
			// Runs on success and failure so the abort timer never outlives the request.
			clearTimeout(timer);
		}
	}

	/**
	 * Check if the STT service is healthy.
	 *
	 * Requests `GET {baseUrl}/health` with a fixed 5 second abort timeout and
	 * returns the parsed JSON body. The HTTP status code is not inspected, so a
	 * non-OK response with a JSON body is returned as-is, and a non-JSON body
	 * rejects with the parse error.
	 */
	async health(): Promise<{ status: string; whisperx: boolean }> {
		const response = await fetch(`${this.baseUrl}/health`, {
			signal: AbortSignal.timeout(5000),
		});
		return (await response.json()) as { status: string; whisperx: boolean };
	}

	/**
	 * Turn a parsed error body into readable text for an error message.
	 *
	 * A string `detail` is used verbatim, a missing or null `detail` (or a body
	 * that is not an object) falls back to `fallback`, and any other `detail`
	 * value is JSON-encoded so objects do not render as `[object Object]`.
	 */
	private static describeError(body: unknown, fallback: string): string {
		const detail =
			typeof body === 'object' && body !== null && 'detail' in body
				? (body as { detail: unknown }).detail
				: undefined;

		if (typeof detail === 'string') return detail;
		if (detail == null) return fallback;
		return JSON.stringify(detail);
	}
}
|