managarten/packages/shared-gpu/src/stt-client.ts
Till JS c67ed0df14 feat(gpu-server): add API key auth, VRAM management, and Piper TTS voices
- Add API key authentication to all GPU services (X-API-Key header)
  - /health and /docs remain public (no key needed)
  - Shared key configured via GPU_API_KEY env variable
- Add VRAM auto-unload for mana-image-gen (5min) and mana-stt (10min)
  - FLUX.2 pipeline freed after idle, recovering ~13GB VRAM
  - WhisperX models freed after idle, recovering ~3GB VRAM
- Install Piper TTS voices (Thorsten + Kerstin) for local German TTS
- Update @manacore/shared-gpu client to support apiKey parameter
- Add GPU_API_KEY to .env.development
- Document API auth and VRAM management in setup guide

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 21:54:35 +01:00

62 lines
2.1 KiB
TypeScript

import type { TranscriptionResult, TranscribeOptions, GpuServiceConfig } from './types';
import { resolveServiceUrl } from './resolve-url';
/**
 * HTTP client for the speech-to-text (STT) service.
 *
 * The base URL is resolved once at construction time via `resolveServiceUrl(config, 'stt')`.
 * When an API key is configured it is sent as the `X-API-Key` header on transcription requests.
 */
export class SttClient {
	private readonly baseUrl: string;
	private readonly timeout: number;
	private readonly apiKey?: string;

	/**
	 * @param config - Service configuration. `timeout` (milliseconds) defaults to 60 000 when
	 *   omitted; `apiKey` is optional.
	 */
	constructor(config: GpuServiceConfig) {
		this.baseUrl = resolveServiceUrl(config, 'stt');
		this.timeout = config.timeout ?? 60_000;
		this.apiKey = config.apiKey;
	}

	/**
	 * Transcribe audio with optional word timestamps and speaker diarization.
	 *
	 * Sends a multipart `POST` to `<baseUrl>/transcribe`. `align` defaults to `true` and
	 * `diarize` to `false`; `language`, `model`, `minSpeakers` and `maxSpeakers` are only sent
	 * when provided.
	 *
	 * @param audioBuffer - Raw audio, either as a `Blob` or as a Node `Buffer`.
	 * @param filename - File name attached to the uploaded `file` part.
	 * @param options - Optional transcription settings.
	 * @returns The parsed JSON response body.
	 * @throws Error with message `STT error <status>: <detail>` when the response is not OK.
	 * @throws The rejection produced by `fetch` when the request is aborted after the
	 *   configured timeout, or when the network request itself fails.
	 */
	async transcribe(
		audioBuffer: Buffer | Blob,
		filename: string,
		options: TranscribeOptions = {}
	): Promise<TranscriptionResult> {
		const formData = new FormData();
		// A Buffer is copied into a fresh Uint8Array so it can be wrapped in a Blob.
		const blob =
			audioBuffer instanceof Blob ? audioBuffer : new Blob([new Uint8Array(audioBuffer)]);
		formData.append('file', blob, filename);

		if (options.language) formData.append('language', options.language);
		if (options.model) formData.append('model', options.model);
		formData.append('align', String(options.align ?? true));
		formData.append('diarize', String(options.diarize ?? false));
		// `!= null` (rather than truthiness) so that an explicit 0 is still forwarded.
		if (options.minSpeakers != null) formData.append('min_speakers', String(options.minSpeakers));
		if (options.maxSpeakers != null) formData.append('max_speakers', String(options.maxSpeakers));

		const controller = new AbortController();
		const timer = setTimeout(() => controller.abort(), this.timeout);

		try {
			const response = await fetch(`${this.baseUrl}/transcribe`, {
				method: 'POST',
				// No Content-Type header: fetch derives the multipart boundary from the FormData body.
				headers: this.apiKey ? { 'X-API-Key': this.apiKey } : {},
				body: formData,
				signal: controller.signal,
			});

			if (!response.ok) {
				const detail = await SttClient.readErrorDetail(response);
				throw new Error(`STT error ${response.status}: ${detail}`);
			}

			return (await response.json()) as TranscriptionResult;
		} finally {
			// Always release the timer, whether the request succeeded, failed or was aborted.
			clearTimeout(timer);
		}
	}

	/**
	 * Check if the STT service is healthy.
	 *
	 * Sends a `GET` to `<baseUrl>/health` with a fixed 5 second timeout and no API key header.
	 * The HTTP status is not inspected: whatever JSON body the server returns is handed back.
	 *
	 * @returns The parsed JSON response body.
	 */
	async health(): Promise<{ status: string; whisperx: boolean }> {
		const response = await fetch(`${this.baseUrl}/health`, {
			signal: AbortSignal.timeout(5000),
		});
		return (await response.json()) as { status: string; whisperx: boolean };
	}

	/**
	 * Extract a human-readable error description from a non-OK response.
	 *
	 * Falls back to the HTTP status text when the body is not JSON or carries no `detail`.
	 */
	private static async readErrorDetail(response: Response): Promise<string> {
		let payload: unknown;
		try {
			payload = await response.json();
		} catch {
			// Empty or non-JSON body (e.g. an HTML error page from a proxy).
			return response.statusText;
		}

		const detail =
			typeof payload === 'object' && payload !== null
				? (payload as { detail?: unknown }).detail
				: undefined;
		if (detail === undefined || detail === null) return response.statusText;

		return SttClient.formatDetail(detail);
	}

	/**
	 * Render an error `detail` value as text.
	 *
	 * Strings pass through unchanged. Lists are rendered item by item and joined with `; `.
	 * Objects shaped like `{ loc, msg }` (the shape FastAPI uses for validation errors) become
	 * `loc.path: msg`; anything else is JSON-encoded instead of collapsing to `[object Object]`.
	 */
	private static formatDetail(detail: unknown): string {
		if (typeof detail === 'string') return detail;

		if (Array.isArray(detail)) {
			return detail.map((item) => SttClient.formatDetail(item)).join('; ');
		}

		if (typeof detail === 'object' && detail !== null) {
			const { loc, msg } = detail as { loc?: unknown; msg?: unknown };
			if (typeof msg === 'string') {
				return Array.isArray(loc) && loc.length > 0 ? `${loc.join('.')}: ${msg}` : msg;
			}
		}

		return JSON.stringify(detail);
	}
}