feat(gpu-server): complete GPU server setup with AI services, monitoring, and public access

- Set up 5 AI services on Windows GPU server (RTX 3090):
  - mana-llm (Port 3025): OpenAI-compatible LLM gateway via Ollama
  - mana-stt (Port 3020): WhisperX with word timestamps + speaker diarization
  - mana-tts (Port 3022): Kokoro (EN) + Edge TTS (DE) + Piper (local DE)
  - mana-image-gen (Port 3023): FLUX.2 klein 4B image generation
  - Ollama (Port 11434): gemma3:4b/12b, qwen2.5-coder:14b, nomic-embed-text

- Add @manacore/shared-gpu TypeScript client package with SttClient, TtsClient, ImageClient
- Add CUDA-compatible whisper_service using faster-whisper for Windows
- Configure public access via Cloudflare Tunnel (gpu-llm/stt/tts/img.mana.how)
- Add Loki log aggregator (Docker on Mac Mini) + log shipper on GPU server
- Add GPU scrape targets to Prometheus/VictoriaMetrics config
- Add Grafana Loki datasource for GPU service logs
- Add health check with auto-restart, log rotation, and log shipping
- Document complete setup: Always-On config, troubleshooting, architecture

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Till JS 2026-03-27 21:35:30 +01:00
parent 7754cf6e00
commit 16e0d99c5a
13 changed files with 1245 additions and 7 deletions

View file

@ -0,0 +1,56 @@
import type { GpuServiceConfig } from './types';
import { SttClient } from './stt-client';
import { TtsClient } from './tts-client';
import { ImageClient } from './image-client';
/**
 * Single entry point that bundles the speech-to-text, text-to-speech and
 * image generation clients, all configured from one `GpuServiceConfig`.
 *
 * @example Public hostnames (reachable from anywhere):
 * ```ts
 * const gpu = new GpuClient({ baseUrl: 'https://gpu.mana.how' });
 * ```
 *
 * @example Direct LAN access:
 * ```ts
 * const gpu = new GpuClient({ baseUrl: 'http://192.168.178.11' });
 * ```
 *
 * @example Explicit per-service URLs:
 * ```ts
 * const gpu = new GpuClient({
 *   baseUrl: '',
 *   urls: {
 *     stt: 'https://gpu-stt.mana.how',
 *     tts: 'https://gpu-tts.mana.how',
 *     image: 'https://gpu-img.mana.how',
 *   },
 * });
 * ```
 */
export class GpuClient {
  /** Speech-to-text client. */
  public readonly stt: SttClient;
  /** Text-to-speech client. */
  public readonly tts: TtsClient;
  /** Image generation client. */
  public readonly image: ImageClient;

  /**
   * Builds one client per service; every client receives the same config.
   *
   * @param config Shared connection settings.
   */
  constructor(config: GpuServiceConfig) {
    this.stt = new SttClient(config);
    this.tts = new TtsClient(config);
    this.image = new ImageClient(config);
  }

  /**
   * Probes all three services in parallel.
   *
   * A service counts as healthy only when its health request resolves and the
   * response's `status` field equals `'healthy'`. A failed request is reported
   * as `false` instead of rejecting the returned promise.
   *
   * @returns One boolean per service.
   */
  async healthCheck(): Promise<{
    stt: boolean;
    tts: boolean;
    image: boolean;
  }> {
    const reportsHealthy = (outcome: PromiseSettledResult<{ status: string }>): boolean => {
      if (outcome.status !== 'fulfilled') {
        return false;
      }
      return outcome.value.status === 'healthy';
    };

    const [sttOutcome, ttsOutcome, imageOutcome] = await Promise.allSettled([
      this.stt.health(),
      this.tts.health(),
      this.image.health(),
    ]);

    return {
      stt: reportsHealthy(sttOutcome),
      tts: reportsHealthy(ttsOutcome),
      image: reportsHealthy(imageOutcome),
    };
  }
}

View file

@ -0,0 +1,72 @@
import type {
GenerateImageOptions,
GenerateImageResult,
ImageGenHealthResponse,
GpuServiceConfig,
} from './types';
import { resolveServiceUrl } from './resolve-url';
/**
 * HTTP client for the image generation service.
 *
 * The service URL is resolved once, at construction time, from the supplied
 * config via `resolveServiceUrl(config, 'image')`.
 */
export class ImageClient {
  private readonly baseUrl: string;
  private readonly timeout: number;

  /**
   * @param config Connection settings. `config.timeout` (ms) bounds
   *   {@link ImageClient.generate}; it defaults to 120000 when omitted.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'image');
    this.timeout = config.timeout ?? 120_000;
  }

  /**
   * Generate an image from a text prompt.
   *
   * Omitted options default to 1024x1024, 4 steps and `'png'` output. `seed`
   * is forwarded unchanged (and dropped from the JSON body when undefined).
   *
   * @param options Prompt and optional generation parameters.
   * @returns The parsed JSON response of `POST /generate`.
   * @throws Error when the service answers with a non-2xx status. The message
   *   contains the status code and the `detail` field of the error body, or
   *   the HTTP status text when the body is not JSON.
   */
  async generate(options: GenerateImageOptions): Promise<GenerateImageResult> {
    // Abort the request once the configured timeout elapses.
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);
    try {
      const response = await fetch(`${this.baseUrl}/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          prompt: options.prompt,
          width: options.width ?? 1024,
          height: options.height ?? 1024,
          steps: options.steps ?? 4,
          seed: options.seed,
          output_format: options.outputFormat ?? 'png',
        }),
        signal: controller.signal,
      });
      if (!response.ok) {
        // Error bodies are expected to be JSON with a `detail` field; fall
        // back to the status text when the body cannot be parsed.
        const error = await response.json().catch(() => ({ detail: response.statusText }));
        throw new Error(
          `Image generation error ${response.status}: ${(error as { detail: string }).detail}`
        );
      }
      return (await response.json()) as GenerateImageResult;
    } finally {
      // Always release the timer so it cannot fire after the call settled.
      clearTimeout(timer);
    }
  }

  /**
   * Get the full URL for a generated image.
   *
   * The path is joined to the service base URL with exactly one slash, so
   * both `/images/a.png` and `images/a.png` work, as does a base URL that
   * ends in a slash. A value that is already an absolute `http(s)` URL is
   * returned unchanged.
   *
   * @param relativePath Path of the image on the image service.
   * @returns Absolute URL of the image.
   */
  imageUrl(relativePath: string): string {
    if (relativePath === '') {
      return this.baseUrl;
    }
    if (/^https?:\/\//i.test(relativePath)) {
      return relativePath;
    }
    const base = this.baseUrl.replace(/\/+$/, '');
    const path = relativePath.replace(/^\/+/, '');
    return `${base}/${path}`;
  }

  /**
   * Download a generated image as ArrayBuffer.
   *
   * Uses a fixed 30 second timeout, independent of the configured timeout.
   *
   * @param relativePath Path of the image, resolved with {@link ImageClient.imageUrl}.
   * @throws Error when the service answers with a non-2xx status.
   */
  async downloadImage(relativePath: string): Promise<ArrayBuffer> {
    const response = await fetch(this.imageUrl(relativePath), {
      signal: AbortSignal.timeout(30_000),
    });
    if (!response.ok) throw new Error(`Failed to download image: ${response.status}`);
    return response.arrayBuffer();
  }

  /**
   * Check if the image generation service is healthy.
   *
   * Uses a fixed 5 second timeout. The HTTP status is not inspected; the JSON
   * body of `GET /health` is returned as-is.
   */
  async health(): Promise<ImageGenHealthResponse> {
    const response = await fetch(`${this.baseUrl}/health`, {
      signal: AbortSignal.timeout(5000),
    });
    return (await response.json()) as ImageGenHealthResponse;
  }
}

View file

@ -0,0 +1,24 @@
export { GpuClient } from './gpu-client';
export { SttClient } from './stt-client';
export { TtsClient } from './tts-client';
export { ImageClient } from './image-client';
export { resolveServiceUrl } from './resolve-url';
export { GPU_PUBLIC_URLS, GPU_LAN_URLS } from './types';
export type {
// Config
GpuServiceConfig,
// STT
TranscriptionResult,
TranscribeOptions,
WordTimestamp,
Segment,
// TTS
SynthesizeOptions,
TTSVoice,
TTSVoiceType,
TTSHealthResponse,
// Image
GenerateImageOptions,
GenerateImageResult,
ImageGenHealthResponse,
} from './types';

View file

@ -0,0 +1,31 @@
import type { GpuServiceConfig } from './types';
import { GPU_PUBLIC_URLS } from './types';
/** Identifier of a GPU service that can be resolved to a URL. */
type ServiceKey = 'llm' | 'stt' | 'tts' | 'image' | 'ollama';

/** Port appended to the base URL for each service in LAN mode. */
const LAN_PORTS: Record<ServiceKey, number> = {
  llm: 3025,
  stt: 3020,
  tts: 3022,
  image: 3023,
  ollama: 11434,
};

/** Remove every trailing slash so that `/path` or `:port` can be appended safely. */
function stripTrailingSlashes(url: string): string {
  return url.replace(/\/+$/, '');
}

/**
 * Resolve the URL for a specific GPU service based on config.
 *
 * Resolution order:
 * 1. `config.urls[service]`, when set to a non-empty string.
 * 2. The matching entry of `GPU_PUBLIC_URLS`, when `config.baseUrl` contains
 *    `gpu.mana.how` or is empty. The empty case covers configs that only
 *    list overrides for some services; previously such a config resolved the
 *    remaining services to an unusable `":<port>"` string.
 * 3. LAN mode: `config.baseUrl` followed by the service's port.
 *
 * Trailing slashes are removed from overrides and from the base URL, so the
 * result never ends in `/`.
 *
 * @param config Connection settings.
 * @param service Service to resolve.
 * @returns URL of the service without a trailing slash.
 */
export function resolveServiceUrl(config: GpuServiceConfig, service: ServiceKey): string {
  // 1. Explicit override
  const override = config.urls?.[service];
  if (override) {
    return stripTrailingSlashes(override);
  }

  // Tolerate a missing baseUrl from untyped (plain JavaScript) callers.
  const base = stripTrailingSlashes((config.baseUrl ?? '').trim());

  // 2. Public mode: "https://gpu.mana.how" → "https://gpu-stt.mana.how"
  if (base === '' || base.includes('gpu.mana.how')) {
    return GPU_PUBLIC_URLS[service];
  }

  // 3. LAN mode: "http://192.168.178.11" → "http://192.168.178.11:3020"
  return `${base}:${LAN_PORTS[service]}`;
}

View file

@ -0,0 +1,59 @@
import type { TranscriptionResult, TranscribeOptions, GpuServiceConfig } from './types';
import { resolveServiceUrl } from './resolve-url';
/**
 * HTTP client for the speech-to-text service.
 *
 * The service URL is resolved once, at construction time, via
 * `resolveServiceUrl(config, 'stt')`.
 */
export class SttClient {
  private baseUrl: string;
  private timeout: number;

  /**
   * @param config Connection settings. `config.timeout` (ms) bounds
   *   `transcribe`; it defaults to 60000 when omitted.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'stt');
    this.timeout = config.timeout ?? 60_000;
  }

  /**
   * Uploads audio to `POST /transcribe` as multipart form data.
   *
   * `align` defaults to `true` and `diarize` to `false`; both are always
   * sent. `language` and `model` are sent only when non-empty, and the
   * speaker bounds only when they are neither `null` nor `undefined`.
   *
   * @param audioBuffer Audio bytes; non-Blob input is copied into a Blob.
   * @param filename File name attached to the uploaded form part.
   * @param options Optional transcription settings.
   * @returns The parsed JSON response.
   * @throws Error when the service answers with a non-2xx status.
   */
  async transcribe(
    audioBuffer: Buffer | Blob,
    filename: string,
    options: TranscribeOptions = {}
  ): Promise<TranscriptionResult> {
    const form = new FormData();

    let upload: Blob;
    if (audioBuffer instanceof Blob) {
      upload = audioBuffer;
    } else {
      upload = new Blob([new Uint8Array(audioBuffer)]);
    }
    form.append('file', upload, filename);

    if (options.language) {
      form.append('language', options.language);
    }
    if (options.model) {
      form.append('model', options.model);
    }
    form.append('align', String(options.align ?? true));
    form.append('diarize', String(options.diarize ?? false));
    if (options.minSpeakers != null) {
      form.append('min_speakers', String(options.minSpeakers));
    }
    if (options.maxSpeakers != null) {
      form.append('max_speakers', String(options.maxSpeakers));
    }

    // Cancel the upload once the configured timeout has elapsed.
    const aborter = new AbortController();
    const deadline = setTimeout(() => aborter.abort(), this.timeout);
    try {
      const res = await fetch(`${this.baseUrl}/transcribe`, {
        method: 'POST',
        body: form,
        signal: aborter.signal,
      });
      if (res.ok) {
        return (await res.json()) as TranscriptionResult;
      }
      // Prefer the `detail` field of a JSON error body; otherwise use the status text.
      const failure = (await res.json().catch(() => ({ detail: res.statusText }))) as {
        detail: string;
      };
      throw new Error(`STT error ${res.status}: ${failure.detail}`);
    } finally {
      clearTimeout(deadline);
    }
  }

  /**
   * Fetches `GET /health` with a fixed 5 second timeout and returns its JSON
   * body. The HTTP status code is not inspected.
   */
  async health(): Promise<{ status: string; whisperx: boolean }> {
    const endpoint = `${this.baseUrl}/health`;
    const res = await fetch(endpoint, { signal: AbortSignal.timeout(5000) });
    const payload = await res.json();
    return payload as { status: string; whisperx: boolean };
  }
}

View file

@ -0,0 +1,67 @@
import type { SynthesizeOptions, TTSVoice, TTSHealthResponse, GpuServiceConfig } from './types';
import { resolveServiceUrl } from './resolve-url';
/**
 * HTTP client for the text-to-speech service.
 *
 * The service URL is resolved once, at construction time, via
 * `resolveServiceUrl(config, 'tts')`.
 */
export class TtsClient {
  private baseUrl: string;
  private timeout: number;

  /**
   * @param config Connection settings. `config.timeout` (ms) bounds
   *   `synthesize`; it defaults to 30000 when omitted.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'tts');
    this.timeout = config.timeout ?? 30_000;
  }

  /**
   * Sends text to `POST /synthesize/auto` and returns the audio bytes.
   *
   * `speed` defaults to `1.0` and the output format to `'wav'`. Metadata is
   * read from the response headers: `content-type` (fallback `'audio/wav'`),
   * `x-voice` (fallback: the requested voice, then `'default'`) and
   * `x-duration` (parsed with `parseFloat`, fallback `'0'`).
   *
   * @param options Text and optional voice settings.
   * @throws Error when the service answers with a non-2xx status.
   */
  async synthesize(options: SynthesizeOptions): Promise<{
    audio: ArrayBuffer;
    contentType: string;
    voice: string;
    duration: number;
  }> {
    // Cancel the request once the configured timeout has elapsed.
    const aborter = new AbortController();
    const deadline = setTimeout(() => aborter.abort(), this.timeout);
    try {
      const payload = JSON.stringify({
        text: options.text,
        voice: options.voice,
        speed: options.speed ?? 1.0,
        output_format: options.outputFormat ?? 'wav',
      });
      const res = await fetch(`${this.baseUrl}/synthesize/auto`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: payload,
        signal: aborter.signal,
      });

      if (!res.ok) {
        // Prefer the `detail` field of a JSON error body; otherwise use the status text.
        const failure = (await res.json().catch(() => ({ detail: res.statusText }))) as {
          detail: string;
        };
        throw new Error(`TTS error ${res.status}: ${failure.detail}`);
      }

      const audio = await res.arrayBuffer();
      const meta = res.headers;
      return {
        audio,
        contentType: meta.get('content-type') ?? 'audio/wav',
        voice: meta.get('x-voice') ?? options.voice ?? 'default',
        duration: parseFloat(meta.get('x-duration') ?? '0'),
      };
    } finally {
      clearTimeout(deadline);
    }
  }

  /** Fetches `GET /voices` and returns its JSON body. */
  async voices(): Promise<{ kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] }> {
    const payload = await this.getJson('/voices');
    return payload as { kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] };
  }

  /** Fetches `GET /health` and returns its JSON body. */
  async health(): Promise<TTSHealthResponse> {
    const payload = await this.getJson('/health');
    return payload as TTSHealthResponse;
  }

  /**
   * GETs a path below the service URL with a fixed 5 second timeout and
   * parses the body as JSON. The HTTP status code is not inspected.
   */
  private async getJson(path: string): Promise<unknown> {
    const res = await fetch(`${this.baseUrl}${path}`, {
      signal: AbortSignal.timeout(5000),
    });
    return res.json();
  }
}

View file

@ -0,0 +1,142 @@
// ============================================================================
// STT Types
// ============================================================================
/**
 * One word of a transcription, with its position in the audio.
 *
 * NOTE(review): `start`/`end` are presumably offsets in seconds and `score`
 * an alignment confidence — confirm against the STT service.
 */
export interface WordTimestamp {
word: string;
start: number;
end: number;
score?: number;
/** Speaker label for this word, when the service provides one. */
speaker?: string;
}
/**
 * A contiguous piece of transcribed text.
 *
 * NOTE(review): `start`/`end` presumably use the same unit as
 * `WordTimestamp` — confirm against the STT service.
 */
export interface Segment {
start: number;
end: number;
text: string;
/** Speaker label for this segment, when the service provides one. */
speaker?: string;
}
/**
 * Shape that `SttClient.transcribe` casts the JSON response of
 * `POST /transcribe` to. The response is not validated at runtime.
 */
export interface TranscriptionResult {
/** Full transcribed text. */
text: string;
language?: string;
model: string;
latency_ms?: number;
duration_seconds?: number;
words?: WordTimestamp[];
segments?: Segment[];
speakers?: string[];
}
/** Optional settings for `SttClient.transcribe`. */
export interface TranscribeOptions {
/** Sent as the `language` form field; omitted from the request when empty. */
language?: string;
/** Sent as the `model` form field; omitted from the request when empty. */
model?: string;
/** Enable word-level timestamp alignment (default: true) */
align?: boolean;
/** Enable speaker diarization (default: false) */
diarize?: boolean;
/** Sent as the `min_speakers` form field when not null/undefined. */
minSpeakers?: number;
/** Sent as the `max_speakers` form field when not null/undefined. */
maxSpeakers?: number;
}
// ============================================================================
// TTS Types
// ============================================================================
/** Input for `TtsClient.synthesize`. */
export interface SynthesizeOptions {
/** Text to synthesize. */
text: string;
/**
 * Voice to request. Forwarded unchanged; when the response carries no
 * `x-voice` header, this value (or `'default'`) is reported as the voice used.
 */
voice?: string;
/** Speed value sent to the service (default: 1.0). */
speed?: number;
/** Requested audio format, sent as `output_format` (default: 'wav'). */
outputFormat?: 'wav' | 'mp3';
}
/** Kind of voice, as reported in `TTSVoice.type`. */
export type TTSVoiceType = 'kokoro' | 'piper' | 'edge' | 'f5_custom';
/**
 * Element type of the `kokoro_voices` and `custom_voices` lists returned by
 * `TtsClient.voices`.
 */
export interface TTSVoice {
id: string;
name: string;
description: string;
type: TTSVoiceType;
}
/**
 * Shape that `TtsClient.health` casts the JSON body of `GET /health` to.
 * `GpuClient.healthCheck` treats the service as healthy only when `status`
 * equals `'healthy'`.
 */
export interface TTSHealthResponse {
status: string;
service: string;
models_loaded: Record<string, boolean>;
auth_required: boolean;
}
// ============================================================================
// Image Generation Types
// ============================================================================
/** Input for `ImageClient.generate`. */
export interface GenerateImageOptions {
/** Text prompt describing the image. */
prompt: string;
/** Width sent to the service (default: 1024). */
width?: number;
/** Height sent to the service (default: 1024). */
height?: number;
/** Step count sent to the service (default: 4). */
steps?: number;
/** Forwarded unchanged; omitted from the request body when undefined. */
seed?: number;
/** Requested image format, sent as `output_format` (default: 'png'). */
outputFormat?: 'png' | 'jpg';
}
/**
 * Shape that `ImageClient.generate` casts the JSON response of
 * `POST /generate` to. The response is not validated at runtime.
 */
export interface GenerateImageResult {
success: boolean;
/**
 * Location of the generated image.
 * NOTE(review): presumably a path relative to the image service, meant to be
 * passed to `ImageClient.imageUrl` / `downloadImage` — confirm against the service.
 */
image_url: string;
prompt: string;
width: number;
height: number;
steps: number;
seed: number;
/** NOTE(review): unit is not visible here (presumably seconds) — confirm. */
generation_time: number;
}
/**
 * Shape that `ImageClient.health` casts the JSON body of `GET /health` to.
 * `GpuClient.healthCheck` treats the service as healthy only when `status`
 * equals `'healthy'`.
 */
export interface ImageGenHealthResponse {
status: string;
service: string;
flux_available: boolean;
}
// ============================================================================
// GPU Service Config
// ============================================================================
/** Connection settings shared by all GPU service clients. */
export interface GpuServiceConfig {
/**
 * Base URL of the GPU server.
 *
 * LAN mode (single host, different ports): the service port is appended, e.g.
 * `http://192.168.178.11` becomes `http://192.168.178.11:3020` for STT
 * (ports: llm 3025, stt 3020, tts 3022, image 3023, ollama 11434).
 *
 * Public mode (different hostnames): any value containing `gpu.mana.how`
 * selects the per-service public hostnames, e.g.
 * `https://gpu.mana.how` resolves to gpu-llm.mana.how, gpu-stt.mana.how, etc.
 */
baseUrl: string;
/** Override individual service URLs (takes precedence over baseUrl) */
urls?: {
llm?: string;
stt?: string;
tts?: string;
image?: string;
ollama?: string;
};
/**
 * Request timeout in ms for the main operation of each client. The default
 * depends on the client: 60000 for `SttClient.transcribe`, 30000 for
 * `TtsClient.synthesize`, 120000 for `ImageClient.generate`.
 *
 * Health checks and `TtsClient.voices` (5000 ms) and
 * `ImageClient.downloadImage` (30000 ms) use fixed timeouts and ignore this value.
 */
timeout?: number;
}
/**
 * Default public URLs, one hostname per service. `resolveServiceUrl` returns
 * these when the configured base URL contains `gpu.mana.how`.
 *
 * NOTE(review): confirm that every hostname listed here (in particular the
 * `ollama` one) is actually exposed publicly.
 */
export const GPU_PUBLIC_URLS = {
llm: 'https://gpu-llm.mana.how',
stt: 'https://gpu-stt.mana.how',
tts: 'https://gpu-tts.mana.how',
image: 'https://gpu-img.mana.how',
ollama: 'https://gpu-ollama.mana.how',
} as const;
/**
 * Default LAN URLs: a single host with one port per service. The ports match
 * the ones `resolveServiceUrl` appends in LAN mode.
 */
export const GPU_LAN_URLS = {
llm: 'http://192.168.178.11:3025',
stt: 'http://192.168.178.11:3020',
tts: 'http://192.168.178.11:3022',
image: 'http://192.168.178.11:3023',
ollama: 'http://192.168.178.11:11434',
} as const;