feat(gpu-server): complete GPU server setup with AI services, monitoring, and public access

- Set up 5 AI services on Windows GPU server (RTX 3090): - mana-llm (Port 3025): OpenAI-compatible LLM gateway via Ollama - mana-stt (Port 3020): WhisperX with word timestamps + speaker diarization - mana-tts (Port 3022): Kokoro (EN) + Edge TTS (DE) + Piper (local DE) - mana-image-gen (Port 3023): FLUX.2 klein 4B image generation - Ollama (Port 11434): gemma3:4b/12b, qwen2.5-coder:14b, nomic-embed-text - Add @manacore/shared-gpu TypeScript client package with SttClient, TtsClient, ImageClient - Add CUDA-compatible whisper_service using faster-whisper for Windows - Configure public access via Cloudflare Tunnel (gpu-llm/stt/tts/img.mana.how) - Add Loki log aggregator (Docker on Mac Mini) + log shipper on GPU server - Add GPU scrape targets to Prometheus/VictoriaMetrics config - Add Grafana Loki datasource for GPU service logs - Add health check with auto-restart, log rotation, and log shipping - Document complete setup: Always-On config, troubleshooting, architecture Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-20 01:21:24 +02:00 · 2026-03-27 21:35:30 +01:00 · 2026-03-27 21:35:30 +01:00 · 16e0d99c5a
commit 16e0d99c5a
parent 7754cf6e00
13 changed files with 1245 additions and 7 deletions
--- a/packages/shared-gpu/src/gpu-client.ts
+++ b/packages/shared-gpu/src/gpu-client.ts
@ -0,0 +1,56 @@
+import type { GpuServiceConfig } from './types';
+import { SttClient } from './stt-client';
+import { TtsClient } from './tts-client';
+import { ImageClient } from './image-client';
+
+/**
+ * Single entry point that bundles the STT, TTS and image-generation clients.
+ *
+ * All three sub-clients are always constructed from the same config, so every
+ * one of them needs a resolvable URL (via `baseUrl` or an entry in `urls`).
+ *
+ * @example Public URLs (from anywhere):
+ * ```ts
+ * const gpu = new GpuClient({ baseUrl: 'https://gpu.mana.how' });
+ * ```
+ *
+ * @example LAN (direct):
+ * ```ts
+ * const gpu = new GpuClient({ baseUrl: 'http://192.168.178.11' });
+ * ```
+ *
+ * @example Custom URLs (override every service when `baseUrl` is empty):
+ * ```ts
+ * const gpu = new GpuClient({
+ *   baseUrl: '',
+ *   urls: {
+ *     stt: 'https://gpu-stt.mana.how',
+ *     tts: 'https://gpu-tts.mana.how',
+ *     image: 'https://gpu-img.mana.how',
+ *   },
+ * });
+ * ```
+ */
+export class GpuClient {
+	public readonly stt: SttClient;
+	public readonly tts: TtsClient;
+	public readonly image: ImageClient;
+
+	/** Build one client per service, all sharing the given config. */
+	constructor(config: GpuServiceConfig) {
+		this.stt = new SttClient(config);
+		this.tts = new TtsClient(config);
+		this.image = new ImageClient(config);
+	}
+
+	/**
+	 * Probe the health endpoint of every service in parallel.
+	 *
+	 * @returns One flag per service. A flag is `true` only when that service's
+	 *   health request succeeded and its body reported `status === 'healthy'`;
+	 *   a failed or timed-out request yields `false` instead of rejecting.
+	 */
+	async healthCheck(): Promise<{
+		stt: boolean;
+		tts: boolean;
+		image: boolean;
+	}> {
+		// A probe counts as healthy only if it settled successfully AND says so.
+		const isHealthy = (outcome: PromiseSettledResult<{ status: string }>): boolean => {
+			if (outcome.status !== 'fulfilled') {
+				return false;
+			}
+			return outcome.value.status === 'healthy';
+		};
+
+		// allSettled keeps one unreachable service from failing the whole check.
+		const [sttOutcome, ttsOutcome, imageOutcome] = await Promise.allSettled([
+			this.stt.health(),
+			this.tts.health(),
+			this.image.health(),
+		]);
+
+		return {
+			stt: isHealthy(sttOutcome),
+			tts: isHealthy(ttsOutcome),
+			image: isHealthy(imageOutcome),
+		};
+	}
+}
--- a/packages/shared-gpu/src/image-client.ts
+++ b/packages/shared-gpu/src/image-client.ts
@ -0,0 +1,72 @@
+import type {
+	GenerateImageOptions,
+	GenerateImageResult,
+	ImageGenHealthResponse,
+	GpuServiceConfig,
+} from './types';
+import { resolveServiceUrl } from './resolve-url';
+
+/**
+ * HTTP client for the image generation service.
+ *
+ * The service URL is resolved once, at construction time, through
+ * `resolveServiceUrl(config, 'image')`.
+ */
+export class ImageClient {
+	private baseUrl: string;
+	private timeout: number;
+
+	/**
+	 * @param config Shared GPU service configuration. `config.timeout` bounds
+	 *   `generate()` only and falls back to 120 000 ms when unset.
+	 */
+	constructor(config: GpuServiceConfig) {
+		this.baseUrl = resolveServiceUrl(config, 'image');
+		this.timeout = config.timeout ?? 120_000;
+	}
+
+	/**
+	 * Generate an image from a text prompt.
+	 *
+	 * Sends a JSON POST to `/generate`. Omitted options are filled in with
+	 * 1024x1024, 4 steps and `png`; an undefined `seed` is left out of the body.
+	 *
+	 * @param options Prompt and optional generation parameters.
+	 * @returns The parsed response body (cast, not validated).
+	 * @throws Error with the HTTP status and the response's `detail` field when
+	 *   the service answers with a non-2xx status. The request is aborted (and
+	 *   `fetch` rejects) once the configured timeout elapses.
+	 */
+	async generate(options: GenerateImageOptions): Promise<GenerateImageResult> {
+		const controller = new AbortController();
+		const timer = setTimeout(() => controller.abort(), this.timeout);
+
+		try {
+			const response = await fetch(`${this.baseUrl}/generate`, {
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify({
+					prompt: options.prompt,
+					width: options.width ?? 1024,
+					height: options.height ?? 1024,
+					steps: options.steps ?? 4,
+					seed: options.seed,
+					output_format: options.outputFormat ?? 'png',
+				}),
+				signal: controller.signal,
+			});
+
+			if (!response.ok) {
+				// Fall back to the status text when the error body is not JSON.
+				const error = await response.json().catch(() => ({ detail: response.statusText }));
+				throw new Error(
+					`Image generation error ${response.status}: ${(error as { detail: string }).detail}`
+				);
+			}
+
+			return (await response.json()) as GenerateImageResult;
+		} finally {
+			// Always release the timer, whether the request succeeded or threw.
+			clearTimeout(timer);
+		}
+	}
+
+	/**
+	 * Get the full URL for a generated image.
+	 *
+	 * @param relativePath Path on the image service, with or without a leading
+	 *   slash. An empty string yields the bare service URL.
+	 * @returns The service URL joined with the path by exactly one slash.
+	 */
+	imageUrl(relativePath: string): string {
+		// Plain concatenation would turn "images/a.png" into "<host>:3023images/a.png".
+		const needsSlash = relativePath !== '' && !relativePath.startsWith('/');
+		return `${this.baseUrl}${needsSlash ? '/' : ''}${relativePath}`;
+	}
+
+	/**
+	 * Download a generated image as ArrayBuffer.
+	 *
+	 * Uses a fixed 30 s timeout, independent of the configured one.
+	 *
+	 * @throws Error when the service answers with a non-2xx status.
+	 */
+	async downloadImage(relativePath: string): Promise<ArrayBuffer> {
+		const response = await fetch(this.imageUrl(relativePath), {
+			signal: AbortSignal.timeout(30_000),
+		});
+		if (!response.ok) throw new Error(`Failed to download image: ${response.status}`);
+		return response.arrayBuffer();
+	}
+
+	/**
+	 * Check if the image generation service is healthy.
+	 *
+	 * Uses a fixed 5 s timeout. The HTTP status is not inspected; the body is
+	 * parsed and returned as-is, so callers must look at `status` themselves.
+	 */
+	async health(): Promise<ImageGenHealthResponse> {
+		const response = await fetch(`${this.baseUrl}/health`, {
+			signal: AbortSignal.timeout(5000),
+		});
+		return (await response.json()) as ImageGenHealthResponse;
+	}
+}
--- a/packages/shared-gpu/src/index.ts
+++ b/packages/shared-gpu/src/index.ts
@ -0,0 +1,24 @@
+export { GpuClient } from './gpu-client'; // bundles the stt, tts and image clients
+export { SttClient } from './stt-client';
+export { TtsClient } from './tts-client';
+export { ImageClient } from './image-client';
+export { resolveServiceUrl } from './resolve-url'; // URL resolution helper used by each client constructor
+export { GPU_PUBLIC_URLS, GPU_LAN_URLS } from './types'; // runtime constants, hence a value export
+export type {
+	// Config
+	GpuServiceConfig,
+	// STT
+	TranscriptionResult,
+	TranscribeOptions,
+	WordTimestamp,
+	Segment,
+	// TTS
+	SynthesizeOptions,
+	TTSVoice,
+	TTSVoiceType,
+	TTSHealthResponse,
+	// Image
+	GenerateImageOptions,
+	GenerateImageResult,
+	ImageGenHealthResponse,
+} from './types';
--- a/packages/shared-gpu/src/resolve-url.ts
+++ b/packages/shared-gpu/src/resolve-url.ts
@ -0,0 +1,31 @@
+import type { GpuServiceConfig } from './types';
+import { GPU_PUBLIC_URLS } from './types';
+
+type ServiceKey = 'llm' | 'stt' | 'tts' | 'image' | 'ollama'; // services a URL can be resolved for
+
+const LAN_PORTS: Record<ServiceKey, number> = { // port appended to baseUrl in LAN mode, one per service
+	llm: 3025,
+	stt: 3020,
+	tts: 3022,
+	image: 3023,
+	ollama: 11434,
+};
+
+/**
+ * Resolve the base URL of one GPU service from the client config.
+ *
+ * Resolution order:
+ * 1. a non-empty `config.urls[service]` override,
+ * 2. the fixed public hostname when `config.baseUrl` contains `gpu.mana.how`,
+ * 3. otherwise `config.baseUrl` with the service's LAN port appended.
+ *
+ * The result never ends in a slash, so callers can append `/path` directly.
+ *
+ * @param config Client configuration holding `baseUrl` and optional overrides.
+ * @param service Which service to resolve.
+ * @returns The service's base URL.
+ */
+export function resolveServiceUrl(config: GpuServiceConfig, service: ServiceKey): string {
+	// Clients build endpoints as `${url}/path`; a trailing slash on the
+	// configured value would otherwise produce "//path".
+	const stripTrailingSlashes = (url: string): string => url.replace(/\/+$/, '');
+
+	// 1. Explicit override (an empty string counts as "not set")
+	const override = config.urls?.[service];
+	if (override) {
+		return stripTrailingSlashes(override);
+	}
+
+	const base = config.baseUrl;
+
+	// 2. Public mode: "https://gpu.mana.how" → "https://gpu-stt.mana.how"
+	if (base.includes('gpu.mana.how')) {
+		return GPU_PUBLIC_URLS[service];
+	}
+
+	// 3. LAN mode: "http://192.168.178.11" → "http://192.168.178.11:3020"
+	return `${stripTrailingSlashes(base)}:${LAN_PORTS[service]}`;
+}
--- a/packages/shared-gpu/src/stt-client.ts
+++ b/packages/shared-gpu/src/stt-client.ts
@ -0,0 +1,59 @@
+import type { TranscriptionResult, TranscribeOptions, GpuServiceConfig } from './types';
+import { resolveServiceUrl } from './resolve-url';
+
+/**
+ * Client for the speech-to-text service.
+ *
+ * The service URL comes from `resolveServiceUrl(config, 'stt')`; transcription
+ * requests are aborted after `config.timeout` ms (60 000 ms when unset).
+ */
+export class SttClient {
+	private baseUrl: string;
+	private timeout: number;
+
+	constructor(config: GpuServiceConfig) {
+		this.baseUrl = resolveServiceUrl(config, 'stt');
+		this.timeout = config.timeout ?? 60_000;
+	}
+
+	/**
+	 * Upload audio to `/transcribe` as multipart form data and return the parsed result.
+	 *
+	 * @param audioBuffer Audio bytes; anything that is not already a Blob is copied into one.
+	 * @param filename File name attached to the uploaded `file` part.
+	 * @param options Optional language/model selection, alignment and diarization switches.
+	 * @returns The parsed response body (cast, not validated).
+	 * @throws Error with the HTTP status and the response's `detail` field on a non-2xx answer.
+	 */
+	async transcribe(
+		audioBuffer: Buffer | Blob,
+		filename: string,
+		options: TranscribeOptions = {}
+	): Promise<TranscriptionResult> {
+		const upload =
+			audioBuffer instanceof Blob ? audioBuffer : new Blob([new Uint8Array(audioBuffer)]);
+
+		// Text fields in wire order; `null` marks a field that is left out entirely.
+		const textFields: Array<[string, string | null]> = [
+			['language', options.language ? options.language : null],
+			['model', options.model ? options.model : null],
+			['align', String(options.align ?? true)],
+			['diarize', String(options.diarize ?? false)],
+			['min_speakers', options.minSpeakers != null ? String(options.minSpeakers) : null],
+			['max_speakers', options.maxSpeakers != null ? String(options.maxSpeakers) : null],
+		];
+
+		const body = new FormData();
+		body.append('file', upload, filename);
+		for (const [fieldName, fieldValue] of textFields) {
+			if (fieldValue !== null) {
+				body.append(fieldName, fieldValue);
+			}
+		}
+
+		const abort = new AbortController();
+		const timeoutId = setTimeout(() => abort.abort(), this.timeout);
+
+		try {
+			const response = await fetch(`${this.baseUrl}/transcribe`, {
+				method: 'POST',
+				body,
+				signal: abort.signal,
+			});
+
+			if (response.ok) {
+				return (await response.json()) as TranscriptionResult;
+			}
+
+			// Error path: prefer the JSON error body, fall back to the status text.
+			let failure: unknown;
+			try {
+				failure = await response.json();
+			} catch {
+				failure = { detail: response.statusText };
+			}
+			throw new Error(`STT error ${response.status}: ${(failure as { detail: string }).detail}`);
+		} finally {
+			clearTimeout(timeoutId);
+		}
+	}
+
+	/**
+	 * Fetch the STT service's `/health` document with a fixed 5 s timeout.
+	 * The HTTP status is not inspected; the parsed body is returned as-is.
+	 */
+	async health(): Promise<{ status: string; whisperx: boolean }> {
+		const healthUrl = `${this.baseUrl}/health`;
+		const response = await fetch(healthUrl, { signal: AbortSignal.timeout(5000) });
+		const report = await response.json();
+		return report as { status: string; whisperx: boolean };
+	}
+}
--- a/packages/shared-gpu/src/tts-client.ts
+++ b/packages/shared-gpu/src/tts-client.ts
@ -0,0 +1,67 @@
+import type { SynthesizeOptions, TTSVoice, TTSHealthResponse, GpuServiceConfig } from './types';
+import { resolveServiceUrl } from './resolve-url';
+
+/**
+ * HTTP client for the text-to-speech service.
+ *
+ * The service URL is resolved once, at construction time, through
+ * `resolveServiceUrl(config, 'tts')`.
+ */
+export class TtsClient {
+	private baseUrl: string;
+	private timeout: number;
+
+	/**
+	 * @param config Shared GPU service configuration. `config.timeout` bounds
+	 *   `synthesize()` only and falls back to 30 000 ms when unset.
+	 */
+	constructor(config: GpuServiceConfig) {
+		this.baseUrl = resolveServiceUrl(config, 'tts');
+		this.timeout = config.timeout ?? 30_000;
+	}
+
+	/**
+	 * Synthesize speech. Returns audio as ArrayBuffer.
+	 *
+	 * Sends a JSON POST to `/synthesize/auto`; `speed` defaults to 1.0 and the
+	 * output format to `wav`. Metadata is read from the response headers
+	 * `content-type`, `x-voice` and `x-duration`.
+	 *
+	 * @param options Text plus optional voice, speed and output format.
+	 * @returns The audio bytes and metadata. `duration` is 0 when the
+	 *   `x-duration` header is missing or not a number.
+	 * @throws Error with the HTTP status and the response's `detail` field on a
+	 *   non-2xx answer. The request is aborted once the configured timeout elapses.
+	 */
+	async synthesize(options: SynthesizeOptions): Promise<{
+		audio: ArrayBuffer;
+		contentType: string;
+		voice: string;
+		duration: number;
+	}> {
+		const controller = new AbortController();
+		const timer = setTimeout(() => controller.abort(), this.timeout);
+
+		try {
+			const response = await fetch(`${this.baseUrl}/synthesize/auto`, {
+				method: 'POST',
+				headers: { 'Content-Type': 'application/json' },
+				body: JSON.stringify({
+					text: options.text,
+					voice: options.voice,
+					speed: options.speed ?? 1.0,
+					output_format: options.outputFormat ?? 'wav',
+				}),
+				signal: controller.signal,
+			});
+
+			if (!response.ok) {
+				// Fall back to the status text when the error body is not JSON.
+				const error = await response.json().catch(() => ({ detail: response.statusText }));
+				throw new Error(`TTS error ${response.status}: ${(error as { detail: string }).detail}`);
+			}
+
+			// A malformed header would parse to NaN; report 0 instead.
+			const duration = parseFloat(response.headers.get('x-duration') ?? '0');
+
+			return {
+				audio: await response.arrayBuffer(),
+				contentType: response.headers.get('content-type') ?? 'audio/wav',
+				voice: response.headers.get('x-voice') ?? options.voice ?? 'default',
+				duration: Number.isFinite(duration) ? duration : 0,
+			};
+		} finally {
+			// Always release the timer, whether the request succeeded or threw.
+			clearTimeout(timer);
+		}
+	}
+
+	/**
+	 * Get available voices.
+	 *
+	 * Uses a fixed 5 s timeout.
+	 *
+	 * @throws Error with the HTTP status and the response's `detail` field on a
+	 *   non-2xx answer, so an error body is never returned as a voice list.
+	 */
+	async voices(): Promise<{ kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] }> {
+		const response = await fetch(`${this.baseUrl}/voices`, {
+			signal: AbortSignal.timeout(5000),
+		});
+
+		if (!response.ok) {
+			const error = await response.json().catch(() => ({ detail: response.statusText }));
+			throw new Error(`TTS error ${response.status}: ${(error as { detail: string }).detail}`);
+		}
+
+		return (await response.json()) as { kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] };
+	}
+
+	/**
+	 * Check if the TTS service is healthy.
+	 *
+	 * Uses a fixed 5 s timeout. The HTTP status is not inspected; the body is
+	 * parsed and returned as-is, so callers must look at `status` themselves.
+	 */
+	async health(): Promise<TTSHealthResponse> {
+		const response = await fetch(`${this.baseUrl}/health`, {
+			signal: AbortSignal.timeout(5000),
+		});
+		return (await response.json()) as TTSHealthResponse;
+	}
+}
--- a/packages/shared-gpu/src/types.ts
+++ b/packages/shared-gpu/src/types.ts
@ -0,0 +1,142 @@
+// ============================================================================
+// STT Types
+// ============================================================================
+
+export interface WordTimestamp { // one transcribed word with its time span
+	word: string;
+	start: number; // presumably seconds from the start of the audio — TODO confirm
+	end: number;
+	score?: number; // NOTE(review): looks like an alignment confidence — confirm against the STT service
+	speaker?: string; // presumably set only when diarization ran — TODO confirm
+}
+
+export interface Segment { // a span of transcript text between `start` and `end`
+	start: number; // presumably same time unit as WordTimestamp.start — TODO confirm
+	end: number;
+	text: string;
+	speaker?: string; // presumably set only when diarization ran — TODO confirm
+}
+
+export interface TranscriptionResult { // success body of SttClient.transcribe (cast from JSON, not validated)
+	text: string;
+	language?: string;
+	model: string;
+	latency_ms?: number;
+	duration_seconds?: number;
+	words?: WordTimestamp[]; // presumably present only when alignment ran — TODO confirm
+	segments?: Segment[];
+	speakers?: string[]; // presumably present only when diarization ran — TODO confirm
+}
+
+export interface TranscribeOptions { // options for SttClient.transcribe; each maps to one multipart form field
+	language?: string; // sent as `language` only when non-empty
+	model?: string; // sent as `model` only when non-empty
+	/** Enable word-level timestamp alignment (default: true) */
+	align?: boolean;
+	/** Enable speaker diarization (default: false) */
+	diarize?: boolean;
+	minSpeakers?: number; // sent as `min_speakers` when not null/undefined
+	maxSpeakers?: number; // sent as `max_speakers` when not null/undefined
+}
+
+// ============================================================================
+// TTS Types
+// ============================================================================
+
+export interface SynthesizeOptions { // input for TtsClient.synthesize
+	text: string;
+	voice?: string; // forwarded unchanged; dropped from the JSON body when undefined
+	speed?: number; // TtsClient sends 1.0 when omitted
+	outputFormat?: 'wav' | 'mp3'; // sent as `output_format`; TtsClient sends 'wav' when omitted
+}
+
+export type TTSVoiceType = 'kokoro' | 'piper' | 'edge' | 'f5_custom'; // NOTE(review): presumably the backend that provides a voice — confirm
+
+export interface TTSVoice { // element type of the `kokoro_voices` / `custom_voices` lists returned by TtsClient.voices
+	id: string;
+	name: string;
+	description: string;
+	type: TTSVoiceType;
+}
+
+export interface TTSHealthResponse { // body of the TTS `/health` endpoint as typed by TtsClient.health (cast, not validated)
+	status: string; // GpuClient.healthCheck reports the service healthy only when this equals 'healthy'
+	service: string;
+	models_loaded: Record<string, boolean>;
+	auth_required: boolean;
+}
+
+// ============================================================================
+// Image Generation Types
+// ============================================================================
+
+export interface GenerateImageOptions { // input for ImageClient.generate
+	prompt: string;
+	width?: number; // ImageClient sends 1024 when omitted
+	height?: number; // ImageClient sends 1024 when omitted
+	steps?: number; // ImageClient sends 4 when omitted
+	seed?: number; // forwarded unchanged; dropped from the JSON body when undefined
+	outputFormat?: 'png' | 'jpg'; // sent as `output_format`; ImageClient sends 'png' when omitted
+}
+
+export interface GenerateImageResult { // success body of ImageClient.generate (cast from JSON, not validated)
+	success: boolean;
+	image_url: string; // presumably a path on the image service, usable with ImageClient.imageUrl — TODO confirm
+	prompt: string;
+	width: number;
+	height: number;
+	steps: number;
+	seed: number;
+	generation_time: number; // NOTE(review): unit not visible here, presumably seconds — confirm
+}
+
+export interface ImageGenHealthResponse { // body of the image service `/health` endpoint as typed by ImageClient.health
+	status: string; // GpuClient.healthCheck reports the service healthy only when this equals 'healthy'
+	service: string;
+	flux_available: boolean;
+}
+
+// ============================================================================
+// GPU Service Config
+// ============================================================================
+
+export interface GpuServiceConfig {
+	/**
+	 * Base URL of the GPU server.
+	 *
+	 * LAN mode (single host, different ports):
+	 *   `http://192.168.178.11` → :3025, :3020, :3022, :3023
+	 *
+	 * Public mode (different hostnames):
+	 *   `https://gpu.mana.how` → gpu-llm.mana.how, gpu-stt.mana.how, etc.
+	 *
+	 * Public mode is selected when this string contains `gpu.mana.how`; any
+	 * other value is treated as a LAN host and gets `:<port>` appended.
+	 */
+	baseUrl: string;
+	/** Override individual service URLs (takes precedence over baseUrl) */
+	urls?: {
+		llm?: string;
+		stt?: string;
+		tts?: string;
+		image?: string;
+		ollama?: string;
+	};
+	/**
+	 * Request timeout in ms for the main request of each client. When omitted,
+	 * each client applies its own default: TtsClient 30000, SttClient 60000,
+	 * ImageClient 120000. Health, voice-list and image-download requests use
+	 * fixed timeouts and ignore this value.
+	 */
+	timeout?: number;
+}
+
+/**
+ * Default public URLs, one hostname per service. `resolveServiceUrl` returns
+ * the matching entry when `baseUrl` contains `gpu.mana.how`.
+ */
+export const GPU_PUBLIC_URLS = {
+	llm: 'https://gpu-llm.mana.how',
+	stt: 'https://gpu-stt.mana.how',
+	tts: 'https://gpu-tts.mana.how',
+	image: 'https://gpu-img.mana.how',
+	ollama: 'https://gpu-ollama.mana.how',
+} as const;
+
+/**
+ * Default LAN URLs: one host, one port per service. The ports repeat the
+ * `LAN_PORTS` table in resolve-url.ts, so the two must be kept in sync.
+ */
+export const GPU_LAN_URLS = {
+	llm: 'http://192.168.178.11:3025',
+	stt: 'http://192.168.178.11:3020',
+	tts: 'http://192.168.178.11:3022',
+	image: 'http://192.168.178.11:3023',
+	ollama: 'http://192.168.178.11:11434',
+} as const;