mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-20 01:21:24 +02:00
feat(gpu-server): complete GPU server setup with AI services, monitoring, and public access
- Set up 5 AI services on Windows GPU server (RTX 3090): - mana-llm (Port 3025): OpenAI-compatible LLM gateway via Ollama - mana-stt (Port 3020): WhisperX with word timestamps + speaker diarization - mana-tts (Port 3022): Kokoro (EN) + Edge TTS (DE) + Piper (local DE) - mana-image-gen (Port 3023): FLUX.2 klein 4B image generation - Ollama (Port 11434): gemma3:4b/12b, qwen2.5-coder:14b, nomic-embed-text - Add @manacore/shared-gpu TypeScript client package with SttClient, TtsClient, ImageClient - Add CUDA-compatible whisper_service using faster-whisper for Windows - Configure public access via Cloudflare Tunnel (gpu-llm/stt/tts/img.mana.how) - Add Loki log aggregator (Docker on Mac Mini) + log shipper on GPU server - Add GPU scrape targets to Prometheus/VictoriaMetrics config - Add Grafana Loki datasource for GPU service logs - Add health check with auto-restart, log rotation, and log shipping - Document complete setup: Always-On config, troubleshooting, architecture Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7754cf6e00
commit
16e0d99c5a
13 changed files with 1245 additions and 7 deletions
56
packages/shared-gpu/src/gpu-client.ts
Normal file
56
packages/shared-gpu/src/gpu-client.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import type { GpuServiceConfig } from './types';
|
||||
import { SttClient } from './stt-client';
|
||||
import { TtsClient } from './tts-client';
|
||||
import { ImageClient } from './image-client';
|
||||
|
||||
/**
 * Facade bundling one client per Mana GPU service (speech-to-text,
 * text-to-speech and image generation) behind a single configuration object.
 *
 * The same config is handed to every sub-client; each one resolves its own
 * service URL from it.
 *
 * @example Public URLs (from anywhere):
 * ```ts
 * const gpu = new GpuClient({ baseUrl: 'https://gpu.mana.how' });
 * ```
 *
 * @example LAN (direct):
 * ```ts
 * const gpu = new GpuClient({ baseUrl: 'http://192.168.178.11' });
 * ```
 *
 * @example Custom URLs (list every service you intend to call):
 * ```ts
 * const gpu = new GpuClient({
 *   baseUrl: '',
 *   urls: {
 *     stt: 'https://gpu-stt.mana.how',
 *     tts: 'https://gpu-tts.mana.how',
 *     image: 'https://gpu-img.mana.how',
 *   },
 * });
 * ```
 */
export class GpuClient {
  /** Speech-to-text client. */
  public readonly stt: SttClient;
  /** Text-to-speech client. */
  public readonly tts: TtsClient;
  /** Image generation client. */
  public readonly image: ImageClient;

  /**
   * @param config Shared configuration passed unchanged to each sub-client.
   */
  constructor(config: GpuServiceConfig) {
    this.stt = new SttClient(config);
    this.tts = new TtsClient(config);
    this.image = new ImageClient(config);
  }

  /**
   * Probe all three services concurrently.
   *
   * A service counts as healthy only when its `health()` call resolves and the
   * returned `status` is exactly `'healthy'`; a rejected call maps to `false`.
   * This method itself never rejects because of a failing service.
   *
   * @returns One boolean per service.
   */
  async healthCheck(): Promise<{
    stt: boolean;
    tts: boolean;
    image: boolean;
  }> {
    const isHealthy = (outcome: PromiseSettledResult<{ status: string }>): boolean =>
      outcome.status === 'fulfilled' && outcome.value.status === 'healthy';

    const [sttOutcome, ttsOutcome, imageOutcome] = await Promise.allSettled([
      this.stt.health(),
      this.tts.health(),
      this.image.health(),
    ]);

    return {
      stt: isHealthy(sttOutcome),
      tts: isHealthy(ttsOutcome),
      image: isHealthy(imageOutcome),
    };
  }
}
|
||||
72
packages/shared-gpu/src/image-client.ts
Normal file
72
packages/shared-gpu/src/image-client.ts
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import type {
|
||||
GenerateImageOptions,
|
||||
GenerateImageResult,
|
||||
ImageGenHealthResponse,
|
||||
GpuServiceConfig,
|
||||
} from './types';
|
||||
import { resolveServiceUrl } from './resolve-url';
|
||||
|
||||
/**
 * HTTP client for the image generation service.
 *
 * The service URL is resolved once, at construction time, from the supplied
 * config. `generate` is bounded by `config.timeout` (120 000 ms when unset);
 * `downloadImage` and `health` use fixed timeouts of 30 s and 5 s.
 */
export class ImageClient {
  private baseUrl: string;
  private timeout: number;

  /**
   * @param config Shared GPU service configuration.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'image');
    this.timeout = config.timeout ?? 120_000;
  }

  /**
   * Generate an image from a text prompt.
   *
   * Unset options default to 1024x1024, 4 steps and PNG output. An unset
   * `seed` is dropped from the JSON body.
   *
   * @param options Prompt and optional generation parameters.
   * @returns The parsed JSON response of `POST /generate`.
   * @throws Error with message `Image generation error <status>: <detail>` on
   *   a non-2xx response. The request is aborted after the configured timeout,
   *   in which case `fetch` rejects with its abort error.
   */
  async generate(options: GenerateImageOptions): Promise<GenerateImageResult> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(`${this.baseUrl}/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          prompt: options.prompt,
          width: options.width ?? 1024,
          height: options.height ?? 1024,
          steps: options.steps ?? 4,
          seed: options.seed,
          output_format: options.outputFormat ?? 'png',
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        const detail = await ImageClient.errorDetail(response);
        throw new Error(`Image generation error ${response.status}: ${detail}`);
      }

      return (await response.json()) as GenerateImageResult;
    } finally {
      // Always release the timer, including after a thrown error.
      clearTimeout(timer);
    }
  }

  /**
   * Build the full URL for a generated image.
   *
   * @param relativePath Path as returned by the service. A missing leading
   *   slash is added; an absolute `http(s)://` URL is returned unchanged.
   */
  imageUrl(relativePath: string): string {
    if (/^https?:\/\//i.test(relativePath)) return relativePath;

    // Exactly one slash between base and path, whatever the inputs carry.
    const base = this.baseUrl.replace(/\/+$/, '');
    const path = relativePath.startsWith('/') ? relativePath : `/${relativePath}`;
    return `${base}${path}`;
  }

  /**
   * Download a generated image as ArrayBuffer (fixed 30 s timeout).
   *
   * @throws Error `Failed to download image: <status>` on a non-2xx response.
   */
  async downloadImage(relativePath: string): Promise<ArrayBuffer> {
    const response = await fetch(this.imageUrl(relativePath), {
      signal: AbortSignal.timeout(30_000),
    });
    if (!response.ok) throw new Error(`Failed to download image: ${response.status}`);
    return response.arrayBuffer();
  }

  /**
   * Fetch `GET /health` (fixed 5 s timeout) and return the parsed JSON body.
   * The HTTP status is deliberately not inspected, so a body sent with an
   * error status is still returned to the caller.
   */
  async health(): Promise<ImageGenHealthResponse> {
    const response = await fetch(`${this.baseUrl}/health`, {
      signal: AbortSignal.timeout(5000),
    });
    return (await response.json()) as ImageGenHealthResponse;
  }

  /**
   * Extract a printable error description from a failed response.
   *
   * Uses the JSON body's `detail` field: strings verbatim, any other non-null
   * value (e.g. a list of validation errors) JSON-encoded. Falls back to the
   * HTTP status text when the body is not JSON or carries no `detail`.
   */
  private static async errorDetail(response: Response): Promise<string> {
    const body: unknown = await response.json().catch(() => undefined);
    const detail =
      typeof body === 'object' && body !== null
        ? (body as { detail?: unknown }).detail
        : undefined;

    if (typeof detail === 'string') return detail;
    if (detail != null) return JSON.stringify(detail);
    return response.statusText;
  }
}
|
||||
24
packages/shared-gpu/src/index.ts
Normal file
24
packages/shared-gpu/src/index.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
export { GpuClient } from './gpu-client';
|
||||
export { SttClient } from './stt-client';
|
||||
export { TtsClient } from './tts-client';
|
||||
export { ImageClient } from './image-client';
|
||||
export { resolveServiceUrl } from './resolve-url';
|
||||
export { GPU_PUBLIC_URLS, GPU_LAN_URLS } from './types';
|
||||
export type {
|
||||
// Config
|
||||
GpuServiceConfig,
|
||||
// STT
|
||||
TranscriptionResult,
|
||||
TranscribeOptions,
|
||||
WordTimestamp,
|
||||
Segment,
|
||||
// TTS
|
||||
SynthesizeOptions,
|
||||
TTSVoice,
|
||||
TTSVoiceType,
|
||||
TTSHealthResponse,
|
||||
// Image
|
||||
GenerateImageOptions,
|
||||
GenerateImageResult,
|
||||
ImageGenHealthResponse,
|
||||
} from './types';
|
||||
31
packages/shared-gpu/src/resolve-url.ts
Normal file
31
packages/shared-gpu/src/resolve-url.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import type { GpuServiceConfig } from './types';
|
||||
import { GPU_PUBLIC_URLS } from './types';
|
||||
|
||||
/** Identifier of a GPU-hosted service a URL can be resolved for. */
type ServiceKey = 'llm' | 'stt' | 'tts' | 'image' | 'ollama';

/** Port appended to the base URL for each service in LAN mode. */
const LAN_PORTS: Record<ServiceKey, number> = {
  llm: 3025,
  stt: 3020,
  tts: 3022,
  image: 3023,
  ollama: 11434,
};

/** Base-URL host that selects public mode (per-service hostnames). */
const PUBLIC_BASE_HOST = 'gpu.mana.how';

/**
 * Extract the lower-cased host from a base URL, tolerating a missing scheme.
 * Returns an empty string when no host can be found.
 */
function hostOf(baseUrl: string): string {
  const match = /^(?:[a-z][a-z0-9+.-]*:\/\/)?(?:[^/?#@]*@)?([^/:?#]+)/i.exec(baseUrl);
  return match?.[1]?.toLowerCase() ?? '';
}

/**
 * Resolve the URL for a specific GPU service based on config.
 *
 * Resolution order:
 * 1. `config.urls[service]`, when set to a non-empty string, is returned as-is.
 * 2. Public mode: when the host of `config.baseUrl` is exactly `gpu.mana.how`
 *    (case-insensitive), the matching entry of `GPU_PUBLIC_URLS` is returned,
 *    e.g. `https://gpu.mana.how` → `https://gpu-stt.mana.how`.
 * 3. LAN mode: trailing slashes are stripped from `config.baseUrl` and the
 *    service port is appended, e.g. `http://192.168.178.11` →
 *    `http://192.168.178.11:3020`.
 *
 * The public-mode check compares the whole host rather than searching for a
 * substring, so a LAN host whose name merely contains `gpu.mana.how` is not
 * redirected to the public endpoints.
 *
 * @param config Shared GPU service configuration.
 * @param service Which service to resolve.
 * @returns The service URL, without a trailing slash in modes 2 and 3.
 */
export function resolveServiceUrl(config: GpuServiceConfig, service: ServiceKey): string {
  // 1. Explicit override
  const override = config.urls?.[service];
  if (override) return override;

  const base = config.baseUrl.replace(/\/+$/, '');

  // 2. Public mode
  if (hostOf(base) === PUBLIC_BASE_HOST) {
    return GPU_PUBLIC_URLS[service];
  }

  // 3. LAN mode
  return `${base}:${LAN_PORTS[service]}`;
}
|
||||
59
packages/shared-gpu/src/stt-client.ts
Normal file
59
packages/shared-gpu/src/stt-client.ts
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import type { TranscriptionResult, TranscribeOptions, GpuServiceConfig } from './types';
|
||||
import { resolveServiceUrl } from './resolve-url';
|
||||
|
||||
/**
 * HTTP client for the speech-to-text service.
 *
 * The service URL is resolved once, at construction time, from the supplied
 * config. `transcribe` is bounded by `config.timeout` (60 000 ms when unset);
 * `health` uses a fixed 5 s timeout.
 */
export class SttClient {
  private baseUrl: string;
  private timeout: number;

  /**
   * @param config Shared GPU service configuration.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'stt');
    this.timeout = config.timeout ?? 60_000;
  }

  /**
   * Transcribe audio with optional word timestamps and speaker diarization.
   *
   * The audio is uploaded as multipart form data to `POST /transcribe`.
   * `align` (default `true`) and `diarize` (default `false`) are always sent;
   * `language`, `model`, `min_speakers` and `max_speakers` only when provided.
   *
   * @param audioBuffer Audio content; a `Buffer` is copied into a `Blob`.
   * @param filename File name attached to the uploaded part.
   * @param options Optional transcription settings.
   * @returns The parsed JSON response.
   * @throws Error with message `STT error <status>: <detail>` on a non-2xx
   *   response. The request is aborted after the configured timeout, in which
   *   case `fetch` rejects with its abort error.
   */
  async transcribe(
    audioBuffer: Buffer | Blob,
    filename: string,
    options: TranscribeOptions = {}
  ): Promise<TranscriptionResult> {
    const formData = new FormData();
    const blob =
      audioBuffer instanceof Blob ? audioBuffer : new Blob([new Uint8Array(audioBuffer)]);
    formData.append('file', blob, filename);

    if (options.language) formData.append('language', options.language);
    if (options.model) formData.append('model', options.model);
    formData.append('align', String(options.align ?? true));
    formData.append('diarize', String(options.diarize ?? false));
    // `!= null` so that an explicit 0 is still forwarded.
    if (options.minSpeakers != null) formData.append('min_speakers', String(options.minSpeakers));
    if (options.maxSpeakers != null) formData.append('max_speakers', String(options.maxSpeakers));

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(`${this.baseUrl}/transcribe`, {
        method: 'POST',
        body: formData,
        signal: controller.signal,
      });

      if (!response.ok) {
        const detail = await SttClient.errorDetail(response);
        throw new Error(`STT error ${response.status}: ${detail}`);
      }

      return (await response.json()) as TranscriptionResult;
    } finally {
      // Always release the timer, including after a thrown error.
      clearTimeout(timer);
    }
  }

  /**
   * Fetch `GET /health` (fixed 5 s timeout) and return the parsed JSON body.
   * The HTTP status is deliberately not inspected, so a body sent with an
   * error status is still returned to the caller.
   */
  async health(): Promise<{ status: string; whisperx: boolean }> {
    const response = await fetch(`${this.baseUrl}/health`, {
      signal: AbortSignal.timeout(5000),
    });
    return (await response.json()) as { status: string; whisperx: boolean };
  }

  /**
   * Extract a printable error description from a failed response.
   *
   * Uses the JSON body's `detail` field: strings verbatim, any other non-null
   * value (e.g. a list of validation errors) JSON-encoded. Falls back to the
   * HTTP status text when the body is not JSON or carries no `detail`.
   */
  private static async errorDetail(response: Response): Promise<string> {
    const body: unknown = await response.json().catch(() => undefined);
    const detail =
      typeof body === 'object' && body !== null
        ? (body as { detail?: unknown }).detail
        : undefined;

    if (typeof detail === 'string') return detail;
    if (detail != null) return JSON.stringify(detail);
    return response.statusText;
  }
}
|
||||
67
packages/shared-gpu/src/tts-client.ts
Normal file
67
packages/shared-gpu/src/tts-client.ts
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import type { SynthesizeOptions, TTSVoice, TTSHealthResponse, GpuServiceConfig } from './types';
|
||||
import { resolveServiceUrl } from './resolve-url';
|
||||
|
||||
/**
 * HTTP client for the text-to-speech service.
 *
 * The service URL is resolved once, at construction time, from the supplied
 * config. `synthesize` is bounded by `config.timeout` (30 000 ms when unset);
 * `voices` and `health` use a fixed 5 s timeout.
 */
export class TtsClient {
  private baseUrl: string;
  private timeout: number;

  /**
   * @param config Shared GPU service configuration.
   */
  constructor(config: GpuServiceConfig) {
    this.baseUrl = resolveServiceUrl(config, 'tts');
    this.timeout = config.timeout ?? 30_000;
  }

  /**
   * Synthesize speech. Returns audio as ArrayBuffer.
   *
   * Sends `POST /synthesize/auto` with `speed` defaulting to `1.0` and
   * `output_format` defaulting to `'wav'`.
   *
   * @param options Text and optional voice/speed/format.
   * @returns The audio bytes plus metadata read from the response headers:
   *   `content-type` (fallback `'audio/wav'`), `x-voice` (fallback: the
   *   requested voice, then `'default'`) and `x-duration` (fallback `0`, also
   *   used when the header is not a finite number).
   * @throws Error with message `TTS error <status>: <detail>` on a non-2xx
   *   response. The request is aborted after the configured timeout, in which
   *   case `fetch` rejects with its abort error.
   */
  async synthesize(options: SynthesizeOptions): Promise<{
    audio: ArrayBuffer;
    contentType: string;
    voice: string;
    duration: number;
  }> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(`${this.baseUrl}/synthesize/auto`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          text: options.text,
          voice: options.voice,
          speed: options.speed ?? 1.0,
          output_format: options.outputFormat ?? 'wav',
        }),
        signal: controller.signal,
      });

      if (!response.ok) {
        const detail = await TtsClient.errorDetail(response);
        throw new Error(`TTS error ${response.status}: ${detail}`);
      }

      // Guard against a malformed header so callers never receive NaN.
      const duration = Number.parseFloat(response.headers.get('x-duration') ?? '0');

      return {
        audio: await response.arrayBuffer(),
        contentType: response.headers.get('content-type') ?? 'audio/wav',
        voice: response.headers.get('x-voice') ?? options.voice ?? 'default',
        duration: Number.isFinite(duration) ? duration : 0,
      };
    } finally {
      // Always release the timer, including after a thrown error.
      clearTimeout(timer);
    }
  }

  /**
   * Get available voices from `GET /voices` (fixed 5 s timeout).
   *
   * @throws Error with message `TTS error <status>: <detail>` on a non-2xx
   *   response, instead of returning the error body typed as a voice list.
   */
  async voices(): Promise<{ kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] }> {
    const response = await fetch(`${this.baseUrl}/voices`, {
      signal: AbortSignal.timeout(5000),
    });

    if (!response.ok) {
      const detail = await TtsClient.errorDetail(response);
      throw new Error(`TTS error ${response.status}: ${detail}`);
    }

    return (await response.json()) as { kokoro_voices: TTSVoice[]; custom_voices: TTSVoice[] };
  }

  /**
   * Fetch `GET /health` (fixed 5 s timeout) and return the parsed JSON body.
   * The HTTP status is deliberately not inspected, so a body sent with an
   * error status is still returned to the caller.
   */
  async health(): Promise<TTSHealthResponse> {
    const response = await fetch(`${this.baseUrl}/health`, {
      signal: AbortSignal.timeout(5000),
    });
    return (await response.json()) as TTSHealthResponse;
  }

  /**
   * Extract a printable error description from a failed response.
   *
   * Uses the JSON body's `detail` field: strings verbatim, any other non-null
   * value (e.g. a list of validation errors) JSON-encoded. Falls back to the
   * HTTP status text when the body is not JSON or carries no `detail`.
   */
  private static async errorDetail(response: Response): Promise<string> {
    const body: unknown = await response.json().catch(() => undefined);
    const detail =
      typeof body === 'object' && body !== null
        ? (body as { detail?: unknown }).detail
        : undefined;

    if (typeof detail === 'string') return detail;
    if (detail != null) return JSON.stringify(detail);
    return response.statusText;
  }
}
|
||||
142
packages/shared-gpu/src/types.ts
Normal file
142
packages/shared-gpu/src/types.ts
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
// ============================================================================
|
||||
// STT Types
|
||||
// ============================================================================
|
||||
|
||||
/** A single transcribed word with its position in the audio. */
export interface WordTimestamp {
  /** The word text. */
  word: string;
  /** Start offset of the word (presumably seconds; confirm against the STT service). */
  start: number;
  /** End offset of the word, in the same unit as `start`. */
  end: number;
  /** Optional score attached to the word (presumably alignment confidence; confirm). */
  score?: number;
  /** Optional speaker label for the word. */
  speaker?: string;
}
|
||||
|
||||
/** A contiguous stretch of transcribed text. */
export interface Segment {
  /** Start offset of the segment (presumably seconds; confirm against the STT service). */
  start: number;
  /** End offset of the segment, in the same unit as `start`. */
  end: number;
  /** Text of the segment. */
  text: string;
  /** Optional speaker label for the segment. */
  speaker?: string;
}
|
||||
|
||||
/** Shape that `SttClient.transcribe` casts the JSON response of `POST /transcribe` to. */
export interface TranscriptionResult {
  /** Full transcript text. */
  text: string;
  /** Language of the transcript, when reported. */
  language?: string;
  /** Name of the model reported by the service. */
  model: string;
  /** Latency in milliseconds, as the field name states; what is measured is not visible here. */
  latency_ms?: number;
  /** Duration in seconds, as the field name states (presumably of the audio; confirm). */
  duration_seconds?: number;
  /** Word-level entries, when present. */
  words?: WordTimestamp[];
  /** Segment-level entries, when present. */
  segments?: Segment[];
  /** Speaker labels, when present. */
  speakers?: string[];
}
|
||||
|
||||
/** Optional settings for `SttClient.transcribe`. */
export interface TranscribeOptions {
  /** Sent as the `language` form field when non-empty. */
  language?: string;
  /** Sent as the `model` form field when non-empty. */
  model?: string;
  /** Enable word-level timestamp alignment (default: true) */
  align?: boolean;
  /** Enable speaker diarization (default: false) */
  diarize?: boolean;
  /** Sent as the `min_speakers` form field when not null/undefined. */
  minSpeakers?: number;
  /** Sent as the `max_speakers` form field when not null/undefined. */
  maxSpeakers?: number;
}
|
||||
|
||||
// ============================================================================
|
||||
// TTS Types
|
||||
// ============================================================================
|
||||
|
||||
/** Input for `TtsClient.synthesize`. */
export interface SynthesizeOptions {
  /** Text to synthesize. */
  text: string;
  /** Voice to request; left out of the request body when unset. */
  voice?: string;
  /** Speed value sent to the service (default: 1.0). */
  speed?: number;
  /** Requested audio format (default: 'wav'). */
  outputFormat?: 'wav' | 'mp3';
}
|
||||
|
||||
/** Kind tag carried by a `TTSVoice`. */
export type TTSVoiceType =
  | 'kokoro'
  | 'piper'
  | 'edge'
  | 'f5_custom';
|
||||
|
||||
/** One voice entry as listed by `TtsClient.voices`. */
export interface TTSVoice {
  /** Voice identifier. */
  id: string;
  /** Display name of the voice. */
  name: string;
  /** Free-text description of the voice. */
  description: string;
  /** Kind tag of the voice. */
  type: TTSVoiceType;
}
|
||||
|
||||
/** Shape that `TtsClient.health` casts the JSON body of `GET /health` to. */
export interface TTSHealthResponse {
  /** Service status; `GpuClient.healthCheck` treats exactly `'healthy'` as healthy. */
  status: string;
  /** Service name reported by the endpoint. */
  service: string;
  /** Flag per model name (presumably whether that model is loaded; confirm). */
  models_loaded: Record<string, boolean>;
  /** Presumably whether the service requires authentication; confirm. */
  auth_required: boolean;
}
|
||||
|
||||
// ============================================================================
|
||||
// Image Generation Types
|
||||
// ============================================================================
|
||||
|
||||
/** Input for `ImageClient.generate`. */
export interface GenerateImageOptions {
  /** Text prompt describing the image. */
  prompt: string;
  /** Requested width (default: 1024). */
  width?: number;
  /** Requested height (default: 1024). */
  height?: number;
  /** Number of steps sent to the service (default: 4). */
  steps?: number;
  /** Seed value; left out of the request body when unset. */
  seed?: number;
  /** Requested image format (default: 'png'). */
  outputFormat?: 'png' | 'jpg';
}
|
||||
|
||||
/** Shape that `ImageClient.generate` casts the JSON response of `POST /generate` to. */
export interface GenerateImageResult {
  /** Success flag reported by the service. */
  success: boolean;
  /** Location of the image (presumably a path to pass to `ImageClient.imageUrl`; confirm). */
  image_url: string;
  /** Prompt reported back by the service. */
  prompt: string;
  /** Width reported by the service. */
  width: number;
  /** Height reported by the service. */
  height: number;
  /** Step count reported by the service. */
  steps: number;
  /** Seed reported by the service. */
  seed: number;
  /** Generation time reported by the service (unit not visible here; confirm). */
  generation_time: number;
}
|
||||
|
||||
/** Shape that `ImageClient.health` casts the JSON body of `GET /health` to. */
export interface ImageGenHealthResponse {
  /** Service status; `GpuClient.healthCheck` treats exactly `'healthy'` as healthy. */
  status: string;
  /** Service name reported by the endpoint. */
  service: string;
  /** Presumably whether the FLUX model is available; confirm. */
  flux_available: boolean;
}
|
||||
|
||||
// ============================================================================
|
||||
// GPU Service Config
|
||||
// ============================================================================
|
||||
|
||||
/** Configuration shared by all GPU service clients. */
export interface GpuServiceConfig {
  /**
   * Base URL of the GPU server.
   *
   * LAN mode (single host, different ports):
   *   `http://192.168.178.11` → :3025, :3020, :3022, :3023, :11434
   *
   * Public mode (different hostnames):
   *   `https://gpu.mana.how` → gpu-llm.mana.how, gpu-stt.mana.how, etc.
   */
  baseUrl: string;

  /** Override individual service URLs (takes precedence over baseUrl) */
  urls?: {
    llm?: string;
    stt?: string;
    tts?: string;
    image?: string;
    ollama?: string;
  };

  /**
   * Request timeout in ms for the main operation of each client.
   *
   * When unset, each client applies its own default: 60 000 for STT
   * transcription, 30 000 for TTS synthesis and 120 000 for image generation.
   */
  timeout?: number;
}
|
||||
|
||||
/** Public URL of each GPU service, one hostname per service. */
export const GPU_PUBLIC_URLS = {
  llm: 'https://gpu-llm.mana.how',
  stt: 'https://gpu-stt.mana.how',
  tts: 'https://gpu-tts.mana.how',
  image: 'https://gpu-img.mana.how',
  ollama: 'https://gpu-ollama.mana.how',
} as const;
|
||||
|
||||
/** LAN URL of each GPU service: one host, one port per service. */
export const GPU_LAN_URLS = {
  llm: 'http://192.168.178.11:3025',
  stt: 'http://192.168.178.11:3020',
  tts: 'http://192.168.178.11:3022',
  image: 'http://192.168.178.11:3023',
  ollama: 'http://192.168.178.11:11434',
} as const;
|
||||
Loading…
Add table
Add a link
Reference in a new issue