mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-16 06:39:41 +02:00
feat(apps): integrate GPU services into Picture and Chat apps
Picture App: - Update LocalImageGenService to use GPU server (gpu-img.mana.how) - Add API key authentication (GPU_API_KEY) - Increase timeout to 120s (VRAM may need model loading time) Chat App: - Add VoiceModule with STT/TTS integration via GPU server - POST /api/v1/voice/transcribe — Upload audio, get text + word timestamps - POST /api/v1/voice/synthesize — Send text, get audio response - GET /api/v1/voice/health — Check GPU voice services availability - Supports speaker diarization and language selection Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4b0f5a29fd
commit
02bd9d3117
5 changed files with 239 additions and 3 deletions
|
|
@ -12,6 +12,7 @@ import { SpaceModule } from './space/space.module';
|
|||
import { DocumentModule } from './document/document.module';
|
||||
import { ModelModule } from './model/model.module';
|
||||
import { AdminModule } from './admin/admin.module';
|
||||
import { VoiceModule } from './voice/voice.module';
|
||||
import { HealthModule } from '@manacore/shared-nestjs-health';
|
||||
|
||||
@Module({
|
||||
|
|
@ -51,6 +52,7 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
|
|||
DocumentModule,
|
||||
ModelModule,
|
||||
AdminModule,
|
||||
VoiceModule,
|
||||
HealthModule.forRoot({ serviceName: 'chat-backend' }),
|
||||
],
|
||||
})
|
||||
|
|
|
|||
76
apps/chat/apps/backend/src/voice/voice.controller.ts
Normal file
76
apps/chat/apps/backend/src/voice/voice.controller.ts
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
import {
|
||||
Controller,
|
||||
Post,
|
||||
Body,
|
||||
Get,
|
||||
UseGuards,
|
||||
UseInterceptors,
|
||||
UploadedFile,
|
||||
Res,
|
||||
Query,
|
||||
} from '@nestjs/common';
|
||||
import { FileInterceptor } from '@nestjs/platform-express';
|
||||
import type { Response } from 'express';
|
||||
import { JwtAuthGuard } from '@manacore/shared-nestjs-auth';
|
||||
import { VoiceService } from './voice.service';
|
||||
|
||||
/**
 * HTTP endpoints for speech-to-text and text-to-speech.
 *
 * Every route is mounted under `voice` and guarded by `JwtAuthGuard`;
 * the actual GPU calls are delegated to `VoiceService`.
 */
@Controller('voice')
@UseGuards(JwtAuthGuard)
export class VoiceController {
  constructor(private readonly voiceService: VoiceService) {}

  /** Check GPU voice services availability. */
  @Get('health')
  async health() {
    return this.voiceService.healthCheck();
  }

  /**
   * Transcribe audio to text.
   * POST /api/v1/voice/transcribe
   *
   * Body: multipart/form-data with "file" field
   * Query: ?language=de&diarize=true
   *
   * @param file - Uploaded audio, extracted from the "file" form field.
   * @param language - Language code forwarded to the STT service; falls back to 'de' when absent or empty.
   * @param diarize - Speaker diarization is enabled only for the exact string 'true'.
   * @returns The transcription result, or `{ error }` when no file was uploaded.
   */
  @Post('transcribe')
  @UseInterceptors(FileInterceptor('file'))
  async transcribe(
    @UploadedFile() file: Express.Multer.File,
    @Query('language') language?: string,
    @Query('diarize') diarize?: string
  ) {
    if (!file) {
      return { error: 'No audio file provided' };
    }

    return this.voiceService.transcribe(file.buffer, file.originalname, {
      language: language || 'de',
      // Query values arrive as strings, so compare against the literal.
      diarize: diarize === 'true',
    });
  }

  /**
   * Synthesize text to speech.
   * POST /api/v1/voice/synthesize
   *
   * Returns audio file directly.
   *
   * @param body - JSON payload; `text` is required, the other fields are optional overrides.
   * @param res - Express response, written to manually because the reply is binary audio.
   */
  @Post('synthesize')
  async synthesize(
    @Body() body: { text: string; voice?: string; speed?: number; format?: 'wav' | 'mp3' },
    @Res() res: Response
  ) {
    // The body is untrusted input: reject a missing body or a missing/blank
    // text up front instead of crashing or spending a GPU round-trip on it.
    const hasText = typeof body?.text === 'string' && body.text.trim().length > 0;
    if (!hasText) {
      res.status(400).json({ error: 'No text provided' });
      return;
    }

    const result = await this.voiceService.synthesize(body.text, {
      voice: body.voice,
      speed: body.speed,
      format: body.format,
    });

    // Never emit "NaN"/"Infinity" as a header value.
    const duration = Number.isFinite(result.duration) ? result.duration : 0;

    res.set({
      'Content-Type': result.contentType,
      'Content-Length': result.audio.length.toString(),
      'X-Duration': duration.toString(),
    });
    res.send(result.audio);
  }
}
|
||||
10
apps/chat/apps/backend/src/voice/voice.module.ts
Normal file
10
apps/chat/apps/backend/src/voice/voice.module.ts
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { VoiceController } from './voice.controller';
|
||||
import { VoiceService } from './voice.service';
|
||||
|
||||
/**
 * Nest module for the voice feature: registers `VoiceController` and
 * provides `VoiceService`, which is also exported for use by other modules.
 */
@Module({
  providers: [VoiceService],
  exports: [VoiceService],
  controllers: [VoiceController],
})
export class VoiceModule {}
|
||||
142
apps/chat/apps/backend/src/voice/voice.service.ts
Normal file
142
apps/chat/apps/backend/src/voice/voice.service.ts
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
|
||||
/**
 * Voice service for speech-to-text and text-to-speech
 * using the GPU server's mana-stt and mana-tts services.
 *
 * Configuration (read once in the constructor):
 * - `GPU_STT_URL` / `GPU_TTS_URL`: service base URLs, with built-in defaults.
 * - `GPU_API_KEY`: sent as `X-API-Key` when set; omitted otherwise.
 */
@Injectable()
export class VoiceService {
  private readonly logger = new Logger(VoiceService.name);
  private readonly sttUrl: string;
  private readonly ttsUrl: string;
  private readonly apiKey: string;
  private readonly timeout: number;

  constructor(private configService: ConfigService) {
    // `||` rather than `??` on purpose: an empty config value also falls back to the default.
    this.sttUrl = this.normalizeBaseUrl(
      this.configService.get<string>('GPU_STT_URL') || 'https://gpu-stt.mana.how'
    );
    this.ttsUrl = this.normalizeBaseUrl(
      this.configService.get<string>('GPU_TTS_URL') || 'https://gpu-tts.mana.how'
    );
    this.apiKey = this.configService.get<string>('GPU_API_KEY') || '';
    this.timeout = 60_000;
  }

  /** Strip trailing slashes so `${base}/path` never produces a double slash. */
  private normalizeBaseUrl(url: string): string {
    return url.replace(/\/+$/, '');
  }

  /** Build the auth header set; empty when no API key is configured. */
  private authHeaders(): Record<string, string> {
    const headers: Record<string, string> = {};
    if (this.apiKey) headers['X-API-Key'] = this.apiKey;
    return headers;
  }

  /**
   * Run `run` with an abort signal that fires after `this.timeout` ms.
   *
   * The signal stays armed until `run` settles, so it covers reading the
   * response body as well as the initial request. Failures are logged with
   * `label` for context and then rethrown unchanged.
   */
  private async withTimeout<T>(
    label: string,
    run: (signal: AbortSignal) => Promise<T>
  ): Promise<T> {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), this.timeout);

    try {
      return await run(controller.signal);
    } catch (err: unknown) {
      if (controller.signal.aborted) {
        this.logger.error(`${label} request timed out after ${this.timeout}ms`);
      } else {
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.error(`${label} request failed: ${reason}`);
      }
      throw err;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Throw a descriptive error for a non-2xx response.
   *
   * @throws Error `"<label> error <status>: <body>"`; the body is empty if it cannot be read.
   */
  private async ensureOk(
    label: string,
    response: { ok: boolean; status: number; text(): Promise<string> }
  ): Promise<void> {
    if (response.ok) return;
    const error = await response.text().catch(() => '');
    throw new Error(`${label} error ${response.status}: ${error}`);
  }

  /**
   * Transcribe audio to text using WhisperX on the GPU server.
   * Supports word-level timestamps and speaker diarization.
   *
   * @param audioBuffer - Raw audio bytes to upload.
   * @param filename - File name attached to the multipart "file" part.
   * @param options - `language` is sent only when truthy; `diarize` defaults to false.
   * @returns The STT service's JSON response, passed through without validation.
   * @throws Error on a non-2xx response; rejects with the fetch abort error on timeout.
   */
  async transcribe(
    audioBuffer: Buffer,
    filename: string,
    options: {
      language?: string;
      diarize?: boolean;
    } = {}
  ): Promise<{
    text: string;
    language?: string;
    words?: Array<{ word: string; start: number; end: number; speaker?: string }>;
    speakers?: string[];
    latencyMs?: number;
  }> {
    const formData = new FormData();
    formData.append('file', new Blob([audioBuffer]), filename);
    if (options.language) formData.append('language', options.language);
    formData.append('align', 'true');
    formData.append('diarize', String(options.diarize ?? false));

    return this.withTimeout('STT', async (signal) => {
      const response = await fetch(`${this.sttUrl}/transcribe`, {
        method: 'POST',
        // No explicit Content-Type: fetch derives the multipart boundary from the FormData body.
        headers: this.authHeaders(),
        body: formData,
        signal,
      });

      await this.ensureOk('STT', response);
      return await response.json();
    });
  }

  /**
   * Synthesize text to speech using the GPU server's TTS service.
   * Returns audio as a Buffer.
   *
   * @param text - Text to speak.
   * @param options - Defaults: voice 'de_katja', speed 1.0, format 'mp3'.
   * @returns The audio bytes, the upstream content type ('audio/mpeg' if absent) and
   *   the duration parsed from the `x-duration` header (0 if absent or unparsable).
   * @throws Error on a non-2xx response; rejects with the fetch abort error on timeout.
   */
  async synthesize(
    text: string,
    options: {
      voice?: string;
      speed?: number;
      format?: 'wav' | 'mp3';
    } = {}
  ): Promise<{
    audio: Buffer;
    contentType: string;
    duration: number;
  }> {
    return this.withTimeout('TTS', async (signal) => {
      const response = await fetch(`${this.ttsUrl}/synthesize/auto`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          ...this.authHeaders(),
        },
        body: JSON.stringify({
          text,
          voice: options.voice ?? 'de_katja',
          speed: options.speed ?? 1.0,
          output_format: options.format ?? 'mp3',
        }),
        signal,
      });

      await this.ensureOk('TTS', response);

      const arrayBuffer = await response.arrayBuffer();
      // A garbled header must not surface as NaN to callers.
      const parsedDuration = parseFloat(response.headers.get('x-duration') ?? '0');

      return {
        audio: Buffer.from(arrayBuffer),
        contentType: response.headers.get('content-type') ?? 'audio/mpeg',
        duration: Number.isFinite(parsedDuration) ? parsedDuration : 0,
      };
    });
  }

  /**
   * Check if GPU voice services are available.
   *
   * Both `/health` endpoints are probed in parallel with a 5s timeout each.
   * Any failure (network error, timeout, non-2xx) is reported as `false`
   * rather than thrown.
   */
  async healthCheck(): Promise<{ stt: boolean; tts: boolean }> {
    const check = async (url: string): Promise<boolean> => {
      try {
        const res = await fetch(`${url}/health`, {
          signal: AbortSignal.timeout(5000),
        });
        return res.ok;
      } catch (err: unknown) {
        // Best-effort probe: record why it failed, but never throw.
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.debug(`Health check failed for ${url}: ${reason}`);
        return false;
      }
    };

    const [stt, tts] = await Promise.all([check(this.sttUrl), check(this.ttsUrl)]);
    return { stt, tts };
  }
}
|
||||
|
|
@ -16,10 +16,13 @@ export class LocalImageGenService {
|
|||
private readonly timeout: number;
|
||||
private isAvailable = false;
|
||||
|
||||
private readonly apiKey?: string;
|
||||
|
||||
constructor(private configService: ConfigService) {
|
||||
this.baseUrl =
|
||||
this.configService.get<string>('IMAGE_GEN_SERVICE_URL') || 'http://localhost:3025';
|
||||
this.timeout = 60_000; // 60s (FLUX.2 klein is fast, but allow margin)
|
||||
this.configService.get<string>('IMAGE_GEN_SERVICE_URL') || 'https://gpu-img.mana.how';
|
||||
this.apiKey = this.configService.get<string>('GPU_API_KEY');
|
||||
this.timeout = 120_000; // 120s (first request may need to load model into VRAM)
|
||||
this.checkHealth();
|
||||
}
|
||||
|
||||
|
|
@ -63,9 +66,12 @@ export class LocalImageGenService {
|
|||
const controller = new AbortController();
|
||||
setTimeout(() => controller.abort(), this.timeout);
|
||||
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
|
||||
if (this.apiKey) headers['X-API-Key'] = this.apiKey;
|
||||
|
||||
const response = await fetch(`${this.baseUrl}/generate`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
prompt: params.prompt,
|
||||
width: params.width || 1024,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue