feat(lightwrite): add STT lyrics transcription integration

Integrate mana-stt service for automatic lyrics transcription:

- Add SttService to call mana-stt /transcribe endpoint
- Extend beats schema with transcription status fields
- Add POST /beats/:id/transcribe endpoint
- Add GET /beats/stt/available endpoint
- Update BeatUploader with auto-transcription after upload
- Show transcription progress and retry button on failure
This commit is contained in:
Till-JS 2026-02-16 15:19:14 +01:00
parent 20db01628a
commit 9dc6c111d3
11 changed files with 319 additions and 4 deletions

View file

@ -13,3 +13,7 @@ S3_REGION=us-east-1
S3_ACCESS_KEY=minioadmin
S3_SECRET_KEY=minioadmin
S3_BUCKET=lightwrite-storage
# STT (Speech-to-Text)
MANA_STT_URL=http://localhost:3020
# MANA_STT_API_KEY= # Optional, only if mana-stt requires auth

View file

@ -6,6 +6,7 @@ import { BeatModule } from './beat/beat.module';
import { MarkerModule } from './marker/marker.module';
import { LyricsModule } from './lyrics/lyrics.module';
import { ExportModule } from './export/export.module';
import { SttModule } from './stt/stt.module';
import { HealthModule } from '@manacore/shared-nestjs-health';
@Module({
@ -20,6 +21,7 @@ import { HealthModule } from '@manacore/shared-nestjs-health';
MarkerModule,
LyricsModule,
ExportModule,
SttModule,
HealthModule.forRoot({ serviceName: 'lightwrite-backend' }),
],
})

View file

@ -51,6 +51,14 @@ export class BeatController {
return { beat };
}
// ==================== STT Transcription ====================
/**
 * Reports whether the speech-to-text backend is currently reachable,
 * as determined by BeatService.isSttAvailable().
 */
@Get('stt/available')
async getSttAvailability() {
  return { available: await this.beatService.isSttAvailable() };
}
// ==================== User Beats (Protected) ====================
@Get('project/:projectId')
@ -107,4 +115,14 @@ export class BeatController {
await this.beatService.delete(id, user.userId);
return { success: true };
}
/**
 * Starts a transcription of the given beat on behalf of the authenticated
 * user and responds with whatever BeatService.transcribeBeat() resolves to.
 */
@Post(':id/transcribe')
@UseGuards(JwtAuthGuard)
async transcribeBeat(
  @CurrentUser() user: CurrentUserData,
  @Param('id', ParseUUIDPipe) id: string
) {
  const { userId } = user;
  return this.beatService.transcribeBeat(id, userId);
}
}

View file

@ -1,8 +1,11 @@
import { Module } from '@nestjs/common';
import { Module, forwardRef } from '@nestjs/common';
import { BeatController } from './beat.controller';
import { BeatService } from './beat.service';
import { SttModule } from '../stt/stt.module';
import { LyricsModule } from '../lyrics/lyrics.module';
@Module({
imports: [SttModule, forwardRef(() => LyricsModule)],
controllers: [BeatController],
providers: [BeatService],
exports: [BeatService],

View file

@ -1,4 +1,4 @@
import { Injectable, Inject, NotFoundException, BadRequestException } from '@nestjs/common';
import { Injectable, Inject, NotFoundException, BadRequestException, Logger } from '@nestjs/common';
import { eq, and } from 'drizzle-orm';
import { DATABASE_CONNECTION } from '../db/database.module';
import { Database } from '../db/connection';
@ -10,12 +10,19 @@ import {
getContentType,
type StorageClient,
} from '@manacore/shared-storage';
import { SttService } from '../stt/stt.service';
import { LyricsService } from '../lyrics/lyrics.service';
@Injectable()
export class BeatService {
private readonly logger = new Logger(BeatService.name);
private storage: StorageClient;
constructor(@Inject(DATABASE_CONNECTION) private db: Database) {
/**
 * Receives the database connection (via the DATABASE_CONNECTION token), the
 * STT service and the lyrics service, and creates the storage client with
 * createLightWriteStorage().
 */
constructor(
@Inject(DATABASE_CONNECTION) private db: Database,
private sttService: SttService,
private lyricsService: LyricsService
) {
this.storage = createLightWriteStorage();
}
@ -179,4 +186,82 @@ export class BeatService {
return beat;
}
// ==================== STT Transcription ====================
/**
 * Tell callers whether the STT service can be used right now.
 *
 * @returns the result of SttService.isAvailable()
 */
async isSttAvailable(): Promise<boolean> {
  const reachable = await this.sttService.isAvailable();
  return reachable;
}
/**
 * Transcribe a beat's audio and save the resulting text as the project's lyrics.
 *
 * Steps: load the beat, verify the caller owns its project, mark the beat as
 * 'pending', download the audio from storage, send it to the STT service,
 * store the text through LyricsService.createOrUpdate(), then mark the beat
 * as 'completed'.
 *
 * On any failure after the 'pending' write the beat is marked 'failed' with
 * the error message, and the ORIGINAL error is rethrown. A failure while
 * recording the 'failed' status is logged but never replaces that error.
 *
 * @param beatId id of the beat to transcribe
 * @param userId id of the user requesting the transcription
 * @returns the updated beat row and the stored lyrics content
 * @throws whatever findByIdOrThrow / verifyProjectOwnership throw, any error
 *   from storage, the STT service or the lyrics service, and
 *   NotFoundException when the beat row no longer exists at completion time
 */
async transcribeBeat(
  beatId: string,
  userId: string
): Promise<{ beat: Beat; lyrics: string | null }> {
  const beat = await this.findByIdOrThrow(beatId);
  await this.verifyProjectOwnership(beat.projectId, userId);

  // Set status to pending
  await this.db
    .update(beats)
    .set({
      transcriptionStatus: 'pending',
      transcriptionError: null,
    })
    .where(eq(beats.id, beatId));

  try {
    this.logger.log(`Starting transcription for beat ${beatId}`);

    // Download audio from storage
    const audioBuffer = await this.storage.download(beat.storagePath);

    // Call STT service
    const result = await this.sttService.transcribe(audioBuffer, beat.filename || 'audio.mp3');

    // Save transcribed text as lyrics
    const lyricsRecord = await this.lyricsService.createOrUpdate(
      beat.projectId,
      userId,
      result.text
    );

    // Update beat status to completed
    const [updatedBeat] = await this.db
      .update(beats)
      .set({
        transcriptionStatus: 'completed',
        transcribedAt: new Date(),
        transcriptionError: null,
      })
      .where(eq(beats.id, beatId))
      .returning();

    // The update matches no row if the beat was removed while transcription
    // was running; returning `undefined` would violate the declared type.
    if (!updatedBeat) {
      throw new NotFoundException(`Beat ${beatId} no longer exists`);
    }

    this.logger.log(`Transcription completed for beat ${beatId}: ${result.text.length} chars`);

    return {
      beat: updatedBeat,
      lyrics: lyricsRecord.content,
    };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    this.logger.error(`Transcription failed for beat ${beatId}: ${errorMessage}`);

    // Update beat status to failed. This write is best-effort: if it throws,
    // the caller must still see the transcription error, not the DB error.
    try {
      await this.db
        .update(beats)
        .set({
          transcriptionStatus: 'failed',
          transcriptionError: errorMessage,
        })
        .where(eq(beats.id, beatId));
    } catch (statusError) {
      const statusMessage = statusError instanceof Error ? statusError.message : 'Unknown error';
      this.logger.error(
        `Could not record failed transcription status for beat ${beatId}: ${statusMessage}`
      );
    }

    throw error;
  }
}
}

View file

@ -12,6 +12,10 @@ export const beats = pgTable('beats', {
bpm: real('bpm'),
bpmConfidence: real('bpm_confidence'),
waveformData: jsonb('waveform_data'),
// STT Transcription fields
transcriptionStatus: varchar('transcription_status', { length: 50 }).default('none'), // 'none' | 'pending' | 'completed' | 'failed'
transcriptionError: text('transcription_error'),
transcribedAt: timestamp('transcribed_at', { withTimezone: true }),
createdAt: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
});

View file

@ -0,0 +1,8 @@
import { Module } from '@nestjs/common';
import { SttService } from './stt.service';
/**
 * Module that registers SttService as a provider and exports it, so that
 * modules importing SttModule can inject SttService.
 */
@Module({
providers: [SttService],
exports: [SttService],
})
export class SttModule {}

View file

@ -0,0 +1,89 @@
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
/**
 * Result of a transcription request, mapped from the JSON body returned by
 * the mana-stt `/transcribe` endpoint.
 */
export interface TranscriptionResult {
// Transcribed text (response field `text`).
text: string;
// Response field `language`, or null when the response carries none.
language: string | null;
// Response field `model`.
model: string;
// Response field `latency_ms`; presumably processing latency in milliseconds — confirm against mana-stt.
latencyMs: number | null;
// Response field `duration_seconds`; presumably the audio duration in seconds — confirm against mana-stt.
durationSeconds: number | null;
}
/**
 * HTTP client for the mana-stt speech-to-text service.
 *
 * The base URL is read from `MANA_STT_URL` (default `http://localhost:3020`)
 * and an optional API key from `MANA_STT_API_KEY`; when a key is configured it
 * is sent as the `X-API-Key` header on transcription requests.
 */
@Injectable()
export class SttService {
  private readonly logger = new Logger(SttService.name);
  private readonly sttUrl: string;
  private readonly apiKey: string | undefined;

  constructor(private configService: ConfigService) {
    // `||` is deliberate here: an empty MANA_STT_URL should also fall back to the default.
    const configuredUrl = this.configService.get<string>('MANA_STT_URL') || 'http://localhost:3020';
    // Strip trailing slashes so `${sttUrl}/health` never contains `//`.
    this.sttUrl = configuredUrl.replace(/\/+$/, '');
    this.apiKey = this.configService.get<string>('MANA_STT_API_KEY');
  }

  /**
   * Check if mana-stt service is available.
   *
   * @returns true when `GET /health` answers with a 2xx status within five
   *   seconds; false on any other status, network error or timeout
   */
  async isAvailable(): Promise<boolean> {
    try {
      const response = await fetch(`${this.sttUrl}/health`, {
        method: 'GET',
        signal: AbortSignal.timeout(5000),
      });
      return response.ok;
    } catch (error) {
      // Deliberate best-effort: unavailability is reported, not thrown.
      this.logger.warn(`STT service not available: ${error}`);
      return false;
    }
  }

  /**
   * Transcribe an audio buffer by posting it to mana-stt `/transcribe`.
   *
   * @param audioBuffer raw audio bytes to upload
   * @param filename file name sent with the multipart `file` field
   * @param language optional language value forwarded as the `language` form field
   * @returns the transcription mapped to {@link TranscriptionResult}
   * @throws Error when the request times out (two minutes), the service
   *   answers with a non-2xx status, or the response has no string `text`
   */
  async transcribe(
    audioBuffer: Buffer,
    filename: string,
    language?: string
  ): Promise<TranscriptionResult> {
    this.logger.log(`Starting transcription for ${filename} (${audioBuffer.length} bytes)`);

    const formData = new FormData();
    // Convert Buffer to Uint8Array for Blob compatibility
    const uint8Array = new Uint8Array(audioBuffer);
    formData.append('file', new Blob([uint8Array]), filename);
    if (language) {
      formData.append('language', language);
    }

    const headers: Record<string, string> = {};
    if (this.apiKey) {
      headers['X-API-Key'] = this.apiKey;
    }

    const response = await fetch(`${this.sttUrl}/transcribe`, {
      method: 'POST',
      body: formData,
      headers,
      signal: AbortSignal.timeout(120000), // 2 minute timeout
    });

    if (!response.ok) {
      const error = await response.text();
      throw new Error(`STT transcription failed: ${response.status} - ${error}`);
    }

    // The body comes from another service, so treat it as untrusted and narrow it.
    const payload: unknown = await response.json();
    const result: Record<string, unknown> =
      typeof payload === 'object' && payload !== null ? (payload as Record<string, unknown>) : {};

    // Callers rely on `text` being a string; fail here with a clear message
    // instead of leaking `undefined` through the declared return type.
    if (typeof result.text !== 'string') {
      throw new Error('STT transcription failed: response did not contain a text field');
    }
    const text = result.text;

    const detectedLanguage =
      typeof result.language === 'string' && result.language !== '' ? result.language : null;
    // Fall back to a placeholder so the `model: string` contract always holds.
    const model = typeof result.model === 'string' ? result.model : 'unknown';

    // Type check rather than `||`, so a legitimate 0 is kept instead of becoming null.
    const numberOrNull = (value: unknown): number | null =>
      typeof value === 'number' ? value : null;

    this.logger.log(
      `Transcription complete: ${text.length} chars, language: ${detectedLanguage}, model: ${model}`
    );

    return {
      text,
      language: detectedLanguage,
      model,
      latencyMs: numberOrNull(result.latency_ms),
      durationSeconds: numberOrNull(result.duration_seconds),
    };
  }
}

View file

@ -7,22 +7,49 @@
// Props accepted by this component.
interface Props {
projectId: string;
onUploadComplete?: () => void;
// Called with the transcribed lyrics when a transcription returns non-empty lyrics.
onLyricsUpdate?: (lyrics: string) => void;
}
let { projectId, onUploadComplete }: Props = $props();
let { projectId, onUploadComplete, onLyricsUpdate }: Props = $props();
type Tab = 'upload' | 'library';
let activeTab = $state<Tab>('upload');
let isUploading = $state(false);
let isDetectingBpm = $state(false);
// True while a transcription request started by startTranscription is in flight.
let isTranscribing = $state(false);
let uploadProgress = $state(0);
let errorMessage = $state<string | null>(null);
// Message of the last failed transcription attempt; reset to null when a new upload or attempt starts.
let transcriptionError = $state<string | null>(null);
// Id of the beat most recently uploaded or sent for transcription; retryTranscription re-runs against it.
let currentBeatId = $state<string | null>(null);
let fileInputRef: HTMLInputElement;
const acceptedTypes = ['audio/mpeg', 'audio/wav', 'audio/ogg', 'audio/mp3', 'audio/x-wav'];
const acceptedExtensions = '.mp3,.wav,.ogg';
/**
 * Runs a transcription for the given beat and mirrors its progress in the
 * component state: sets the in-flight flag, forwards non-empty lyrics to
 * onLyricsUpdate, and stores the error message on failure. Never rejects.
 */
async function startTranscription(beatId: string) {
  currentBeatId = beatId;
  transcriptionError = null;
  isTranscribing = true;

  try {
    const { lyrics } = await projectStore.transcribeBeat(beatId);
    if (lyrics) {
      onLyricsUpdate?.(lyrics);
    }
  } catch (cause) {
    if (cause instanceof Error) {
      transcriptionError = cause.message;
    } else {
      transcriptionError = 'Transcription failed';
    }
  } finally {
    isTranscribing = false;
  }
}
/**
 * Re-runs the transcription for the remembered beat; does nothing when no
 * beat id has been recorded yet.
 */
async function retryTranscription() {
  if (!currentBeatId) {
    return;
  }
  await startTranscription(currentBeatId);
}
async function handleFileSelect(e: Event) {
const input = e.target as HTMLInputElement;
const file = input.files?.[0];
@ -35,6 +62,7 @@
}
errorMessage = null;
transcriptionError = null;
isUploading = true;
uploadProgress = 0;
@ -42,6 +70,7 @@
// Upload the file
uploadProgress = 30;
const beat = await projectStore.uploadBeat(projectId, file);
currentBeatId = beat.id;
uploadProgress = 60;
// Detect BPM
@ -67,6 +96,9 @@
uploadProgress = 100;
onUploadComplete?.();
// Auto-start transcription
startTranscription(beat.id);
} catch (err) {
errorMessage = err instanceof Error ? err.message : 'Failed to upload beat';
} finally {
@ -217,6 +249,49 @@
<p class="text-red-500 mt-4 text-sm">{errorMessage}</p>
{/if}
</div>
<!-- Transcription Status -->
{#if isTranscribing}
<div
class="flex items-center gap-3 p-4 bg-surface-hover rounded-lg border border-border animate-pulse"
>
<div
class="w-5 h-5 border-2 border-primary border-t-transparent rounded-full animate-spin"
></div>
<div class="flex-1">
<p class="text-sm font-medium">Transcribing lyrics...</p>
<p class="text-xs text-foreground-secondary">
Analyzing audio to extract lyrics automatically
</p>
</div>
</div>
{:else if transcriptionError}
<div class="flex items-center gap-3 p-4 bg-red-500/10 rounded-lg border border-red-500/30">
<svg
class="w-5 h-5 text-red-500 flex-shrink-0"
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"
/>
</svg>
<div class="flex-1">
<p class="text-sm font-medium text-red-500">Transcription failed</p>
<p class="text-xs text-foreground-secondary">{transcriptionError}</p>
</div>
<button
onclick={retryTranscription}
class="px-3 py-1.5 text-sm font-medium bg-red-500 text-white rounded-md hover:bg-red-600 transition-colors"
>
Retry
</button>
</div>
{/if}
{:else}
<BeatLibrary {projectId} onSelectBeat={onUploadComplete} />
{/if}

View file

@ -196,6 +196,27 @@ function createProjectStore() {
state.currentMarkers = [];
},
/**
 * Asks the backend whether speech-to-text is available.
 * Any request failure is reported as "not available" instead of being thrown.
 */
async checkSttAvailable(): Promise<boolean> {
  try {
    const { available } = await fetchApi<{ available: boolean }>('/beats/stt/available');
    return available;
  } catch {
    return false;
  }
},
/**
 * Requests a transcription for the beat, then stores the returned beat as the
 * current beat and, when lyrics came back non-empty, replaces the content of
 * the current lyrics with them. Resolves with the raw API response.
 */
async transcribeBeat(beatId: string): Promise<{ beat: Beat; lyrics: string | null }> {
  type TranscribeResponse = { beat: Beat; lyrics: string | null };

  const response = await fetchApi<TranscribeResponse>(`/beats/${beatId}/transcribe`, {
    method: 'POST',
  });
  const { beat, lyrics } = response;

  state.currentBeat = beat;
  if (lyrics) {
    state.currentLyrics = { ...state.currentLyrics!, content: lyrics };
  }

  return response;
},
async updateLyrics(projectId: string, content: string) {
const data = await fetchApi<{ lyrics: Lyrics }>(`/lyrics/project/${projectId}`, {
method: 'POST',

View file

@ -1,3 +1,5 @@
/** Possible values of a beat's `transcriptionStatus` field. */
export type TranscriptionStatus = 'none' | 'pending' | 'completed' | 'failed';
export interface Beat {
id: string;
projectId: string;
@ -7,6 +9,10 @@ export interface Beat {
bpm?: number | null;
bpmConfidence?: number | null;
waveformData?: WaveformData | null;
// STT Transcription fields
transcriptionStatus?: TranscriptionStatus | null;
transcriptionError?: string | null;
transcribedAt?: Date | null;
createdAt: Date;
}