mirror of
https://github.com/Memo-2023/mana-monorepo.git
synced 2026-05-22 11:06:43 +02:00
chore: archive inactive projects to apps-archived/
Move inactive projects out of the active workspace:
- bauntown (community website)
- maerchenzauber (AI story generation)
- memoro (voice memo app)
- news (news aggregation)
- nutriphi (nutrition tracking)
- reader (reading app)
- uload (URL shortener)
- wisekeep (AI wisdom extraction)

Update CLAUDE.md documentation:
- Add presi to active projects
- Document archived projects section
- Update workspace configuration

Archived apps can be re-activated by moving them back to apps/.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
b97149ac12
commit
61d181fbc2
3148 changed files with 437 additions and 46640 deletions
24
apps-archived/wisekeep/apps/backend/src/app.module.ts
Normal file
24
apps-archived/wisekeep/apps/backend/src/app.module.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { ConfigModule } from '@nestjs/config';
|
||||
import { TranscriptionModule } from './transcription/transcription.module';
|
||||
import { PlaylistModule } from './playlist/playlist.module';
|
||||
import { YoutubeModule } from './youtube/youtube.module';
|
||||
import { WhisperModule } from './whisper/whisper.module';
|
||||
import { WebsocketModule } from './websocket/websocket.module';
|
||||
import { HealthModule } from './health/health.module';
|
||||
|
||||
/**
 * Root module of the backend. Registers ConfigModule as a global module
 * reading `.env`, and imports every feature module of the application.
 */
@Module({
  imports: [
    ConfigModule.forRoot({ isGlobal: true, envFilePath: '.env' }),
    TranscriptionModule,
    PlaylistModule,
    YoutubeModule,
    WhisperModule,
    WebsocketModule,
    HealthModule,
  ],
})
export class AppModule {}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
import { Controller, Get } from '@nestjs/common';
|
||||
|
||||
/** Health endpoints mounted under `/health`. */
@Controller('health')
export class HealthController {
  /** GET /health — overall status plus the service name and version. */
  @Get()
  check() {
    return {
      ...this.report('ok'),
      service: 'transcriber-backend',
      version: '1.0.0',
    };
  }

  /** GET /health/ready — readiness response. */
  @Get('ready')
  ready() {
    return this.report('ready');
  }

  /** GET /health/live — liveness response. */
  @Get('live')
  live() {
    return this.report('alive');
  }

  /** Builds the `{ status, timestamp }` pair shared by all three endpoints. */
  private report(status: string) {
    return { status, timestamp: new Date().toISOString() };
  }
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { HealthController } from './health.controller';
|
||||
|
||||
/** Feature module that registers the health-check controller. */
@Module({ controllers: [HealthController] })
export class HealthModule {}
|
||||
31
apps-archived/wisekeep/apps/backend/src/main.ts
Normal file
31
apps-archived/wisekeep/apps/backend/src/main.ts
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import { NestFactory } from '@nestjs/core';
|
||||
import { ValidationPipe } from '@nestjs/common';
|
||||
import { AppModule } from './app.module';
|
||||
|
||||
/**
 * Creates the Nest application, enables CORS for the local dev front-ends,
 * installs a global ValidationPipe and starts listening.
 *
 * The port comes from the `PORT` environment variable and falls back to 3006
 * when it is unset or empty (hence `||` rather than `??`).
 */
async function bootstrap(): Promise<void> {
  const app = await NestFactory.create(AppModule);

  app.enableCors({
    origin: [
      'http://localhost:5173', // SvelteKit dev
      'http://localhost:4321', // Astro dev
      'http://localhost:3000', // Alternative dev
    ],
    credentials: true,
  });

  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      transform: true,
      forbidNonWhitelisted: true,
    })
  );

  const port = process.env.PORT || 3006;
  await app.listen(port);

  console.log(`[Transcriber Backend] Running on http://localhost:${port}`);
}

// Handle startup failures explicitly instead of leaving a floating promise:
// report the cause with context and exit with a non-zero status.
bootstrap().catch((error: unknown) => {
  console.error('[Transcriber Backend] Failed to start', error);
  process.exit(1);
});
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import { Controller, Get, Post, Delete, Param, Body } from '@nestjs/common';
|
||||
import { PlaylistService, CreatePlaylistDto } from './playlist.service';
|
||||
|
||||
/** REST endpoints for playlists, mounted under `/playlist`. */
@Controller('playlist')
export class PlaylistController {
  constructor(private readonly playlistService: PlaylistService) {}

  /** GET /playlist — every playlist known to the service. */
  @Get()
  async getAll() {
    const playlists = await this.playlistService.getAll();
    return playlists;
  }

  /** GET /playlist/:category/:name — a single playlist. */
  @Get(':category/:name')
  async getOne(@Param('category') category: string, @Param('name') name: string) {
    const playlist = await this.playlistService.getOne(category, name);
    return playlist;
  }

  /** POST /playlist — creates a playlist from the request body. */
  @Post()
  async create(@Body() dto: CreatePlaylistDto) {
    const created = await this.playlistService.create(dto);
    return created;
  }

  /** DELETE /playlist/:category/:name — removes a playlist and confirms with a message. */
  @Delete(':category/:name')
  async delete(@Param('category') category: string, @Param('name') name: string) {
    await this.playlistService.delete(category, name);
    return { message: 'Playlist deleted' };
  }

  /** POST /playlist/:category/:name/url — appends the body's `url` field to a playlist. */
  @Post(':category/:name/url')
  async addUrl(
    @Param('category') category: string,
    @Param('name') name: string,
    @Body('url') url: string
  ) {
    const updated = await this.playlistService.addUrl(category, name, url);
    return updated;
  }
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { PlaylistController } from './playlist.controller';
|
||||
import { PlaylistService } from './playlist.service';
|
||||
|
||||
/** Playlist feature module; exports PlaylistService so other modules can inject it. */
@Module({
  controllers: [PlaylistController],
  providers: [PlaylistService],
  exports: [PlaylistService],
})
export class PlaylistModule {}
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
import { BadRequestException, Injectable, Logger, NotFoundException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import * as fs from 'fs';
import * as path from 'path';
|
||||
|
||||
/** A playlist as read from (or written to) its `.txt` file. */
export interface Playlist {
  /** Name of the directory that contains the playlist file. */
  category: string;
  /** Playlist file name without the `.txt` extension. */
  name: string;
  /** Path of the playlist file on disk. */
  path: string;
  /** Number of entries in `urls`. */
  urlCount: number;
  /** URL lines of the playlist, in file order. */
  urls: string[];
  /** Optional description taken from the file's leading `# ` comment line. */
  description?: string;
}

/** Payload accepted when creating a playlist. */
export interface CreatePlaylistDto {
  /** Either `name` or `category/name`. */
  name: string;
  /** Optional description written as the first comment line of the file. */
  description?: string;
  /** URLs to store, one per line. */
  urls: string[];
}
|
||||
|
||||
/**
 * File-backed playlist store. Each playlist is a text file at
 * `<playlistsDir>/<category>/<name>.txt` holding an optional `# description`
 * line, a boilerplate `# One URL per line` comment, and one URL per line.
 *
 * Category, name and URL values originate from HTTP requests, so they are
 * validated before they are used to build a path or written to a file.
 */
@Injectable()
export class PlaylistService {
  /** Boilerplate comment written into every playlist file; never a description. */
  private static readonly HEADER_LINE = '# One URL per line';

  private readonly logger = new Logger(PlaylistService.name);
  private readonly playlistsDir: string;

  constructor(private configService: ConfigService) {
    this.playlistsDir = this.configService.get<string>('PLAYLISTS_DIR') || './data/playlists';

    // Ensure playlists directory exists
    if (!fs.existsSync(this.playlistsDir)) {
      fs.mkdirSync(this.playlistsDir, { recursive: true });
    }
  }

  /**
   * Lists every playlist found in the category sub-directories.
   *
   * @returns All playlists; an empty array when the playlists directory is missing.
   */
  async getAll(): Promise<Playlist[]> {
    const playlists: Playlist[] = [];

    if (!fs.existsSync(this.playlistsDir)) {
      return playlists;
    }

    const categories = fs
      .readdirSync(this.playlistsDir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory());

    for (const category of categories) {
      const categoryPath = path.join(this.playlistsDir, category.name);
      const files = fs.readdirSync(categoryPath).filter((f) => f.endsWith('.txt'));

      for (const file of files) {
        // Strip only the trailing extension; `replace('.txt', '')` would remove
        // the first occurrence and mangle names such as `a.txt.backup.txt`.
        const name = file.slice(0, -'.txt'.length);
        playlists.push(this.readPlaylist(category.name, name, path.join(categoryPath, file)));
      }
    }

    return playlists;
  }

  /**
   * Reads a single playlist.
   *
   * @throws BadRequestException when `category` or `name` is not a plain path segment.
   * @throws NotFoundException when the playlist file does not exist.
   */
  async getOne(category: string, name: string): Promise<Playlist> {
    const filePath = this.resolveFilePath(category, name);

    if (!fs.existsSync(filePath)) {
      throw new NotFoundException(`Playlist ${category}/${name} not found`);
    }

    return this.readPlaylist(category, name, filePath);
  }

  /**
   * Creates (or overwrites) a playlist file.
   *
   * `dto.name` may be `name` (stored under the `general` category) or
   * `category/name`; as before, only the first two `/`-separated parts are used.
   *
   * @throws BadRequestException when the name, category or any URL is invalid.
   */
  async create(dto: CreatePlaylistDto): Promise<Playlist> {
    if (typeof dto?.name !== 'string') {
      throw new BadRequestException('Playlist name is required');
    }

    // Parse category/name format
    const parts = dto.name.split('/');
    const category = parts.length > 1 ? parts[0] : 'general';
    const name = parts.length > 1 ? parts[1] : dto.name;
    const filePath = this.resolveFilePath(category, name);

    // The DTO is a plain interface, so nothing has validated these fields yet.
    const urls = (Array.isArray(dto.urls) ? dto.urls : []).map((url) => this.normalizeUrl(url));
    const description = this.normalizeDescription(dto.description);

    const categoryDir = path.dirname(filePath);
    if (!fs.existsSync(categoryDir)) {
      fs.mkdirSync(categoryDir, { recursive: true });
    }

    fs.writeFileSync(filePath, this.serialize(description, urls), 'utf-8');

    this.logger.log(`Created playlist: ${category}/${name}`);

    return {
      category,
      name,
      path: filePath,
      urlCount: urls.length,
      urls,
      description,
    };
  }

  /**
   * Deletes a playlist file.
   *
   * @throws BadRequestException when `category` or `name` is not a plain path segment.
   * @throws NotFoundException when the playlist file does not exist.
   */
  async delete(category: string, name: string): Promise<void> {
    const filePath = this.resolveFilePath(category, name);

    if (!fs.existsSync(filePath)) {
      throw new NotFoundException(`Playlist ${category}/${name} not found`);
    }

    fs.unlinkSync(filePath);
    this.logger.log(`Deleted playlist: ${category}/${name}`);
  }

  /**
   * Appends one URL to an existing playlist and rewrites its file.
   *
   * @throws BadRequestException when the URL is missing, multi-line or starts with `#`.
   * @throws NotFoundException when the playlist does not exist.
   */
  async addUrl(category: string, name: string, url: string): Promise<Playlist> {
    const cleanUrl = this.normalizeUrl(url);
    const playlist = await this.getOne(category, name);
    playlist.urls.push(cleanUrl);

    fs.writeFileSync(playlist.path, this.serialize(playlist.description, playlist.urls), 'utf-8');

    playlist.urlCount = playlist.urls.length;
    return playlist;
  }

  /**
   * Builds the file path for a playlist after checking that both parts are
   * single path segments, so request input cannot escape `playlistsDir`.
   */
  private resolveFilePath(category: string, name: string): string {
    const segments: Array<[string, string]> = [
      ['category', category],
      ['name', name],
    ];

    for (const [label, segment] of segments) {
      const invalid =
        typeof segment !== 'string' ||
        segment === '' ||
        segment === '.' ||
        segment === '..' ||
        /[\\/\0]/.test(segment);
      if (invalid) {
        throw new BadRequestException(`Invalid playlist ${label}`);
      }
    }

    return path.join(this.playlistsDir, category, `${name}.txt`);
  }

  /** Reads and parses one playlist file into a Playlist object. */
  private readPlaylist(category: string, name: string, filePath: string): Playlist {
    const content = fs.readFileSync(filePath, 'utf-8');

    let description: string | undefined;
    const urls: string[] = [];

    for (const rawLine of content.split('\n')) {
      const line = rawLine.trim();

      // Skip blanks and the boilerplate header; treating the header as a
      // description made addUrl() write it back a second time.
      if (!line || line === PlaylistService.HEADER_LINE) {
        continue;
      }

      if (line.startsWith('#')) {
        // The first `# ...` comment is the description; other comments are ignored.
        if (!description && line.startsWith('# ')) {
          description = line.substring(2);
        }
        continue;
      }

      urls.push(line);
    }

    return { category, name, path: filePath, urlCount: urls.length, urls, description };
  }

  /** Renders the on-disk representation of a playlist. */
  private serialize(description: string | undefined, urls: string[]): string {
    const descriptionLine = description ? `# ${description}\n` : '';
    return `${descriptionLine}${PlaylistService.HEADER_LINE}\n\n${urls.join('\n')}\n`;
  }

  /** Trims a URL and rejects values that would not survive a write/read round trip. */
  private normalizeUrl(url: string): string {
    if (typeof url !== 'string') {
      throw new BadRequestException('URL must be a string');
    }

    const trimmed = url.trim();
    if (trimmed === '' || trimmed.startsWith('#') || /[\r\n]/.test(trimmed)) {
      throw new BadRequestException('URL must be a single non-empty line that does not start with "#"');
    }

    return trimmed;
  }

  /** Collapses a description onto one line so it cannot inject extra file lines. */
  private normalizeDescription(description?: string): string | undefined {
    if (typeof description !== 'string') {
      return undefined;
    }

    const singleLine = description.replace(/\s*[\r\n]+\s*/g, ' ').trim();
    return singleLine || undefined;
  }
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
import { IsString, IsOptional, IsUrl, IsEnum } from 'class-validator';
|
||||
|
||||
/** Transcription back-ends a request may select. */
export enum WhisperProviderEnum {
  /** Groq-hosted Whisper. */
  GROQ = 'groq',
  /** Whisper run locally. */
  LOCAL = 'local',
}
|
||||
|
||||
/** Model identifiers a request may select, grouped by provider. */
export enum WhisperModelEnum {
  // Cloud models served by Groq
  WHISPER_LARGE_V3_TURBO = 'whisper-large-v3-turbo',
  WHISPER_LARGE_V3 = 'whisper-large-v3',

  // Models for the local Whisper provider
  TINY = 'tiny',
  BASE = 'base',
  SMALL = 'small',
  MEDIUM = 'medium',
  LARGE = 'large',
}
|
||||
|
||||
/** Validated request body for creating a transcription job. */
export class TranscribeRequestDto {
  /** URL of the media to transcribe; must satisfy `IsUrl`. */
  @IsUrl()
  url: string;

  /** Optional language code; initialised to `'de'`. */
  @IsString()
  @IsOptional()
  language?: string = 'de';

  /** Optional provider override; must be a WhisperProviderEnum value. */
  @IsEnum(WhisperProviderEnum)
  @IsOptional()
  provider?: WhisperProviderEnum;

  /** Optional model override; must be a WhisperModelEnum value. */
  @IsEnum(WhisperModelEnum)
  @IsOptional()
  model?: WhisperModelEnum;
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/** Lifecycle states of a transcription job. */
export enum JobStatus {
  /** Created, not yet processed. */
  PENDING = 'pending',
  DOWNLOADING = 'downloading',
  TRANSCRIBING = 'transcribing',
  COMPLETED = 'completed',
  FAILED = 'failed',
  CANCELLED = 'cancelled',
}
|
||||
|
||||
/** Metadata describing the video attached to a transcription job. */
export interface VideoInfo {
  id: string;
  title: string;
  description: string;
  /** Video length — presumably in seconds; confirm against the producer of this value. */
  duration: number;
  channel: string;
  channelId: string;
  thumbnail: string;
  uploadDate: string;
}
|
||||
|
||||
/**
 * In-memory record of one transcription request and its progress.
 * A new job starts as PENDING with 0 progress and `createdAt` set to now.
 */
export class TranscriptionJob {
  id: string;
  url: string;
  language: string;
  provider: string;
  model?: string;
  status: JobStatus;
  progress: number;
  createdAt: Date;
  completedAt?: Date;
  videoInfo?: VideoInfo;
  transcriptPath?: string;
  transcriptText?: string;
  error?: string;

  /**
   * @param id Job identifier.
   * @param url Source URL to transcribe.
   * @param language Language code for the transcription.
   * @param provider Name of the transcription provider.
   * @param model Optional model name.
   */
  constructor(id: string, url: string, language: string, provider: string, model?: string) {
    // Request-supplied fields first, in the original assignment order.
    this.id = id;
    this.url = url;
    this.language = language;
    this.provider = provider;
    this.model = model;

    // Initial lifecycle state.
    this.status = JobStatus.PENDING;
    this.progress = 0;
    this.createdAt = new Date();
  }
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import { Controller, Get, Post, Delete, Param, Body } from '@nestjs/common';
|
||||
import { TranscriptionService } from './transcription.service';
|
||||
import { TranscribeRequestDto } from './dto/transcribe-request.dto';
|
||||
|
||||
/** REST endpoints for transcription jobs, mounted under `/transcription`. */
@Controller('transcription')
export class TranscriptionController {
  constructor(private readonly transcriptionService: TranscriptionService) {}

  /** POST /transcription — creates a job from the validated request body. */
  @Post()
  async createJob(@Body() dto: TranscribeRequestDto) {
    const job = await this.transcriptionService.createJob(dto);
    return job;
  }

  /** GET /transcription — all jobs held by the service. */
  @Get()
  async getAllJobs() {
    const jobs = await this.transcriptionService.getAllJobs();
    return jobs;
  }

  /** GET /transcription/stats — declared before `:id` so that "stats" is not read as a job id. */
  @Get('stats')
  async getStats() {
    const stats = await this.transcriptionService.getStats();
    return stats;
  }

  /** GET /transcription/:id — a single job. */
  @Get(':id')
  async getJob(@Param('id') id: string) {
    const job = await this.transcriptionService.getJob(id);
    return job;
  }

  /** DELETE /transcription/:id — cancels a job and returns it. */
  @Delete(':id')
  async cancelJob(@Param('id') id: string) {
    const job = await this.transcriptionService.cancelJob(id);
    return job;
  }
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { TranscriptionController } from './transcription.controller';
|
||||
import { TranscriptionService } from './transcription.service';
|
||||
import { YoutubeModule } from '../youtube/youtube.module';
|
||||
import { WhisperModule } from '../whisper/whisper.module';
|
||||
import { WebsocketModule } from '../websocket/websocket.module';
|
||||
|
||||
/**
 * Transcription feature module. Imports the YouTube, Whisper and WebSocket
 * modules whose providers TranscriptionService injects, and exports the
 * service for use by other modules.
 */
@Module({
  imports: [YoutubeModule, WhisperModule, WebsocketModule],
  controllers: [TranscriptionController],
  providers: [TranscriptionService],
  exports: [TranscriptionService],
})
export class TranscriptionModule {}
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import { YoutubeService } from '../youtube/youtube.service';
|
||||
import { WhisperService, WhisperProvider, WhisperModel } from '../whisper/whisper.service';
|
||||
import { ProgressGateway } from '../websocket/progress.gateway';
|
||||
import { TranscriptionJob, JobStatus } from './entities/transcription-job.entity';
|
||||
import { TranscribeRequestDto } from './dto/transcribe-request.dto';
|
||||
|
||||
/**
 * Runs transcription jobs: fetches video info, downloads the audio,
 * transcribes it and writes the transcript to disk, broadcasting progress
 * through the ProgressGateway. Jobs are kept in an in-memory map.
 */
@Injectable()
export class TranscriptionService {
  private readonly logger = new Logger(TranscriptionService.name);
  private readonly jobs: Map<string, TranscriptionJob> = new Map();
  private readonly transcriptsDir: string;

  constructor(
    private readonly configService: ConfigService,
    private readonly youtubeService: YoutubeService,
    private readonly whisperService: WhisperService,
    private readonly progressGateway: ProgressGateway
  ) {
    this.transcriptsDir = this.configService.get<string>('TRANSCRIPTS_DIR') || './data/transcripts';

    // Ensure transcripts directory exists
    if (!fs.existsSync(this.transcriptsDir)) {
      fs.mkdirSync(this.transcriptsDir, { recursive: true });
    }
  }

  /**
   * Registers a new job and starts processing it in the background.
   *
   * @returns The freshly created job (still PENDING or just started).
   */
  async createJob(dto: TranscribeRequestDto): Promise<TranscriptionJob> {
    const jobId = uuidv4();
    const job = new TranscriptionJob(
      jobId,
      dto.url,
      dto.language || 'de',
      // The previous default 'openai' is not a WhisperProvider ('groq' | 'local'),
      // so use the provider WhisperService is configured with.
      dto.provider || this.whisperService.getDefaultProvider(),
      dto.model
    );

    this.jobs.set(jobId, job);

    // Fire-and-forget: processJob handles its own errors and never rejects.
    void this.processJob(job);

    return job;
  }

  /**
   * @throws NotFoundException when no job with this id exists.
   */
  async getJob(id: string): Promise<TranscriptionJob> {
    const job = this.jobs.get(id);
    if (!job) {
      throw new NotFoundException(`Job ${id} not found`);
    }
    return job;
  }

  /** Returns every job currently held in memory. */
  async getAllJobs(): Promise<TranscriptionJob[]> {
    return Array.from(this.jobs.values());
  }

  /**
   * Marks an active job as cancelled and broadcasts the change. Jobs that have
   * already finished (completed, failed or cancelled) are returned unchanged.
   *
   * @throws NotFoundException when no job with this id exists.
   */
  async cancelJob(id: string): Promise<TranscriptionJob> {
    const job = await this.getJob(id);

    if (TranscriptionService.isActive(job)) {
      job.status = JobStatus.CANCELLED;
      job.error = 'Cancelled by user';

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });
    }

    return job;
  }

  /** True while a job is pending, downloading or transcribing. */
  private static isActive(job: TranscriptionJob): boolean {
    return (
      job.status === JobStatus.PENDING ||
      job.status === JobStatus.DOWNLOADING ||
      job.status === JobStatus.TRANSCRIBING
    );
  }

  /**
   * Executes the full pipeline for one job. Never rejects: failures are
   * recorded on the job and broadcast, and the downloaded audio file is
   * cleaned up exactly once in `finally`.
   */
  private async processJob(job: TranscriptionJob): Promise<void> {
    let audioPath: string | null = null;
    const jobId = job.id;

    // Helper to check if job was cancelled (re-reads from map to get current status)
    const isCancelled = (): boolean => {
      const currentJob = this.jobs.get(jobId);
      return currentJob?.status === JobStatus.CANCELLED;
    };

    try {
      // Step 1: Get video info
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 5);

      const videoInfo = await this.youtubeService.getVideoInfo(job.url);
      job.videoInfo = videoInfo;
      this.updateJobProgress(job, JobStatus.DOWNLOADING, 10);

      this.logger.log(`Processing: ${videoInfo.title}`);

      if (isCancelled()) return;

      // Step 2: Download audio
      audioPath = await this.youtubeService.downloadAudio(job.url, (progress) => {
        const overallProgress = 10 + progress.percent * 0.4; // 10-50%
        this.updateJobProgress(job, JobStatus.DOWNLOADING, Math.round(overallProgress));
      });

      this.updateJobProgress(job, JobStatus.DOWNLOADING, 50);

      // The audio file is removed by the `finally` block below.
      if (isCancelled()) return;

      // Step 3: Transcribe
      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 55);

      const result = await this.whisperService.transcribe(
        audioPath,
        job.language,
        job.provider as WhisperProvider,
        job.model as WhisperModel
      );

      this.updateJobProgress(job, JobStatus.TRANSCRIBING, 90);

      if (isCancelled()) return;

      // Step 4: Save transcript
      const transcriptPath = await this.saveTranscript(job, videoInfo, result.text);

      job.transcriptPath = transcriptPath;
      job.transcriptText = result.text;
      job.status = JobStatus.COMPLETED;
      job.progress = 100;
      job.completedAt = new Date();

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        progress: job.progress,
        transcriptPath: job.transcriptPath,
      });

      this.logger.log(`Completed: ${videoInfo.title}`);
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';

      // A job the user already cancelled stays CANCELLED; don't relabel it FAILED.
      if (isCancelled()) {
        this.logger.warn(`Job ${jobId} errored after cancellation: ${message}`);
        return;
      }

      job.status = JobStatus.FAILED;
      job.error = message;

      this.progressGateway.broadcastJobUpdate(job.id, {
        status: job.status,
        error: job.error,
      });

      this.logger.error(`Job failed: ${job.error}`);
    } finally {
      // Single cleanup point for the audio file; a cleanup failure must not
      // escape as an unhandled rejection of this background task.
      if (audioPath) {
        try {
          await this.youtubeService.cleanupFile(audioPath);
        } catch (cleanupError) {
          const reason = cleanupError instanceof Error ? cleanupError.message : 'Unknown error';
          this.logger.warn(`Failed to clean up ${audioPath}: ${reason}`);
        }
      }
    }
  }

  /**
   * Records status/progress on the job and broadcasts it. Ignored once the job
   * is cancelled, so late download callbacks cannot overwrite the cancellation.
   */
  private updateJobProgress(job: TranscriptionJob, status: JobStatus, progress: number): void {
    if (job.status === JobStatus.CANCELLED) {
      return;
    }

    job.status = status;
    job.progress = progress;

    this.progressGateway.broadcastJobUpdate(job.id, {
      status: job.status,
      progress: job.progress,
      videoInfo: job.videoInfo,
    });
  }

  /**
   * Writes the transcript to `<transcriptsDir>/<channel>/<title>_<id>.txt`
   * with a small metadata header.
   *
   * @returns Path of the written file.
   */
  private async saveTranscript(
    job: TranscriptionJob,
    videoInfo: { channel: string; title: string; id: string },
    text: string
  ): Promise<string> {
    // Sanitize names for filesystem
    const sanitize = (str: string) => str.replace(/[^a-z0-9äöüß\-_]/gi, '_').substring(0, 50);

    const channelDir = path.join(this.transcriptsDir, sanitize(videoInfo.channel));

    if (!fs.existsSync(channelDir)) {
      fs.mkdirSync(channelDir, { recursive: true });
    }

    const filename = `${sanitize(videoInfo.title)}_${videoInfo.id}.txt`;
    const filePath = path.join(channelDir, filename);

    // Same layout as before: header lines, a `---` separator, then the text.
    const content = [
      `# ${videoInfo.title}`,
      `Channel: ${videoInfo.channel}`,
      `Video ID: ${videoInfo.id}`,
      `Language: ${job.language}`,
      `Transcribed: ${new Date().toISOString()}`,
      `Provider: ${job.provider}`,
      '',
      '---',
      '',
      text,
      '',
    ].join('\n');

    fs.writeFileSync(filePath, content, 'utf-8');

    return filePath;
  }

  /**
   * Aggregates transcript counts and sizes from disk plus job counts by state.
   */
  async getStats() {
    const jobs = Array.from(this.jobs.values());

    let totalTranscripts = 0;
    let totalSize = 0;

    if (fs.existsSync(this.transcriptsDir)) {
      // Recursively count `.txt` files and sum their sizes.
      const countFiles = (dir: string) => {
        const items = fs.readdirSync(dir, { withFileTypes: true });
        for (const item of items) {
          const fullPath = path.join(dir, item.name);
          if (item.isDirectory()) {
            countFiles(fullPath);
          } else if (item.name.endsWith('.txt')) {
            totalTranscripts++;
            totalSize += fs.statSync(fullPath).size;
          }
        }
      };
      countFiles(this.transcriptsDir);
    }

    return {
      totalTranscripts,
      totalSizeMB: Math.round((totalSize / 1024 / 1024) * 100) / 100,
      activeJobs: jobs.filter((j) => TranscriptionService.isActive(j)).length,
      completedJobs: jobs.filter((j) => j.status === JobStatus.COMPLETED).length,
      failedJobs: jobs.filter((j) => j.status === JobStatus.FAILED).length,
    };
  }
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
import {
|
||||
WebSocketGateway,
|
||||
WebSocketServer,
|
||||
OnGatewayConnection,
|
||||
OnGatewayDisconnect,
|
||||
} from '@nestjs/websockets';
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { Server, Socket } from 'socket.io';
|
||||
|
||||
/** Fields that may accompany a job event sent to WebSocket clients. */
export interface JobUpdatePayload {
  /** Current job status as a string. */
  status: string;
  progress?: number;
  /** Error text, when the update reports a failure or cancellation. */
  error?: string;
  /** Subset of the video metadata included with an update. */
  videoInfo?: {
    id: string;
    title: string;
    channel: string;
    thumbnail: string;
  };
  transcriptPath?: string;
}
|
||||
|
||||
/**
 * Socket.IO gateway on the `/progress` namespace. Sends a heartbeat to each
 * connected client every 10 seconds and broadcasts job events to all clients.
 */
@WebSocketGateway({
  cors: {
    origin: ['http://localhost:5173', 'http://localhost:4321', 'http://localhost:3000'],
    credentials: true,
  },
  namespace: '/progress',
})
export class ProgressGateway implements OnGatewayConnection, OnGatewayDisconnect {
  private readonly logger = new Logger(ProgressGateway.name);

  @WebSocketServer()
  server: Server;

  /** Logs the connection and starts the per-client heartbeat timer. */
  handleConnection(client: Socket) {
    this.logger.log(`Client connected: ${client.id}`);

    const sendHeartbeat = () => {
      client.emit('heartbeat', { timestamp: Date.now() });
    };
    const heartbeatTimer = setInterval(sendHeartbeat, 10000);

    // Stop the heartbeat once this client goes away.
    client.on('disconnect', () => {
      clearInterval(heartbeatTimer);
    });
  }

  /** Logs the disconnect. */
  handleDisconnect(client: Socket) {
    this.logger.log(`Client disconnected: ${client.id}`);
  }

  /** Broadcasts a `job_update` event for the given job. */
  broadcastJobUpdate(jobId: string, payload: JobUpdatePayload) {
    this.publish('job_update', jobId, payload);
  }

  /** Broadcasts a `job_complete` event for the given job. */
  broadcastJobComplete(jobId: string, payload: JobUpdatePayload) {
    this.publish('job_complete', jobId, payload);
  }

  /** Broadcasts a `job_error` event carrying the error text. */
  broadcastJobError(jobId: string, error: string) {
    this.publish('job_error', jobId, { error });
  }

  /**
   * Emits `event` to every client. The message carries `type` (equal to the
   * event name), `jobId`, the supplied fields and a `timestamp`, in that order.
   */
  private publish(event: string, jobId: string, fields: JobUpdatePayload | { error: string }) {
    this.server.emit(event, {
      type: event,
      jobId,
      ...fields,
      timestamp: Date.now(),
    });
  }
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
import { Module, Global } from '@nestjs/common';
|
||||
import { ProgressGateway } from './progress.gateway';
|
||||
|
||||
/** Global module that provides and exports the ProgressGateway. */
@Global()
@Module({
  providers: [ProgressGateway],
  exports: [ProgressGateway],
})
export class WebsocketModule {}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
import { Controller, Get } from '@nestjs/common';
|
||||
import { WhisperService } from './whisper.service';
|
||||
|
||||
/** Read-only endpoints describing the Whisper configuration, under `/whisper`. */
@Controller('whisper')
export class WhisperController {
  constructor(private readonly whisperService: WhisperService) {}

  /** GET /whisper/models — available models, configured defaults and Groq availability. */
  @Get('models')
  getModels() {
    const whisper = this.whisperService;
    const models = whisper.getAvailableModels();
    const defaultProvider = whisper.getDefaultProvider();
    const defaultModel = whisper.getDefaultModel();
    const groqAvailable = whisper.isGroqAvailable();

    return { models, defaultProvider, defaultModel, groqAvailable };
  }
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { WhisperService } from './whisper.service';
|
||||
import { WhisperController } from './whisper.controller';
|
||||
|
||||
/** Whisper feature module; exports WhisperService so other modules can inject it. */
@Module({
  controllers: [WhisperController],
  providers: [WhisperService],
  exports: [WhisperService],
})
export class WhisperModule {}
|
||||
|
|
@ -0,0 +1,219 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { spawn } from 'child_process';
|
||||
import * as fs from 'fs';
|
||||
import OpenAI from 'openai';
|
||||
|
||||
/** Back-end that performs a transcription. */
export type WhisperProvider = 'groq' | 'local';

/** Model names accepted by the Groq provider. */
export type GroqWhisperModel = 'whisper-large-v3-turbo' | 'whisper-large-v3';

/** Model names accepted by the local Whisper provider. */
export type LocalWhisperModel = 'tiny' | 'base' | 'small' | 'medium' | 'large';

/** Any model name of either provider. */
export type WhisperModel = GroqWhisperModel | LocalWhisperModel;

/** Outcome of one transcription run. */
export interface TranscriptionResult {
  /** Transcribed text. */
  text: string;
  /** Language reported for the transcription. */
  language: string;
  /** Wall-clock time the transcription took, in seconds. */
  duration: number;
  /** Provider that produced the result. */
  provider: WhisperProvider;
}

/** Descriptive entry for one selectable model. */
export interface WhisperModelInfo {
  name: string;
  provider: WhisperProvider;
  /** Human-readable speed note, e.g. `~10x realtime`. */
  speed: string;
  /** Human-readable accuracy note, e.g. `95%`. */
  accuracy: string;
  /** Human-readable price note; set only for the Groq entries. */
  cost?: string;
}
|
||||
|
||||
@Injectable()
|
||||
export class WhisperService {
|
||||
private readonly logger = new Logger(WhisperService.name);
|
||||
private readonly groqClient: OpenAI | null;
|
||||
private readonly defaultProvider: WhisperProvider;
|
||||
private readonly defaultModel: WhisperModel;
|
||||
|
||||
constructor(private configService: ConfigService) {
|
||||
const groqApiKey = this.configService.get<string>('GROQ_API_KEY');
|
||||
|
||||
if (groqApiKey) {
|
||||
// Groq uses OpenAI-compatible API
|
||||
this.groqClient = new OpenAI({
|
||||
apiKey: groqApiKey,
|
||||
baseURL: 'https://api.groq.com/openai/v1',
|
||||
});
|
||||
this.logger.log('Groq API configured successfully');
|
||||
} else {
|
||||
this.groqClient = null;
|
||||
this.logger.warn('Groq API key not configured. Only local Whisper available.');
|
||||
}
|
||||
|
||||
this.defaultProvider =
|
||||
(this.configService.get<string>('WHISPER_PROVIDER') as WhisperProvider) || 'groq';
|
||||
this.defaultModel =
|
||||
(this.configService.get<string>('WHISPER_MODEL') as WhisperModel) || 'whisper-large-v3-turbo';
|
||||
}
|
||||
|
||||
async transcribe(
|
||||
audioPath: string,
|
||||
language: string = 'de',
|
||||
provider?: WhisperProvider,
|
||||
model?: WhisperModel
|
||||
): Promise<TranscriptionResult> {
|
||||
const selectedProvider = provider || this.defaultProvider;
|
||||
const selectedModel = model || this.defaultModel;
|
||||
|
||||
// Fallback to local if Groq not available
|
||||
if (selectedProvider === 'groq' && !this.groqClient) {
|
||||
this.logger.warn('Groq not configured, falling back to local Whisper');
|
||||
return this.transcribeWithLocalWhisper(
|
||||
audioPath,
|
||||
language,
|
||||
selectedModel as LocalWhisperModel
|
||||
);
|
||||
}
|
||||
|
||||
if (selectedProvider === 'groq') {
|
||||
return this.transcribeWithGroq(audioPath, language, selectedModel as GroqWhisperModel);
|
||||
}
|
||||
|
||||
return this.transcribeWithLocalWhisper(audioPath, language, selectedModel as LocalWhisperModel);
|
||||
}
|
||||
|
||||
private async transcribeWithGroq(
|
||||
audioPath: string,
|
||||
language: string,
|
||||
model: GroqWhisperModel = 'whisper-large-v3-turbo'
|
||||
): Promise<TranscriptionResult> {
|
||||
if (!this.groqClient) {
|
||||
throw new Error('Groq API not configured');
|
||||
}
|
||||
|
||||
this.logger.log(`Transcribing with Groq Whisper API (${model}): ${audioPath}`);
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
const transcription = await this.groqClient.audio.transcriptions.create({
|
||||
file: fs.createReadStream(audioPath),
|
||||
model: model,
|
||||
language,
|
||||
response_format: 'verbose_json',
|
||||
});
|
||||
|
||||
const duration = (Date.now() - startTime) / 1000;
|
||||
|
||||
this.logger.log(`Groq transcription completed in ${duration.toFixed(2)}s`);
|
||||
|
||||
return {
|
||||
text: transcription.text,
|
||||
language: transcription.language || language,
|
||||
duration,
|
||||
provider: 'groq',
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Transcribes an audio file by running the Python `whisper` package in a
 * `python3` child process.
 *
 * The model name, audio path and language are handed to the script as
 * command-line arguments (read via `sys.argv`) instead of being spliced into
 * the Python source, so quotes, backslashes or other special characters in
 * any of them cannot break or alter the script.
 *
 * @param audioPath Path of the audio file passed to `model.transcribe`.
 * @param language Language hint passed to `model.transcribe`; also used as
 *   the fallback when the result carries no `language` key.
 * @param model Name passed to `whisper.load_model`.
 * @returns The transcribed text, the reported language, the wall-clock
 *   duration in seconds, and `provider: 'local'`.
 * @throws Error if `python3` cannot be started, exits with a non-zero code,
 *   or does not print a parseable JSON result.
 */
private async transcribeWithLocalWhisper(
  audioPath: string,
  language: string,
  model: WhisperModel
): Promise<TranscriptionResult> {
  this.logger.log(`Transcribing with local Whisper (model: ${model}): ${audioPath}`);

  const startTime = Date.now();

  // Static Python program: every dynamic value arrives through sys.argv.
  // The lines must stay at column 0 because Python is indentation-sensitive.
  const pythonScript = `
import whisper
import json
import sys

model_name, audio_path, language = sys.argv[1:4]
model = whisper.load_model(model_name)
result = model.transcribe(audio_path, language=language)
print(json.dumps({"text": result["text"], "language": result.get("language", language)}))
`.trim();

  return new Promise((resolve, reject) => {
    // Everything after the `-c <script>` pair is forwarded to the script as sys.argv[1:].
    const python = spawn('python3', ['-c', pythonScript, model, audioPath, language]);

    let stdout = '';
    let stderr = '';

    python.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    python.stderr.on('data', (data) => {
      stderr += data.toString();
      // Whisper outputs progress to stderr, log it
      this.logger.debug(data.toString());
    });

    // Without this listener a failed spawn (e.g. python3 not installed)
    // surfaces as an unhandled 'error' event instead of a rejected promise.
    python.on('error', (err) => {
      this.logger.error(`Local Whisper error: ${err.message}`);
      reject(new Error(`Transcription failed: ${err.message}`));
    });

    python.on('close', (code) => {
      const duration = (Date.now() - startTime) / 1000;

      if (code !== 0) {
        this.logger.error(`Local Whisper error: ${stderr}`);
        reject(new Error(`Transcription failed: ${stderr}`));
        return;
      }

      try {
        // The script prints the JSON document last; json.dumps emits a single
        // line, so parsing only the final line tolerates any earlier stdout noise.
        const lines = stdout.trim().split('\n');
        const result = JSON.parse(lines[lines.length - 1]);
        resolve({
          text: result.text,
          language: result.language,
          duration,
          provider: 'local',
        });
      } catch (e) {
        reject(new Error('Failed to parse transcription result'));
      }
    });
  });
}
|
||||
|
||||
/**
 * Lists the Whisper models this service can offer.
 *
 * Groq-hosted entries come first and are only included when a Groq client
 * is configured; the local entries are always included.
 *
 * @returns A freshly built array of model descriptors.
 */
getAvailableModels(): WhisperModelInfo[] {
  // Groq models (cloud, ultra-fast)
  const groqModels: WhisperModelInfo[] = this.groqClient
    ? [
        {
          name: 'whisper-large-v3-turbo',
          provider: 'groq',
          speed: '~300x realtime',
          accuracy: '95%',
          cost: '$0.04/hour',
        },
        {
          name: 'whisper-large-v3',
          provider: 'groq',
          speed: '~250x realtime',
          accuracy: '97%',
          cost: '$0.111/hour',
        },
      ]
    : [];

  // Local models
  const localModels: WhisperModelInfo[] = [
    { name: 'tiny', provider: 'local', speed: '~10x realtime', accuracy: '75%' },
    { name: 'base', provider: 'local', speed: '~7x realtime', accuracy: '85%' },
    { name: 'small', provider: 'local', speed: '~4x realtime', accuracy: '91%' },
    { name: 'medium', provider: 'local', speed: '~2x realtime', accuracy: '94%' },
    { name: 'large', provider: 'local', speed: '~1x realtime', accuracy: '96-98%' },
  ];

  return [...groqModels, ...localModels];
}
|
||||
|
||||
/**
 * Reports whether a Groq client has been set up.
 *
 * @returns `false` only when `groqClient` is `null`.
 */
isGroqAvailable(): boolean {
  const groqMissing = this.groqClient === null;
  return !groqMissing;
}
|
||||
|
||||
/**
 * Exposes the provider stored in `defaultProvider`.
 *
 * @returns The current default Whisper provider.
 */
getDefaultProvider(): WhisperProvider {
  const { defaultProvider } = this;
  return defaultProvider;
}
|
||||
|
||||
/**
 * Exposes the model stored in `defaultModel`.
 *
 * @returns The current default Whisper model.
 */
getDefaultModel(): WhisperModel {
  const { defaultModel } = this;
  return defaultModel;
}
|
||||
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
import { Module } from '@nestjs/common';
|
||||
import { YoutubeService } from './youtube.service';
|
||||
|
||||
/**
 * Nest module that registers {@link YoutubeService} as a provider and
 * exports it so importing modules can inject it.
 */
@Module({
  exports: [YoutubeService],
  providers: [YoutubeService],
})
export class YoutubeModule {}
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import { spawn } from 'child_process';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
/**
 * Video metadata as returned by `YoutubeService.getVideoInfo`, which fills
 * these fields from the JSON document printed by `yt-dlp --dump-json`.
 */
export interface VideoInfo {
  /** Video identifier (`id` in the yt-dlp JSON). */
  id: string;
  /** Video title. */
  title: string;
  /** Video description; an empty string when yt-dlp reports none. */
  description: string;
  /** Video length as reported by yt-dlp (presumably seconds; confirm against yt-dlp docs). */
  duration: number;
  /** Channel name, falling back to the uploader name. */
  channel: string;
  /** Channel identifier, falling back to the uploader identifier. */
  channelId: string;
  /** Thumbnail value taken from the yt-dlp `thumbnail` field. */
  thumbnail: string;
  /** Raw `upload_date` value from yt-dlp (format not normalised here). */
  uploadDate: string;
}
|
||||
|
||||
/**
 * One progress update parsed from a yt-dlp download output line and handed
 * to the `onProgress` callback of `YoutubeService.downloadAudio`.
 */
export interface DownloadProgress {
  /** Completed percentage, parsed as a float from the text before `%`. */
  percent: number;
  /** Transfer rate exactly as printed by yt-dlp (a token ending in `/s`). */
  speed: string;
  /** Remaining-time text that follows `ETA` in the output line. */
  eta: string;
}
|
||||
|
||||
/**
 * Thin wrapper around the `yt-dlp` command-line tool: fetches video
 * metadata, downloads audio as MP3 into a temporary directory, and removes
 * downloaded files again.
 */
@Injectable()
export class YoutubeService {
  /** Accepts optional scheme, optional `www.`/`m.`/`music.` prefix, then a YouTube host and `/`. */
  private static readonly YOUTUBE_URL_PATTERN =
    /^(https?:\/\/)?((www|m|music)\.)?(youtube\.com|youtu\.be)\//;

  /** Captures percent, speed and ETA (`MM:SS` or `HH:MM:SS`) from a yt-dlp progress line. */
  private static readonly PROGRESS_PATTERN =
    /(\d+\.?\d*)%.*?(\d+\.?\d*\w+\/s).*?ETA\s+(\d+(?::\d+)+)/;

  private readonly logger = new Logger(YoutubeService.name);
  private readonly tempDir: string;

  /**
   * Reads `TEMP_AUDIO_DIR` from configuration (default `./temp_audio`) and
   * creates that directory when it does not exist yet.
   */
  constructor(private readonly configService: ConfigService) {
    this.tempDir = this.configService.get<string>('TEMP_AUDIO_DIR') || './temp_audio';

    // Ensure temp directory exists
    if (!fs.existsSync(this.tempDir)) {
      fs.mkdirSync(this.tempDir, { recursive: true });
    }
  }

  /**
   * Fetches metadata for a video without downloading it.
   *
   * @param url Video URL handed to yt-dlp.
   * @returns The subset of yt-dlp's JSON output described by {@link VideoInfo}.
   * @throws Error if yt-dlp cannot be started, exits with a non-zero code,
   *   or prints output that is not valid JSON.
   */
  async getVideoInfo(url: string): Promise<VideoInfo> {
    // `--` ends option parsing so a URL beginning with `-` is never read as a flag.
    const { code, stdout, stderr } = await this.runYtDlp([
      '--dump-json',
      '--no-download',
      '--',
      url,
    ]);

    if (code !== 0) {
      this.logger.error(`yt-dlp info error: ${stderr}`);
      throw new Error(`Failed to get video info: ${stderr}`);
    }

    try {
      const info = JSON.parse(stdout);
      return {
        id: info.id,
        title: info.title,
        description: info.description || '',
        duration: info.duration,
        channel: info.channel || info.uploader,
        channelId: info.channel_id || info.uploader_id,
        thumbnail: info.thumbnail,
        uploadDate: info.upload_date,
      };
    } catch (e) {
      throw new Error('Failed to parse video info');
    }
  }

  /**
   * Downloads the audio track of a video as MP3 into the temp directory.
   *
   * @param url Video URL handed to yt-dlp.
   * @param onProgress Optional callback invoked once for every progress line
   *   yt-dlp prints that contains percent, speed and ETA.
   * @returns Path of the downloaded file inside the temp directory.
   * @throws Error if yt-dlp cannot be started, exits with a non-zero code,
   *   or no file carrying the generated id is found afterwards.
   */
  async downloadAudio(
    url: string,
    onProgress?: (progress: DownloadProgress) => void
  ): Promise<string> {
    const outputId = uuidv4();
    // Build the template directly rather than string-replacing ".mp3", which
    // would hit the wrong spot if the temp directory name contained ".mp3".
    const outputTemplate = path.join(this.tempDir, `${outputId}.%(ext)s`);

    // stdout chunks do not respect line boundaries, so buffer the trailing partial line.
    let pending = '';
    const emitProgress = (line: string): void => {
      const progress = this.parseProgressLine(line);
      if (progress && onProgress) {
        onProgress(progress);
      }
    };
    const handleChunk = (chunk: string): void => {
      pending += chunk;
      const lines = pending.split(/\r?\n/);
      pending = lines.pop() || '';
      lines.forEach(emitProgress);
    };

    const { code, stderr } = await this.runYtDlp(
      [
        '-x',
        '--audio-format',
        'mp3',
        '--audio-quality',
        '0',
        '-o',
        outputTemplate,
        '--newline',
        // `--` ends option parsing so a URL beginning with `-` is never read as a flag.
        '--',
        url,
      ],
      onProgress ? handleChunk : undefined
    );

    if (pending) {
      emitProgress(pending);
    }

    if (code !== 0) {
      this.logger.error(`yt-dlp download error: ${stderr}`);
      throw new Error(`Download failed: ${stderr}`);
    }

    // Prefer the expected MP3; otherwise fall back to any file carrying the
    // generated id (might have different extension initially).
    const files = fs.readdirSync(this.tempDir);
    const expectedName = `${outputId}.mp3`;
    const outputFile = files.includes(expectedName)
      ? expectedName
      : files.find((f) => f.startsWith(outputId));

    if (!outputFile) {
      throw new Error('Output file not found');
    }

    const actualPath = path.join(this.tempDir, outputFile);
    this.logger.log(`Downloaded audio to: ${actualPath}`);
    return actualPath;
  }

  /**
   * Deletes a file, best-effort: a missing file is ignored and any other
   * failure is logged as a warning instead of being thrown.
   *
   * @param filePath Path of the file to remove.
   */
  async cleanupFile(filePath: string): Promise<void> {
    try {
      await fs.promises.unlink(filePath);
      this.logger.log(`Cleaned up: ${filePath}`);
    } catch (e) {
      if ((e as NodeJS.ErrnoException).code === 'ENOENT') {
        // Nothing to clean up.
        return;
      }
      const reason = e instanceof Error ? e.message : String(e);
      this.logger.warn(`Failed to cleanup file: ${filePath} (${reason})`);
    }
  }

  /**
   * Checks whether a string starts like a YouTube URL: optional `http(s)://`,
   * optional `www.`, `m.` or `music.` prefix, then `youtube.com/` or `youtu.be/`.
   *
   * @param url Candidate URL.
   * @returns `true` when the prefix matches; the rest of the URL is not inspected.
   */
  isValidYoutubeUrl(url: string): boolean {
    return YoutubeService.YOUTUBE_URL_PATTERN.test(url);
  }

  /** Extracts percent, speed and ETA from one yt-dlp output line, or `null` if the line has none. */
  private parseProgressLine(line: string): DownloadProgress | null {
    const match = line.match(YoutubeService.PROGRESS_PATTERN);
    if (!match) {
      return null;
    }
    return {
      percent: parseFloat(match[1]),
      speed: match[2],
      eta: match[3],
    };
  }

  /**
   * Runs `yt-dlp` with the given arguments and resolves once it has closed.
   * When `onStdout` is given, stdout chunks are streamed to it and not
   * accumulated; otherwise they are collected into `stdout`. Rejects only
   * when the process cannot be started.
   */
  private runYtDlp(
    args: string[],
    onStdout?: (chunk: string) => void
  ): Promise<{ code: number | null; stdout: string; stderr: string }> {
    return new Promise((resolve, reject) => {
      const ytdlp = spawn('yt-dlp', args);

      let stdout = '';
      let stderr = '';

      ytdlp.stdout.on('data', (data) => {
        const chunk = data.toString();
        if (onStdout) {
          onStdout(chunk);
        } else {
          stdout += chunk;
        }
      });

      ytdlp.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      // Without this listener a failed spawn (e.g. yt-dlp not installed)
      // surfaces as an unhandled 'error' event instead of a rejected promise.
      ytdlp.on('error', (err) => {
        this.logger.error(`Failed to start yt-dlp: ${err.message}`);
        reject(new Error(`Failed to start yt-dlp: ${err.message}`));
      });

      ytdlp.on('close', (code) => {
        resolve({ code, stdout, stderr });
      });
    });
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue